Example #1
0
def test_resource_shape():
    """Check that an HDF5 dataset resource takes its shape from ``dshape``.

    Each case runs against its own ``tmpfile`` path.  The file reached
    through ``r.file`` is closed in a ``finally`` clause so that no open
    HDF5 handle is left behind, even when the assertion fails.
    """
    cases = [
        ('10 * int', (10,)),
        ('10 * 10 * int', (10, 10)),
        # A ``var`` leading dimension is expected to start with length 0.
        ('var * 10 * int', (0, 10)),
    ]
    for ds, expected in cases:
        with tmpfile('.hdf5') as fn:
            r = resource(fn + '::/data', dshape=ds)
            try:
                assert r.shape == expected
            finally:
                r.file.close()
Example #2
0
def test_resource_shape():
    """A bcolz resource reports the shape requested through ``dshape``."""
    expectations = (
        ('10 * int', (10,)),
        ('10 * 10 * int', (10, 10)),
        ('var * 10 * int', (0, 10)),
    )
    for spec, shape in expectations:
        # Every case gets its own temporary path.
        with tmpfile('.bcolz') as path:
            created = resource(path, dshape=spec)
            assert created.shape == shape
Example #3
0
def test_resource_gzip():
    """``resource`` returns a JSON/JSONLines object for gzipped JSON paths.

    Checked for a bare ``json.gz`` path, for the same path behind the
    ``json://`` and ``jsonlines://`` prefixes, and for a ``jsonlines.gz``
    path behind the ``jsonlines://`` prefix.
    """
    json_types = (JSON, JSONLines)

    with tmpfile('json.gz') as fn:
        assert isinstance(resource(fn), json_types)
        assert isinstance(resource('json://' + fn), json_types)
        assert isinstance(resource('jsonlines://' + fn), json_types)

    # Bind this block's own path so the check targets the 'jsonlines.gz'
    # file rather than the name left over from the block above.
    with tmpfile('jsonlines.gz') as fn:
        assert isinstance(resource('jsonlines://' + fn), json_types)
Example #4
0
def test_resource_on_file():
    """A SQLite URI plus a table name resolves to a SQLAlchemy ``Table``.

    The table name is given once as a separate positional argument and
    once appended to the URI after ``::``.
    """
    schema = 'var * {x: int, y: int}'

    with tmpfile('.db') as path:
        table = resource('sqlite:///' + path, 'foo', dshape=schema)
        assert isinstance(table, sa.Table)

    with tmpfile('.db') as path:
        target = 'sqlite:///' + path + '::' + 'foo'
        table = resource(target, dshape=schema)
        assert isinstance(table, sa.Table)
Example #5
0
def test_into_sqlite():
    """Rows written to a CSV file are loaded into a SQLite table.

    The rows go to a header-less CSV, the CSV is appended to the table
    ``mytable`` with ``append_csv_to_sql_table``, and the table contents
    must equal the original rows.
    """
    schema = datashape.dshape('var * {name: string, amount: int}')
    rows = [('Alice', 100), ('Bob', 200)]

    with tmpfile('.db') as dbpath, tmpfile('.csv') as csvpath:
        source = into(csvpath, rows, dshape=schema, has_header=False)
        table_uri = 'sqlite:///%s::mytable' % dbpath
        target = resource(table_uri, dshape=schema)
        append_csv_to_sql_table(target, source)

        assert into(list, target) == rows
Example #6
0
def test_resource_shape():
    """HDF5 dataset resources take the shape described by ``dshape``."""
    for spec, shape in [('10 * int', (10,)),
                        ('10 * 10 * int', (10, 10)),
                        ('var * 10 * int', (0, 10))]:
        with tmpfile('.hdf5') as path:
            dataset = resource(path + '::/data', dshape=spec)
            assert dataset.shape == shape
            # Close the file backing the dataset before leaving the block.
            dataset.file.close()
Example #7
0
def test_tuples_to_json():
    """Tuples appended with a record dshape are written with field names.

    The same check runs for a ``JSON`` and a ``JSONLines`` target.
    """
    schema = dshape('var * {a: int, b: int}')

    for backend in (JSON, JSONLines):
        with tmpfile('json') as path:
            target = backend(path)

            append(target, [(1, 2), (10, 20)], dshape=schema)
            with open(path) as handle:
                written = handle.read()
                assert '"a": 1' in written
Example #8
0
def test_tuples_to_json():
    """Appending tuples to JSON and JSONLines targets stores keyed records."""
    record_shape = dshape('var * {a: int, b: int}')

    def check(make_target):
        # Write two records through a new target, then inspect the raw text.
        with tmpfile('json') as filename:
            sink = make_target(filename)

            append(sink, [(1, 2), (10, 20)], dshape=record_shape)
            with open(filename) as stream:
                assert '"a": 1' in stream.read()

    check(JSON)
    check(JSONLines)
Example #9
0
def test_fixed_shape():
    """Check shape discovery for a frame written with ``DataFrame.to_hdf``.

    ``df`` is defined outside this block.  After writing it under the key
    ``'foo'``, the ``hdfstore://`` resource must expose a list-valued
    ``shape`` and ``discover`` must report one row per row of ``df``.
    """
    with tmpfile('.hdf5') as fn:
        df.to_hdf(fn, 'foo')
        r = resource('hdfstore://' + fn + '::/foo')
        try:
            assert isinstance(r.shape, list)
            assert discover(r).shape == (len(df), )
        finally:
            # Close through the parent even when an assertion fails, so the
            # handle is not left open (presumably the owning store — verify).
            r.parent.close()
Example #10
0
def test_varlen_dtypes():
    """Store a record array with an object-dtype column and read it back.

    The array has an ``'O'`` (object) ``name`` field and an ``'i4'``
    ``amount`` field.  The file behind the resulting dataset is closed in
    a ``finally`` clause so the handle does not leak.
    """
    y = np.array([('Alice', 100), ('Bob', 200)],
                 dtype=[('name', 'O'), ('amount', 'i4')])
    with tmpfile('.hdf5') as fn:
        dset = into(fn + '::/data', y)
        try:
            # NOTE(review): both sides are the same expression, so this only
            # shows that the conversion runs and is repeatable.  Confirm
            # whether a comparison against the source rows was intended.
            assert into(list, dset) == into(list, dset)
        finally:
            dset.file.close()
Example #11
0
def test_create():
    """``create`` builds an ``h5py.File`` at ``path`` matching ``dshape``.

    The datashape is passed to ``create`` as a string and compared, after
    discovery, with the parsed ``ds`` object.  The file is closed in a
    ``finally`` clause so the handle is released even on failure.
    """
    with tmpfile('.hdf5') as fn:
        ds = datashape.dshape('{x: int32, y: {z: 3 * int32}}')
        f = create(h5py.File, dshape='{x: int32, y: {z: 3 * int32}}', path=fn)
        try:
            assert isinstance(f, h5py.File)
            assert f.filename == fn
            assert discover(f) == ds
        finally:
            f.close()
Example #12
0
def test_csv():
    """The ``delimiter`` keyword is stored in the CSV object's dialect."""
    with tmpfile('.csv') as path:
        schema = 'var * {name: string, amount: int}'
        target = CSV(path, dshape=schema, delimiter=',')

        assert target.dialect['delimiter'] == ','
Example #13
0
def people(sc):
    """Yield an RDD of ``Row`` objects built from a CSV dump of ``df``.

    ``df`` is defined outside this block.  It is written without header
    or index, read back with ``sc.textFile``, split on commas, and each
    record becomes ``Row(name, amount, id)`` with ``amount`` converted to
    ``float`` and ``id`` to ``int``.
    """
    with tmpfile('.txt') as path:
        df.to_csv(path, header=False, index=False)
        lines = sc.textFile(path)
        fields = lines.map(lambda text: text.split(','))
        rows = fields.map(lambda rec: Row(name=rec[0],
                                          amount=float(rec[1]),
                                          id=int(rec[2])))
        yield rows
Example #14
0
def tbfile():
    """Yield the path of a PyTables file holding a table named ``title``.

    The table is created at the root from ``x``, which is defined outside
    this block.  Both the table and the file are closed before the path
    is yielded.  The file is closed in a ``finally`` clause so a failure
    while creating the table does not leave it open.
    """
    with tmpfile('.h5') as filename:
        f = tb.open_file(filename, mode='w')
        try:
            d = f.create_table('/', 'title', x)
            d.close()
        finally:
            f.close()
        yield filename
Example #15
0
def test_resource_with_variable_length():
    """A ``var``-length HDF5 dataset starts with a leading dimension of 0.

    The path yielded by ``tmpfile`` is removed first, so ``resource`` is
    called on a path that does not exist.  The file behind the dataset is
    closed in a ``finally`` clause so the handle does not leak.
    """
    with tmpfile('.hdf5') as fn:
        os.remove(fn)
        ds = datashape.dshape('var * 4 * int32')
        r = resource(fn + '::/data', dshape=ds)
        try:
            assert r.shape == (0, 4)
        finally:
            r.file.close()
Example #16
0
def test_resource_existing_ctable():
    """Data flushed to a bcolz path can be read back through ``resource``.

    ``y`` is defined outside this block.
    """
    with tmpfile('.bcolz') as path:
        written = into(path, y)
        # Flush before reopening the same path.
        written.flush()

        reopened = resource(path)
        assert eq(reopened[:], y)
Example #17
0
def test_empty_line():
    """A trailing blank line in a JSON-lines file adds no extra record."""
    # Two records followed by one extra newline.
    content = '{"a": 1}\n{"a": 2}\n\n'
    with tmpfile('.json') as path:
        with open(path, 'w') as out:
            out.write(content)
        records = convert(list, JSONLines(path))
        assert len(records) == 2
Example #18
0
def test_resource_ctable():
    """A record dshape on a bcolz path yields a ``ctable`` with that dtype."""
    schema = 'var * {name: string[5, "ascii"], balance: int32}'
    with tmpfile('.bcolz') as path:
        table = resource(path, dshape=schema)

        assert isinstance(table, ctable)
        expected_dtype = [('name', 'S5'), ('balance', 'i4')]
        assert table.dtype == expected_dtype
Example #19
0
def test_resource():
    """``resource`` picks the JSON flavour from the prefix or dshape hint.

    ``jsonlines://`` gives ``JSONLines`` and ``json://`` gives ``JSON``.
    A bare path with a ``var * {a: int}`` ``expected_dshape`` gives
    ``JSONLines``.
    """
    with tmpfile('json') as path:
        from_lines_prefix = resource('jsonlines://' + path)
        assert isinstance(from_lines_prefix, JSONLines)

        from_json_prefix = resource('json://' + path)
        assert isinstance(from_json_prefix, JSON)

        hinted = resource(path, expected_dshape=dshape('var * {a: int}'))
        assert isinstance(hinted, JSONLines)
Example #20
0
def test_resource_ctable_correctly_infers_length():
    """A fixed-length ``100 * int32`` dshape sets the expected length.

    The resource must be a ``carray`` of dtype ``i4`` for which
    ``get_expectedlen`` returns 100.
    """
    with tmpfile('.bcolz') as path:
        arr = resource(path, dshape='100 * int32')

        assert isinstance(arr, carray)
        assert arr.dtype == 'i4'
        length_hint = get_expectedlen(arr)
        assert length_hint == 100
Example #21
0
def test_drop():
    """``drop`` on a bcolz path removes it from disk."""
    schema = 'var * {name: string[5, "ascii"], balance: int32}'
    with tmpfile('.bcolz') as path:
        # The returned object is not inspected; the call is made so that
        # the path exists before it is dropped.
        created = resource(path, dshape=schema)

        assert os.path.exists(path)
        drop(path)
        assert not os.path.exists(path)
Example #22
0
def test_empty_line():
    """An extra trailing newline must not be read as a third record."""
    payload = '{"a": 1}\n{"a": 2}\n\n'  # note the extra trailing newline
    with tmpfile('.json') as filename:
        with open(filename, 'w') as sink:
            sink.write(payload)
        lines_resource = JSONLines(filename)
        parsed = convert(list, lines_resource)
        assert len(parsed) == 2
Example #23
0
def test_resource_carray():
    """A ``var * int32`` dshape on a bcolz path yields an empty ``carray``."""
    with tmpfile('.bcolz') as path:
        arr = resource(path, dshape='var * int32')

        assert isinstance(arr, carray)
        assert arr.dtype == 'i4'
        # Nothing has been appended, so the single dimension has length 0.
        assert arr.shape == (0,)
Example #24
0
def test_resource_existing_carray():
    """Reopening a flushed bcolz path without a dshape gives a ``carray``."""
    with tmpfile('.bcolz') as path:
        original = resource(path, dshape='var * int32')
        append(original, [1, 2, 3])
        original.flush()

        reopened = resource(path)
        assert isinstance(reopened, carray)
Example #25
0
def test_append():
    """Strings appended to a ``TextFile`` are written one per line."""
    with tmpfile('log') as path:
        target = TextFile(path)
        append(target, ['Hello', 'World'])

        assert os.path.exists(path)
        with open(path) as handle:
            stripped = [line.strip() for line in handle.readlines()]
            assert stripped == ['Hello', 'World']
Example #26
0
def test_engine_metadata_caching():
    """Tables from one SQLite URI share a metadata object and an engine."""
    with tmpfile('db') as path:
        base_uri = 'sqlite:///' + path
        engine = resource(base_uri)
        first = resource(base_uri + '::a', dshape=dshape('var * {x: int}'))
        second = resource(base_uri + '::b', dshape=dshape('var * {y: int}'))

        assert first.metadata is second.metadata
        assert engine is first.bind is second.bind
Example #27
0
def test_resource_with_variable_length():
    """A ``var * 4 * int32`` HDF5 dataset is created with shape ``(0, 4)``."""
    with tmpfile('.hdf5') as path:
        schema = datashape.dshape('var * 4 * int32')
        dataset = resource(path + '::/data', dshape=schema)
        try:
            assert dataset.shape == (0, 4)
        finally:
            # Always close the file backing the dataset.
            dataset.file.close()
Example #28
0
def jsonlines_file(data):
    """Write ``data`` as one JSON document per line and yield the path.

    Each item is serialised with ``json.dump`` using ``json_dumps`` as
    the ``default`` hook, followed by a newline.
    """
    with tmpfile('.json') as path:
        with open(path, 'w') as out:
            for record in data:
                json.dump(record, out, default=json_dumps)
                out.write('\n')

        yield path
Example #29
0
def test_drop():
    """``drop`` on a ``JSON`` object removes its file from disk."""
    with tmpfile('json') as path:
        target = JSON(path)
        append(target, [1, 2, 3])

        # The file must exist after the append and be gone after the drop.
        assert os.path.exists(path)
        drop(target)
        assert not os.path.exists(path)
Example #30
0
def test_read_gzip():
    """A gzip-compressed JSON file is read back as the original data.

    ``dat`` is defined outside this block.  It is serialised to UTF-8
    JSON, written through ``gzip.open`` and read with ``JSON``.  The gzip
    handle is managed by a ``with`` block so it is closed even if the
    write raises.
    """
    with tmpfile('json.gz') as fn:
        s = json.dumps(dat).encode('utf-8')
        with gzip.open(fn, 'wb') as f:
            f.write(s)
        js = JSON(fn)
        assert convert(list, js) == dat
Example #31
0
def test_into_resource():
    """``into`` an HDF5 datapath keeps the shape and contents of ``x``.

    ``x`` is defined outside this block.
    """
    with tmpfile('.hdf5') as path:
        stored = into(path + '::/x', x)
        try:
            assert stored.shape == x.shape
            assert eq(stored[:], x[:])
        finally:
            # Close the backing file whether or not the checks passed.
            stored.file.close()
Example #32
0
def jsonlines_file(data):
    """Yield the path of a temporary file holding ``data`` as JSON lines."""
    with tmpfile('.json') as filename:
        with open(filename, 'w') as sink:
            for entry in data:
                # ``json_dumps`` is passed as the ``default`` hook.
                json.dump(entry, sink, default=json_dumps)
                sink.write('\n')

        yield filename
Example #33
0
def test_read_gzip():
    """Reading a gzipped JSON file through ``JSON`` returns ``dat``.

    ``dat`` is defined outside this block.  The gzip file is written
    inside a ``with`` block so the handle is closed even when the write
    fails.
    """
    with tmpfile('json.gz') as fn:
        s = json.dumps(dat).encode('utf-8')
        with gzip.open(fn, 'wb') as f:
            f.write(s)
        js = JSON(fn)
        assert convert(list, js) == dat
Example #34
0
def test_copy_with_into():
    """A list sent ``into`` an HDF5 datapath becomes a dataset of 3 items."""
    with tmpfile('.hdf5') as path:
        dataset = into(path + '::/data', [1, 2, 3])
        try:
            assert dataset.shape == (3,)
            assert eq(dataset[:], [1, 2, 3])
        finally:
            # Close the file behind the dataset in every case.
            dataset.file.close()
Example #35
0
def test_resource_carray_overrides_expectedlen():
    """An explicit ``expectedlen`` wins over the length in the dshape.

    The dshape asks for 100 items, so the shape is ``(100,)``, while
    ``get_expectedlen`` must return the 200 passed as a keyword.
    """
    with tmpfile('.bcolz') as path:
        arr = resource(path, dshape='100 * int32', expectedlen=200)

        assert isinstance(arr, carray)
        assert arr.dtype == 'i4'
        assert arr.shape == (100,)
        length_hint = get_expectedlen(arr)
        assert length_hint == 200
Example #36
0
def test_resource_ctable_correctly_infers_length():
    """Every column of a fixed-length ``ctable`` gets the expected length.

    The dshape describes 100 records, and ``get_expectedlen`` must return
    100 for each column listed in ``names``.
    """
    schema = '100 * {name: string[5, "ascii"], balance: int32}'
    with tmpfile('.bcolz') as path:
        table = resource(path, dshape=schema)

        assert isinstance(table, ctable)
        assert table.dtype == [('name', 'S5'), ('balance', 'i4')]
        for column in table.names:
            assert get_expectedlen(table[column]) == 100
Example #37
0
def test_resource():
    """``resource`` on a bare HDF5 path returns a matching ``h5py.File``.

    The path yielded by ``tmpfile`` is removed first, so ``resource`` is
    called on a path that does not exist.  The returned file is closed in
    a ``finally`` clause so the handle does not leak.
    """
    with tmpfile('.hdf5') as fn:
        os.remove(fn)
        ds = datashape.dshape('{x: int32, y: 3 * int32}')
        r = resource(fn, dshape=ds)
        try:
            assert isinstance(r, h5py.File)
            assert discover(r) == ds
        finally:
            r.close()
Example #38
0
def test_append_json():
    """Appending to a ``JSON`` target writes a single line with all records.

    ``dat`` is defined outside this block.  The one line written must
    mention both ``Alice`` and ``Bob``.
    """
    with tmpfile('json') as path:
        target = JSON(path)
        append(target, dat)
        with open(target.path) as handle:
            contents = handle.readlines()
        assert len(contents) == 1
        only_line = contents[0]
        assert 'Alice' in only_line
        assert 'Bob' in only_line
Example #39
0
def test_pandas_writes_header_by_default():
    """Appending rows to a new CSV writes the column names into the file."""
    with tmpfile('.csv') as path:
        schema = datashape.dshape('var * {name: string, amount: int}')
        rows = [('Alice', 1), ('Bob', 2)]
        target = CSV(path)
        append(target, rows, dshape=schema)

        with open(path) as handle:
            text = handle.read()
            assert 'name' in text
Example #40
0
def test_write_gzip_lines():
    """``JSONLines`` on a ``.gz`` path writes gzip-compressed JSON lines.

    ``dat`` is defined outside this block.  The first decompressed line
    must equal the JSON encoding of ``dat[0]``.  The gzip handle is
    managed by a ``with`` block so it is closed even if reading raises.
    """
    with tmpfile('json.gz') as fn:
        j = JSONLines(fn)
        append(j, dat)

        with gzip.open(fn) as f:
            line = next(f)
        assert line.decode('utf-8').strip() == str(json.dumps(dat[0]))
Example #41
0
def test_write_gzip():
    """``JSON`` on a ``.gz`` path writes gzip-compressed JSON.

    ``dat`` is defined outside this block.  The decompressed text must
    equal the JSON encoding of ``dat``.  The gzip handle is managed by a
    ``with`` block so it is closed even if reading raises.
    """
    with tmpfile('json.gz') as fn:
        j = JSON(fn)
        append(j, dat)

        with gzip.open(fn) as f:
            text = f.read()
        assert text.decode('utf-8').strip() == str(json.dumps(dat))
Example #42
0
def test_resource_with_datapath():
    """``resource`` with a ``::/data`` suffix returns that ``h5py.Dataset``.

    The dataset must match the requested datashape, live in the file at
    ``fn``, and be the object stored under ``/data``.  The file is closed
    in a ``finally`` clause so the handle does not leak.
    """
    with tmpfile('.hdf5') as fn:
        ds = datashape.dshape('3 * 4 * int32')
        r = resource(fn + '::/data', dshape=ds)
        try:
            assert isinstance(r, h5py.Dataset)
            assert discover(r) == ds
            assert r.file.filename == fn
            assert r.file['/data'] == r
        finally:
            r.file.close()
Example #43
0
def test_resource_existing_carray():
    """Data appended and flushed to a bcolz path is read back on reopen.

    ``y`` is defined outside this block.
    """
    with tmpfile('.bcolz') as path:
        # Remove the path first so ``resource`` is called on one that
        # does not exist.
        os.remove(path)
        created = resource(path, dshape=discover(y))
        append(created, y)
        created.flush()

        reopened = resource(path)
        assert eq(reopened[:], y)
Example #44
0
def test_into_double_string():
    """``into`` accepts file paths as both source and target.

    The source CSV is first read into a list, then copied to a second
    CSV path.  The result must be a ``CSV`` object and the new file must
    contain the source text.
    """
    with filetext('alice,1\nbob,2', extension='.csv') as src_path:
        assert into(list, src_path) == [('alice', 1), ('bob', 2)]

        with tmpfile('.csv') as dst_path:
            copied = into(dst_path, src_path)
            assert isinstance(copied, CSV)
            with open(dst_path) as handle:
                contents = handle.read()
                assert 'alice' in contents
Example #45
0
def test_resource_to_engine_to_create_tables():
    """A SQLite URI with a database-level dshape returns a matching engine.

    The result must be a SQLAlchemy ``Engine`` using the ``sqlite``
    dialect, and ``discover`` on it must return the dshape passed in.
    """
    with tmpfile('.db') as path:
        schema = datashape.dshape('{mytable: var * {name: string, amt: int}}')
        engine = resource('sqlite:///' + path, dshape=schema)
        assert isinstance(engine, sa.engine.Engine)
        assert engine.dialect.name == 'sqlite'

        assert discover(engine) == schema
Example #46
0
def test_pandas_writes_header_by_default():
    """The field name ``name`` appears in a CSV written by ``append``."""
    with tmpfile('.csv') as filename:
        record_shape = datashape.dshape('var * {name: string, amount: int}')
        records = [('Alice', 1), ('Bob', 2)]
        sink = CSV(filename)
        append(sink, records, dshape=record_shape)

        # Inspect the raw file text rather than the CSV object.
        with open(filename) as stream:
            assert 'name' in stream.read()
Example #47
0
def file(df):
    """Yield ``(path, store, storer)`` for ``df`` saved in an ``HDFStore``.

    ``df`` is put under ``/data`` in table format with ``append=True``.
    The store is closed once the consumer is done.  The ``put`` call sits
    inside the ``try`` block so the store is also closed when writing
    fails.
    """
    with tmpfile('.hdf5') as fn:
        f = pd.HDFStore(fn)
        try:
            f.put('/data', df, format='table', append=True)
            yield fn, f, f.get_storer('/data')
        finally:
            f.close()