Example #1
def test_tmpfile():
    with tmpfile() as f:
        with open(f, 'w') as a:
            a.write('')
        with tmpfile() as g:
            assert f != g

    assert not os.path.exists(f)
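
All of these snippets exercise blaze.utils.tmpfile, a context manager that hands the block a fresh temporary filename (optionally with a chosen extension) and removes whatever ends up at that path when the block exits, which is what the final assertion in Example #1 checks. As a rough, illustrative sketch only, and not blaze's actual implementation, such a helper could look like this:

import os
import tempfile
from contextlib import contextmanager

@contextmanager
def tmpfile(extension=''):
    # Illustrative sketch, not the real blaze.utils.tmpfile.
    # Reserve a unique name with the requested extension, then delete the
    # placeholder so the caller can create any kind of file at that path.
    handle, filename = tempfile.mkstemp(suffix=extension)
    os.close(handle)
    os.remove(filename)
    try:
        yield filename
    finally:
        # Ensure the path no longer exists after the with-block,
        # matching the assert in Example #1.
        if os.path.exists(filename):
            os.remove(filename)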
Example #2
def test_resource():
    with tmpfile('.db') as fn:
        uri = 'sqlite:///' + fn
        sql = resource(uri, 'foo', dshape='var * {x: int, y: int}')
        assert isinstance(sql, sa.Table)

    with tmpfile('.db') as fn:
        uri = 'sqlite:///' + fn
        sql = resource(uri + '::' + 'foo', dshape='var * {x: int, y: int}')
        assert isinstance(sql, sa.Table)
Example #3
 def test_into(self):
     with filetext('1,1\n2,2', extension='.csv') as a:
         with tmpfile(extension='.csv') as b:
             A = resource(a, schema='2 * int')
             B = resource(b, schema='2 * int', mode='a')
             B = into(B, A)
             assert tuplify(list(B)) == ((1, 1), (2, 2))
Example #4
def data():
    with tmpfile('.h5') as filename:
        f = tb.open_file(filename, mode='w')
        d = f.create_table('/', 'title', x)
        yield d
        d.close()
        f.close()
Example #5
def data():
    with tmpfile(".h5") as filename:
        f = tb.open_file(filename, mode="w")
        d = f.create_table("/", "title", x)
        yield d
        d.close()
        f.close()
Example #6
def test_into_filename_filename():
    with filetext('1,2\n3,4', extension='csv') as source_fn:
        with tmpfile('csv') as target_fn:
            into(target_fn, source_fn)

            csv = CSV(target_fn)
            assert into(list, csv) == [(1, 2), (3, 4)]
Example #7
def test_into_cds_mixed():
    pytest.importorskip('bokeh')
    from bokeh.objects import ColumnDataSource
    n = 25
    ddict = {'first': np.random.choice(list('abc'), size=n),
             'second': np.random.choice(['cachaça', 'tres leches', 'pizza'],
                                        size=n),
             'third': list(range(n))}
    df = pd.DataFrame(ddict)
    with tmpfile('.csv') as fn:
        df.to_csv(fn, header=None, index=False, encoding='utf8')
        csv = CSV(fn, columns=['first', 'second', 'third'], encoding='utf8')
        t = Table(csv)

        cds = into(ColumnDataSource, t)
        assert isinstance(cds, ColumnDataSource)
        expected = dict((k, into(list, csv[:, k]))
                        for k in ['first', 'second', 'third'])
        assert cds.data == expected

        cds = into(ColumnDataSource, t[['first', 'second']])
        assert isinstance(cds, ColumnDataSource)
        expected = dict((k, into(list, csv[:, k]))
                        for k in ['first', 'second'])
        assert cds.data == expected

        cds = into(ColumnDataSource, t['first'])
        assert isinstance(cds, ColumnDataSource)
        assert cds.data == {'first': into(list, csv[:, 'first'])}
Example #8
def data():
    with tmpfile('.h5') as filename:
        f = tb.open_file(filename, mode='w')
        d = f.create_table('/', 'title',  x)
        yield d
        d.close()
        f.close()
Example #9
 def test_into(self):
     with filetext('1,1\n2,2', extension='.csv') as a:
         with tmpfile(extension='.csv') as b:
             A = resource(a, schema='{x: int, y: int}')
             B = resource(b, schema='{x: int, y: int}', mode='a')
             B = into(B, A)
             assert into(list, B) == [(1, 1), (2, 2)]
Example #10
 def test_copy(self):
     with filetext('1,1\n2,2', extension='.csv') as a:
         with tmpfile(extension='.csv') as b:
             A = resource(a, schema='2 * int')
             B = resource(b, schema='2 * int', mode='a')
             copy(A, B)
             assert list(B) == [[1, 1], [2, 2]]
Example #11
def engine():
    tbl = 'testtable'
    with tmpfile('db') as filename:
        engine = sqlalchemy.create_engine('sqlite:///' + filename)
        t = resource('sqlite:///' + filename + '::' + tbl,
                     dshape='var * {a: int32, b: int32}')
        yield engine, t
Example #12
def PyTables(path, datapath, dshape=None, **kwargs):
    """Create or open a ``tables.Table`` object.

    Parameters
    ----------
    path : str
        Path to a PyTables HDF5 file.
    datapath : str
        The name of the node in the ``tables.File``.
    dshape : str or datashape.DataShape
        DataShape to use to create the ``Table``.

    Returns
    -------
    t : tables.Table

    Examples
    --------
    >>> from blaze.utils import tmpfile
    >>> # create from scratch
    >>> with tmpfile('.h5') as filename:
    ...     t = PyTables(filename, '/bar',
    ...                  dshape='var * {volume: float64, planet: string[10, "A"]}')
    ...     data = [(100.3, 'mars'), (100.42, 'jupyter')]
    ...     t.append(data)
    ...     t[:]  # doctest: +SKIP
    ...
    array([(100.3, b'mars'), (100.42, b'jupyter')],
          dtype=[('volume', '<f8'), ('planet', 'S10')])
    """
    def possibly_create_table(filename, dtype):
        f = tb.open_file(filename, mode='a')
        try:
            if datapath not in f:
                if dtype is None:
                    raise ValueError('dshape cannot be None and datapath not'
                                     ' in file')
                else:
                    f.create_table('/',
                                   datapath.lstrip('/'),
                                   description=dtype)
        finally:
            f.close()

    if dshape:
        if isinstance(dshape, str):
            dshape = datashape.dshape(dshape)
        if dshape[0] == datashape.var:
            dshape = dshape.subshape[0]
        dtype = dtype_to_pytables(datashape.to_numpy_dtype(dshape))
    else:
        dtype = None

    if os.path.exists(path):
        possibly_create_table(path, dtype)
    else:
        with tmpfile('.h5') as filename:
            possibly_create_table(filename, dtype)
            shutil.copyfile(filename, path)
    return tb.open_file(path, mode='a').get_node(datapath)
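
The PyTables helper above returns a node whose backing tables.File was opened in append mode and is left open, so the caller is responsible for closing it. A small usage sketch, assuming blaze.utils.tmpfile and the function defined above (the node path and sample rows here are made up for illustration):

from blaze.utils import tmpfile

with tmpfile('.h5') as path:
    t = PyTables(path, '/measurements',
                 dshape='var * {volume: float64, planet: string[10, "A"]}')
    t.append([(100.3, 'mars'), (100.42, 'jupiter')])
    print(t[:])
    # PyTables leaves the HDF5 file open; close it through the node's
    # _v_file handle once we are done with it.
    t._v_file.close()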
Example #13
def test_into_filename():
    with tmpfile('csv') as filename:
        df = DataFrame([['Alice', 100], ['Bob', 200]],
                       columns=['name', 'amount'])
        into(filename, df)

        csv = CSV(filename)
        assert into(list, csv) == into(list, df)
Example #14
def csv():
    data = [(1, 2), (10, 20), (100, 200)]

    with tmpfile('csv') as filename:
        csv = CSV(filename, 'w', schema='{a: int32, b: int32}')
        csv.extend(data)
        csv = CSV(filename, schema='{a: int32, b: int32}')
        yield csv
Example #15
def h():
    with tmpfile('.hdf5') as f:
        f = h5py.File(f, 'a')
        fx = f.create_dataset('/x', shape=x.shape, dtype=x.dtype, chunks=True,
                maxshape=(None,))
        fx[:] = x

        yield f
Example #16
def file():
    with tmpfile('.h5') as filename:
        f = h5py.File(filename, 'a')
        d = f.create_dataset('/x', shape=x.shape, dtype=x.dtype,
                             fillvalue=0.0, chunks=(4, 6))
        d[:] = x
        yield f
        f.close()
Example #17
def test_into_filename():
    with tmpfile('csv') as filename:
        df = DataFrame([['Alice', 100], ['Bob', 200]],
                       columns=['name', 'amount'])
        into(filename, df)

        csv = CSV(filename)
        assert into(list, csv) == into(list, df)
Example #18
def data():
    with tmpfile('.h5') as filename:
        f = h5py.File(filename, 'a')
        d = f.create_dataset('/x', shape=x.shape, dtype=x.dtype,
                                   fillvalue=0.0, chunks=(4, 6))
        d[:] = x
        yield d
        f.close()
Example #19
def test_table_resource():
    with tmpfile('csv') as filename:
        csv = CSV(filename, 'w', schema='{x: int, y: int}')
        csv.extend([[1, 2], [10, 20]])

        t = Data(filename)
        assert isinstance(t.data, CSV)
        assert list(compute(t)) == list(csv)
Example #20
def csv():
    data = [(1, 2), (10, 20), (100, 200)]

    with tmpfile('csv') as filename:
        csv = CSV(filename, 'w', schema='{a: int32, b: int32}')
        csv.extend(data)
        csv = CSV(filename, schema='{a: int32, b: int32}')
        yield csv
Example #21
def test_table_resource():
    with tmpfile('csv') as filename:
        csv = CSV(filename, 'w', schema='{x: int, y: int}')
        csv.extend([[1, 2], [10, 20]])

        t = Data(filename)
        assert isinstance(t.data, CSV)
        assert list(compute(t)) == list(csv)
Example #22
def PyTables(path, datapath, dshape=None, **kwargs):
    """Create or open a ``tables.Table`` object.

    Parameters
    ----------
    path : str
        Path to a PyTables HDF5 file.
    datapath : str
        The name of the node in the ``tables.File``.
    dshape : str or datashape.DataShape
        DataShape to use to create the ``Table``.

    Returns
    -------
    t : tables.Table

    Examples
    --------
    >>> from blaze.utils import tmpfile
    >>> # create from scratch
    >>> with tmpfile('.h5') as filename:
    ...     t = PyTables(filename, '/bar',
    ...                  dshape='var * {volume: float64, planet: string[10, "A"]}')
    ...     data = [(100.3, 'mars'), (100.42, 'jupyter')]
    ...     t.append(data)
    ...     t[:]  # doctest: +SKIP
    ...
    array([(100.3, b'mars'), (100.42, b'jupyter')],
          dtype=[('volume', '<f8'), ('planet', 'S10')])
    """
    def possibly_create_table(filename, dtype):
        f = tb.open_file(filename, mode='a')
        try:
            if datapath not in f:
                if dtype is None:
                    raise ValueError('dshape cannot be None and datapath not'
                                     ' in file')
                else:
                    f.create_table('/', datapath.lstrip('/'), description=dtype)
        finally:
            f.close()

    if dshape:
        if isinstance(dshape, str):
            dshape = datashape.dshape(dshape)
        if dshape[0] == datashape.var:
            dshape = dshape.subshape[0]
        dtype = dtype_to_pytables(datashape.to_numpy_dtype(dshape))
    else:
        dtype = None

    if os.path.exists(path):
        possibly_create_table(path, dtype)
    else:
        with tmpfile('.h5') as filename:
            possibly_create_table(filename, dtype)
            shutil.copyfile(filename, path)
    return tb.open_file(path, mode='a').get_node(datapath)
Example #23
def date_data():
    data = [('Alice', 100.0, datetime(2014, 9, 11, 0, 0, 0, 0)),
            ('Alice', -200.0, datetime(2014, 9, 10, 0, 0, 0, 0)),
            ('Bob', 300.0, None)]
    schema = dshape('{name: string, amount: float32, date: ?datetime}')
    with tmpfile('.csv') as f:
        csv = CSV(f, schema=schema, mode='w')
        csv.extend(data)
        yield CSV(f, schema=schema, mode='r')
Example #24
def good_csv():
    with tmpfile(".csv") as filename:
        with open(filename, mode='w') as f:
            # Insert a new record
            f.write("userid,text,country\n")
            f.write("1,Alice,az\n")
            f.write("2,Bob,bl\n")
            f.write("3,Charlie,cz\n")
        yield filename
Example #25
def idx_data():
    with tmpfile('.h5') as fn:
        f = tb.open_file(fn, mode='w')
        d = f.create_table('/', 'title', x)
        d.cols.amount.create_index()
        d.cols.id.create_index()
        yield d
        d.close()
        f.close()
Example #26
def date_data():
    data = [('Alice', 100.0, datetime(2014, 9, 11, 0, 0, 0, 0)),
            ('Alice', -200.0, datetime(2014, 9, 10, 0, 0, 0, 0)),
            ('Bob', 300.0, None)]
    schema = dshape('{name: string, amount: float32, date: ?datetime}')
    with tmpfile('.csv') as f:
        csv = CSV(f, schema=schema, mode='w')
        csv.extend(data)
        yield CSV(f, schema=schema, mode='r')
Example #27
def csi_data():
    with tmpfile('.h5') as filename:
        f = tb.open_file(filename, mode='w')
        d = f.create_table('/', 'title', x)
        d.cols.amount.create_csindex()
        d.cols.id.create_csindex()
        yield d
        d.close()
        f.close()
Example #28
def test_create_index_uri():
    from blaze.data.csv import drop
    with tmpfile(extension='.db') as fn:
        uri = 'sqlite:///%s::table' % fn
        sql = resource(uri, schema='{x: int, y: int}')
        create_index(uri, 'x', name='x_index')
        sql = resource(uri, schema='{x: int, y: int}')

        assert list(list(sql.table.indexes)[0].columns)[0].name == 'x'
Example #29
def good_csv():
    with tmpfile(".csv") as filename:
        with open(filename, mode='w') as f:
            # Insert a new record
            f.write("userid,text,country\n")
            f.write("1,Alice,az\n")
            f.write("2,Bob,bl\n")
            f.write("3,Charlie,cz\n")
        yield filename
Example #30
def csi_data():
    with tmpfile('.h5') as filename:
        f = tb.open_file(filename, mode='w')
        d = f.create_table('/', 'title', x)
        d.cols.amount.create_csindex()
        d.cols.id.create_csindex()
        yield d
        d.close()
        f.close()
Example #31
def idx_data():
    with tmpfile('.h5') as fn:
        f = tb.open_file(fn, mode='w')
        d = f.create_table('/', 'title', x)
        d.cols.amount.create_index()
        d.cols.id.create_index()
        yield d
        d.close()
        f.close()
Example #32
def test_create_index_uri():
    from blaze.data.csv import drop
    with tmpfile(extension='.db') as fn:
        uri = 'sqlite:///%s::table' % fn
        sql = resource(uri, schema='{x: int, y: int}')
        create_index(uri, 'x', name='x_index')
        sql = resource(uri, schema='{x: int, y: int}')

        assert list(list(sql.table.indexes)[0].columns)[0].name == 'x'
Example #33
def test_csv_with_trailing_commas():
    with tmpfile('.csv') as fn:
        with open(fn, 'wt') as f:
            # note the trailing space in the header
            f.write('a,b,c, \n1, 2, 3, ')
        csv = CSV(fn)
        assert expr_repr(data(fn))
        assert discover(csv).measure.names == [
            'a', 'b', 'c', ''
        ]
    with tmpfile('.csv') as fn:
        with open(fn, 'wt') as f:
            f.write('a,b,c,\n1, 2, 3, ')  # NO trailing space in the header
        csv = CSV(fn)
        assert expr_repr(data(fn))
        assert discover(csv).measure.names == [
            'a', 'b', 'c', 'Unnamed: 3'
        ]
Example #34
def idx_data():
    with tmpfile(".h5") as fn:
        f = tb.open_file(fn, mode="w")
        d = f.create_table("/", "title", x)
        d.cols.amount.create_index()
        d.cols.id.create_index()
        yield d
        d.close()
        f.close()
Example #35
def csi_data():
    with tmpfile(".h5") as filename:
        f = tb.open_file(filename, mode="w")
        d = f.create_table("/", "title", x)
        d.cols.amount.create_csindex()
        d.cols.id.create_csindex()
        yield d
        d.close()
        f.close()
Example #36
def test_table_resource():
    with tmpfile('csv') as filename:
        ds = dshape('var * {a: int, b: int}')
        csv = CSV(filename)
        append(csv, [[1, 2], [10, 20]], dshape=ds)

        t = Data(filename)
        assert isinstance(t.data, CSV)
        assert into(list, compute(t)) == into(list, csv)
Example #37
def test_csv_with_trailing_commas():
    with tmpfile('.csv') as fn:
        with open(fn, 'wt') as f:
            # note the trailing space in the header
            f.write('a,b,c, \n1, 2, 3, ')
        csv = CSV(fn)
        assert repr(Data(fn))
        assert discover(csv).measure.names == [
            'a', 'b', 'c', ''
        ]
    with tmpfile('.csv') as fn:
        with open(fn, 'wt') as f:
            f.write('a,b,c,\n1, 2, 3, ')  # NO trailing space in the header
        csv = CSV(fn)
        assert repr(Data(fn))
        assert discover(csv).measure.names == [
            'a', 'b', 'c', 'Unnamed: 3'
        ]
Example #38
def test_table_resource():
    with tmpfile('csv') as filename:
        ds = dshape('var * {a: int, b: int}')
        csv = CSV(filename)
        append(csv, [[1, 2], [10, 20]], dshape=ds)

        t = Data(filename)
        assert isinstance(t.data, CSV)
        assert into(list, compute(t)) == into(list, csv)
Example #39
def test_hdf5_from_datashape():
    with tmpfile('.hdf5') as fn:
        f = hdf5_from_datashape(fn, '{x: int32, y: {z: 3 * int32}}')
        assert isinstance(f, h5py.File)
        assert 'x' in f
        assert f['y/z'].shape == (3,)
        assert f['y/z'].dtype == 'i4'

        # ensure idempotence
        f = hdf5_from_datashape(fn, '{x: int32, y: {z: 3 * int32}}')
Example #40
def recdata():
    with tmpfile('.h5') as filename:
        f = h5py.File(filename, 'a')
        d = f.create_dataset('/x', shape=rec.shape,
                             dtype=rec.dtype,
                             chunks=(4, 6))
        d['x'] = rec['x']
        d['y'] = rec['y']
        yield d
        f.close()
Example #41
def test_computation_on_engine():
    with tmpfile('.db') as fn:
        uri = 'sqlite:///' + fn
        sql = resource(uri, 'foo', dshape='var * {x: int, y: int}')
        into(sql, [(1, 2), (10, 20)])

        r = resource(uri)
        s = symbol('s', discover(r))

        assert compute(s.foo.x.max(), r) == 10
Example #42
def test_inconsistent_schemas():
    with tmpfile('.db') as fn:
        t = resource('sqlite:///' + fn + '::badtable',
                     dshape='var * {name: string, amount: string}')
        into(t, [('Alice', '100'), ('Bob', '200')])

        t2 = resource('sqlite:///' + fn + '::badtable',
                      dshape='var * {name: string, amount: int}')

        assert into(list, t2) == [('Alice', 100), ('Bob', 200)]
Example #43
def recdata():
    with tmpfile('.h5') as filename:
        f = h5py.File(filename, 'a')
        d = f.create_dataset('/x', shape=rec.shape,
                                   dtype=rec.dtype,
                                   chunks=(4, 6))
        d['x'] = rec['x']
        d['y'] = rec['y']
        yield d
        f.close()
Example #44
def bad_csv_df():
    with tmpfile(".csv") as filename:
        with open(filename, mode='w') as badfile:
            # Insert a new record
            badfile.write("userid,text,country\n")
            badfile.write("1,Alice,az\n")
            badfile.write("2,Bob,bl\n")
            for i in range(100):
                badfile.write("%d,badguy,zz\n" % i)
            badfile.write("4,Dan,gb,extra,extra\n")
        yield filename
Example #45
def bad_csv_df():
    with tmpfile(".csv") as filename:
        with open(filename, mode='w') as badfile:
            # Insert a new record
            badfile.write("userid,text,country\n")
            badfile.write("1,Alice,az\n")
            badfile.write("2,Bob,bl\n")
            for i in range(100):
                badfile.write("%d,badguy,zz\n" % i)
            badfile.write("4,Dan,gb,extra,extra\n")
        yield filename
Example #46
    def test_csv_hdf5(self):
        import h5py
        from dynd import nd
        with tmpfile('hdf5') as hdf5_fn:
            with filetext('1,1\n2,2\n') as csv_fn:
                csv = CSV(csv_fn, schema='2 * int')
                hdf5 = HDF5(hdf5_fn, '/data', mode='a', schema='2 * int')

                copy(csv, hdf5)

                self.assertEquals(nd.as_py(hdf5.as_dynd()), [[1, 1], [2, 2]])
Example #47
def test_gzopen_csv():
    with tmpfile('.csv.gz') as filename:
        with gzip.open(filename, 'wt') as f:
            f.write('1,1\n2,2')

        # Not a valid CSV file
        assert raises(Exception, lambda: list(CSV(filename, schema='2 * int')))

        dd = CSV(filename, schema='2 * int', open=gzip.open)

        assert list(dd) == [[1, 1], [2, 2]]
Example #48
def test_gzopen_json():
    with tmpfile('.json.gz') as filename:
        with gzip.open(filename, 'wt') as f:
            f.write('[[1, 1], [2, 2]]')

        # Not a valid JSON file
        assert raises(Exception,
                      lambda: list(JSON(filename, schema='2 * int')))

        dd = JSON(filename, schema='2 * int', open=gzip.open)

        assert list(dd) == [[1, 1], [2, 2]]
Example #49
def test_resource_works_with_empty_file():
    f = None
    with tmpfile('.bcolz') as filename:
        f = filename

    bc = resource(f, dshape=dshape('{a: int32, b: float64}'))
    assert len(bc) == 0
    assert discover(bc).measure == dshape('{a: int32, b: float64}').measure
    try:
        os.remove(f)
    except OSError:
        pass
Example #50
def test_groups():
    with tmpfile('.hdf5') as fn:
        df.to_hdf(fn, '/data/fixed')

        hdf = resource('hdfstore://%s' % fn)
        assert discover(hdf) == discover({'data': {'fixed': df}})

        s = symbol('s', discover(hdf))

        assert list(compute(s.data.fixed, hdf).a) == [1, 2, 3, 4]

        hdf.close()
Example #51
def test_gzopen_json():
    with tmpfile('.json.gz') as filename:
        f = gzip.open(filename, 'wt')
        f.write('[[1, 1], [2, 2]]')
        f.close()

        # Not a valid JSON file
        assert raises(Exception,
                      lambda: list(JSON(filename, schema='2 * int')))

        dd = JSON(filename, schema='2 * int', open=gzip.open)

        assert tuplify(list(dd)) == ((1, 1), (2, 2))
Example #52
def test_register(sql):
    with tmpfile('.db') as fn:
        uri = 'sqlite:///' + fn
        sql = SQL(uri, 'foo', schema='{x: int, y: int}')
        assert isinstance(resource(uri, 'foo'), SQL)
        assert isinstance(resource(uri + '::foo'), SQL)

    sql = SQL('sqlite:///:memory:', 'foo', schema='{x: int, y: int}')
    assert isinstance(resource('sqlite:///:memory:', 'foo',
                               schema='{x: int, y: int}'),
                      SQL)
    assert isinstance(resource('sqlite:///:memory:::foo',
                               schema='{x: int, y: int}'),
                      SQL)
Example #53
def test_merge_compute():
    data = [(1, 'Alice', 100), (2, 'Bob', 200), (4, 'Dennis', 400)]
    ds = datashape.dshape('var * {id: int, name: string, amount: real}')
    s = symbol('s', ds)

    with tmpfile('db') as fn:
        uri = 'sqlite:///' + fn
        into(uri + '::table', data, dshape=ds)

        expr = transform(s, amount10=s.amount * 10)
        result = into(list, compute(expr, {s: data}))

        assert result == [(1, 'Alice', 100, 1000), (2, 'Bob', 200, 2000),
                          (4, 'Dennis', 400, 4000)]
Example #54
def test_all_string_infer_header():
    sdata = """x,tl,z
Be careful driving.,hy,en
Be careful.,hy,en
Can you translate this for me?,hy,en
Chicago is very different from Boston.,hy,en
Don't worry.,hy,en"""
    with tmpfile('.csv') as fn:
        with open(fn, 'w') as f:
            f.write(sdata)

        tdata = data(fn, has_header=True)
        assert tdata.data.has_header
        assert tdata.fields == ['x', 'tl', 'z']
Example #55
    def test_hdf5_csv(self):
        import h5py
        with tmpfile('hdf5') as hdf5_fn:
            with filetext('') as csv_fn:
                with h5py.File(hdf5_fn, 'w') as f:
                    d = f.create_dataset('data', (3, 3), dtype='i8')
                    d[:] = 1

                csv = CSV(csv_fn, mode='r+', schema='3 * int')
                hdf5 = HDF5(hdf5_fn, '/data')

                copy(hdf5, csv)

                self.assertEquals(list(csv), [[1, 1, 1], [1, 1, 1], [1, 1, 1]])
Example #56
def test_resource():
    f = None
    with tmpfile('.bcolz') as filename:
        f = filename

    bcolz.ctable(rootdir=f,
                 columns=[[1, 2, 3], [1., 2., 3.]],
                 names=['a', 'b'])
    bc2 = resource(f)

    assert isinstance(bc2, bcolz.ctable)
    try:
        os.remove(f)
    except OSError:
        pass
Example #57
    def test_csv_hdf5(self):
        from dynd import nd
        with tmpfile('hdf5') as hdf5_fn:
            with filetext('1,1\n2,2\n') as csv_fn:
                csv = CSV(csv_fn, schema='{a: int32, b: int32}')
                hdf5 = HDF5(hdf5_fn, '/data', schema='{a: int32, b: int32}')

                into(hdf5, csv)

                self.assertEquals(nd.as_py(hdf5.as_dynd()), [{
                    'a': 1,
                    'b': 1
                }, {
                    'a': 2,
                    'b': 2
                }])
Example #58
def test_explicit_override_dshape():
    ds = dshape("""var * {a: ?float64,
                        b: ?string,
                        c: ?float32}""")
    # If not overridden, the dshape discovery will return:
    # var * {a: int64, b: string, c: int64}.
    s = textwrap.dedent("""\
                        a,b,c
                        1,x,3
                        2,y,4
                        3,z,5
                        """)
    with tmpfile('.csv') as filename:
        with open(filename, 'w') as fd:
            fd.write(s)
        bdf = data(filename, dshape=ds)
        assert bdf.dshape == ds
Example #59
    def test_hdf5_csv(self):
        import h5py
        with tmpfile('hdf5') as hdf5_fn:
            with filetext('') as csv_fn:
                with h5py.File(hdf5_fn, 'w') as f:
                    d = f.create_dataset('data', (3, ),
                                         dtype=np.dtype([(c, 'i4')
                                                         for c in 'abc']))
                    d[:] = np.array(1)

                csv = CSV(csv_fn,
                          mode='r+',
                          schema='{a: int32, b: int32, c: int32}')
                hdf5 = HDF5(hdf5_fn, '/data', schema=csv.schema)

                into(csv, hdf5)

                self.assertEquals(tuple(map(tuple, csv)),
                                  ((1, 1, 1), (1, 1, 1), (1, 1, 1)))
Example #60
    def test_csv_sql_json(self):
        data = [('Alice', 100), ('Bob', 200)]
        text = '\n'.join(','.join(map(str, row)) for row in data)
        schema = '{name: string, amount: int}'
        with filetext(text) as csv_fn:
            with filetext('') as json_fn:
                with tmpfile('db') as sqldb:

                    csv = CSV(csv_fn, mode='r', schema=schema)
                    sql = SQL('sqlite:///' + sqldb, 'testtable', schema=schema)
                    json = JSON_Streaming(json_fn, mode='r+', schema=schema)

                    into(sql, csv)

                    self.assertEqual(into(list, sql), data)

                    into(json, sql)

                    with open(json_fn) as f:
                        assert 'Alice' in f.read()