def test_append_convert(empty_bank, raw_bank):
    """Appending raw_bank into an empty target round-trips via odo."""
    shape = discover(raw_bank)
    assert {'name', 'amount'} == set(shape.measure.names)
    append(empty_bank, raw_bank, dshape=shape)
    expected = list(pluck(shape.measure.names, raw_bank))
    assert odo(empty_bank, list, dshape=shape) == expected
def test_drop():
    """drop() removes the backing JSON file from disk."""
    with tmpfile('json') as fn:
        backend = JSON(fn)
        append(backend, [1, 2, 3])
        assert os.path.exists(fn)
        drop(backend)
        assert not os.path.exists(fn)
def test_append_convert(empty_bank, raw_bank):
    """Appending then converting back yields the original field tuples."""
    measure_ds = discover(raw_bank)
    names = measure_ds.measure.names
    assert set(names) == {'name', 'amount'}
    append(empty_bank, raw_bank, dshape=measure_ds)
    assert odo(empty_bank, list, dshape=measure_ds) == list(
        pluck(names, raw_bank)
    )
def test_drop():
    """drop() removes the backing BSON file from disk."""
    with tmpfile('.bson') as fn:
        backend = BSON(fn)
        append(backend, dat)
        assert os.path.exists(fn)
        drop(backend)
        assert not os.path.exists(fn)
def test_append_json():
    """append() serializes dat as a single JSON line holding both records."""
    with tmpfile('json') as fn:
        target = JSON(fn)
        append(target, dat)
        with open(target.path) as fh:
            contents = fh.readlines()
        assert len(contents) == 1
        first = contents[0]
        assert 'Alice' in first
        assert 'Bob' in first
def test_write_gzip_lines():
    """A .gz path makes JSONLines write gzip-compressed line output."""
    with tmpfile('json.gz') as fn:
        target = JSONLines(fn)
        append(target, dat)
        # read back through gzip to confirm the payload really is compressed
        with gzip.open(fn) as fh:
            first_line = next(fh)
        assert first_line.decode('utf-8').strip() == str(json.dumps(dat[0]))
def test_table_resource():
    """Data() over a CSV path wraps a CSV backend with matching contents."""
    with tmpfile('csv') as filename:
        schema = dshape('var * {a: int, b: int}')
        source = CSV(filename)
        append(source, [[1, 2], [10, 20]], dshape=schema)
        wrapped = Data(filename)
        assert isinstance(wrapped.data, CSV)
        assert into(list, compute(wrapped)) == into(list, source)
def test_csv_to_s3_append():
    """Appending a local CSV into an S3 resource preserves the DataFrame."""
    frame = tm.makeMixedDataFrame()
    with tmpfile('.csv') as fn:
        with s3_bucket('.csv') as bucket:
            target = resource(bucket)
            frame.to_csv(fn, index=False)
            append(target, CSV(fn))
            round_tripped = into(pd.DataFrame, target)
            tm.assert_frame_equal(frame, round_tripped)
def test_table_resource():
    """data() over a CSV path wraps a CSV backend with matching contents."""
    with tmpfile('csv') as filename:
        schema = dshape('var * {a: int, b: int}')
        source = CSV(filename)
        append(source, [[1, 2], [10, 20]], dshape=schema)
        handle = data(filename)
        assert isinstance(handle.data, CSV)
        assert into(list, compute(handle)) == into(list, source)
def test_write_gzip():
    """A .gz path makes JSON gzip its output; resource() still resolves it."""
    with tmpfile('json.gz') as fn:
        target = JSON(fn)
        append(target, dat)
        with gzip.open(fn) as fh:
            payload = fh.read()
        assert payload.decode('utf-8').strip() == str(json.dumps(dat))
        assert isinstance(resource(fn), (JSON, JSONLines))
def test_append_and_convert_round_trip():
    """Rows appended to a fresh sqlalchemy table convert back unchanged."""
    engine = sa.create_engine("sqlite:///:memory:")
    metadata = sa.MetaData(engine)
    bank = sa.Table(
        "bank",
        metadata,
        sa.Column("name", sa.String, primary_key=True),
        sa.Column("balance", sa.Integer),
    )
    bank.create()
    rows = [("Alice", 1), ("Bob", 2)]
    append(bank, rows)
    assert convert(list, bank) == rows
def test_append_sas_to_sqlite_round_trip():
    """SAS data appended into sqlite converts back to the same row set."""
    expected = convert(set, sasfile)
    with tmpfile('db') as fn:
        uri = 'sqlite:///%s::SAS' % fn
        target = resource(uri, dshape=discover(sasfile))
        append(target, sasfile)
        assert convert(set, target) == expected
def test_datetimes():
    """Datetime fields survive the JSONLines -> ndarray conversion."""
    import numpy as np
    from odo import into
    records = [{'a': 1, 'dt': datetime.datetime(2001, 1, 1)},
               {'a': 2, 'dt': datetime.datetime(2002, 2, 2)}]
    with tmpfile('json') as fn:
        target = JSONLines(fn)
        append(target, records)
        assert str(into(np.ndarray, target)) == str(into(np.ndarray, records))
def test_append_and_convert_round_trip():
    """Tuples appended into a new table come back identically via convert."""
    engine = sa.create_engine('sqlite:///:memory:')
    metadata = sa.MetaData(engine)
    columns = [sa.Column('name', sa.String, primary_key=True),
               sa.Column('balance', sa.Integer)]
    table = sa.Table('bank', metadata, *columns)
    table.create()
    records = [('Alice', 1), ('Bob', 2)]
    append(table, records)
    assert convert(list, table) == records
def test_datetimes():
    """Datetimes round-trip through JSONLines when compared as ndarrays."""
    from odo import into
    import numpy as np
    rows = [
        {'a': 1, 'dt': datetime.datetime(2001, 1, 1)},
        {'a': 2, 'dt': datetime.datetime(2002, 2, 2)},
    ]
    with tmpfile('json') as fn:
        lines = JSONLines(fn)
        append(lines, rows)
        assert str(into(np.ndarray, lines)) == str(into(np.ndarray, rows))
def test_tuples_to_json():
    """Tuples are written as JSON objects keyed by the dshape field names."""
    schema = dshape('var * {a: int, b: int}')
    rows = [(1, 2), (10, 20)]
    # plain JSON target
    with tmpfile('json') as fn:
        append(JSON(fn), rows, dshape=schema)
        with open(fn) as fh:
            assert '"a": 1' in fh.read()
    # line-delimited JSON target
    with tmpfile('json') as fn:
        append(JSONLines(fn), rows, dshape=schema)
        with open(fn) as fh:
            assert '"a": 1' in fh.read()
def test_select_to_iterator():
    """Selects convert to lists or scalars according to the requested dshape."""
    engine, t = single_table_engine()
    append(t, [('Alice', 100), ('Bob', 200)])
    plus_one = sa.select([t.c.amount + 1])
    assert convert(list, plus_one) == [(101,), (201,)]
    # a scalar dshape flattens the single-column tuples
    assert convert(list, plus_one, dshape=dshape('var * int')) == [101, 201]
    total = sa.select([sa.sql.func.sum(t.c.amount)])
    assert convert(int, total, dshape=dshape('int')) == 300
    everything = sa.select([t])
    rows = convert(list, everything, dshape=discover(t))
    assert type(rows[0]) is tuple
def test_select_to_iterator():
    """Selected expressions convert per-dshape: tuples, scalars, full rows."""
    engine, table = single_table_engine()
    append(table, [('Alice', 100), ('Bob', 200)])
    incremented = sa.select([table.c.amount + 1])
    assert convert(list, incremented) == [(101,), (201,)]
    flattened = convert(list, incremented, dshape=dshape('var * int'))
    assert flattened == [101, 201]
    summed = sa.select([sa.sql.func.sum(table.c.amount)])
    assert convert(int, summed, dshape=dshape('int')) == 300
    full = sa.select([table])
    result = convert(list, full, dshape=discover(table))
    assert type(result[0]) is tuple
def test_into_table_iterator():
    """Both tuple and dict iterators append into sqlalchemy tables."""
    engine = sa.create_engine('sqlite:///:memory:')
    metadata = sa.MetaData(engine)
    points = dshape_to_table('points', '{x: int, y: int}', metadata=metadata)
    points.create()
    rows = [(1, 1), (2, 4), (3, 9)]
    append(points, rows)
    assert convert(list, points) == rows
    assert isinstance(convert(list, points)[0], tuple)
    points2 = dshape_to_table('points2', '{x: int, y: int}', metadata=metadata)
    points2.create()
    dict_rows = [{'x': 1, 'y': 1}, {'x': 2, 'y': 4}, {'x': 3, 'y': 9}]
    append(points2, dict_rows)
    # dict input should materialize as the same tuples as the tuple input
    assert convert(list, points2) == rows
def test_into_table_iterator():
    """Iterators of tuples and of dicts both load into tables identically."""
    engine = sa.create_engine("sqlite:///:memory:")
    metadata = sa.MetaData(engine)
    first = dshape_to_table("points", "{x: int, y: int}", metadata=metadata)
    first.create()
    tuple_rows = [(1, 1), (2, 4), (3, 9)]
    append(first, tuple_rows)
    loaded = convert(list, first)
    assert loaded == tuple_rows
    assert isinstance(loaded[0], tuple)
    second = dshape_to_table("points2", "{x: int, y: int}", metadata=metadata)
    second.create()
    append(second, [{"x": 1, "y": 1}, {"x": 2, "y": 4}, {"x": 3, "y": 9}])
    # dicts land as the same tuple rows
    assert convert(list, second) == tuple_rows
def test_append_from_select(sqlite_file):
    """Appending the result of a SELECT copies rows between tables."""
    # can't use :memory: here -- two URIs would create independent databases
    first = np.array([(200.0, 'Glenn'), (314.14, 'Hope'), (235.43, 'Bob')],
                     dtype=[('amount', 'float64'), ('name', 'U5')])
    second = np.array([(800.0, 'Joe'), (914.14, 'Alice'), (1235.43, 'Ratso')],
                      dtype=[('amount', 'float64'), ('name', 'U5')])
    t = into('%s::t' % sqlite_file, first)
    s = into('%s::s' % sqlite_file, second)
    t = append(t, s.select())
    expected = np.concatenate((first, second)).tolist()
    assert into(list, t) == expected
def test_append_from_select(sqlite_file):
    """Rows selected from one table append into another in the same file."""
    # in-memory sqlite would give each URI its own independent database
    dtype = [("amount", "float64"), ("name", "S5")]
    base = np.array([(200.0, "Glenn"), (314.14, "Hope"), (235.43, "Bob")],
                    dtype=dtype)
    extra = np.array([(800.0, "Joe"), (914.14, "Alice"), (1235.43, "Ratso")],
                     dtype=dtype)
    target = into("%s::t" % sqlite_file, base)
    source = into("%s::s" % sqlite_file, extra)
    target = append(target, source.select())
    assert into(list, target) == np.concatenate((base, extra)).tolist()
def test_append_from_table():
    """One table appends directly into another within the same sqlite file."""
    # in-memory sqlite would give each URI its own independent database
    with tmpfile('db') as fn:
        first = np.array([(200.0, 'Glenn'), (314.14, 'Hope'), (235.43, 'Bob')],
                         dtype=[('amount', 'float64'), ('name', 'U5')])
        second = np.array([(800.0, 'Joe'), (914.14, 'Alice'),
                           (1235.43, 'Ratso')],
                          dtype=[('amount', 'float64'), ('name', 'U5')])
        t = into('sqlite:///%s::t' % fn, first)
        s = into('sqlite:///%s::s' % fn, second)
        t = append(t, s)
        assert odo(t, list) == np.concatenate((first, second)).tolist()
def test_append_from_table():
    """Appending table-to-table within one sqlite file concatenates rows."""
    # two :memory: URIs would point at independent databases, so use a file
    with tmpfile("db") as fn:
        dtype = [("amount", "float64"), ("name", "S5")]
        base = np.array([(200.0, "Glenn"), (314.14, "Hope"), (235.43, "Bob")],
                        dtype=dtype)
        extra = np.array(
            [(800.0, "Joe"), (914.14, "Alice"), (1235.43, "Ratso")],
            dtype=dtype,
        )
        target = into("sqlite:///%s::t" % fn, base)
        source = into("sqlite:///%s::s" % fn, extra)
        target = append(target, source)
        assert odo(target, list) == np.concatenate((base, extra)).tolist()
def test_sql_field_names_disagree_on_names():
    """Appending rows whose dshape field names don't match the table fails."""
    r = resource('sqlite:///:memory:::tb', dshape=dshape('{x: int, y: int}'))
    mismatched = dshape('{x: int, z: int}')
    assert raises(Exception,
                  lambda: append(r, [(1, 2), (10, 20)], dshape=mismatched))
def list_to_temporary_bson(data, **kwargs):
    """Write *data* into a uniquely-named temporary BSON file and return it."""
    filename = '.%s.bson' % uuid.uuid1()
    return append(Temp(BSON)(filename), data, **kwargs)
def object_to_bson(b, o, **kwargs):
    """Append an arbitrary object to BSON by first coercing it to an Iterator."""
    iterator = convert(Iterator, o, **kwargs)
    return append(b, iterator, **kwargs)
def test_sql_field_names_disagree_on_order():
    """Rows are reordered to the table's field order when dshapes differ."""
    tbl = resource('sqlite:///:memory:::tb', dshape=dshape('{x: int, y: int}'))
    append(tbl, [(1, 2), (10, 20)], dshape=dshape('{y: int, x: int}'))
    assert convert(set, tbl) == {(2, 1), (20, 10)}
def test_write_gzip():
    """BSON data round-trips through a gzip-compressed file."""
    with tmpfile('.bson.gz') as fn:
        target = BSON(fn)
        append(target, dat)
        assert convert(list, target) == dat
def test_extend_empty():
    """Appending an empty iterable leaves an empty table empty."""
    engine, table = single_table_engine()
    assert not convert(list, table)
    append(table, [])
    assert not convert(list, table)
def append(a, b, **kwargs):
    # Unwrap the target and append into its underlying `.data` object.
    # NOTE(review): as written this calls itself unconditionally and would
    # recurse forever; presumably it is registered as a dispatch overload
    # (e.g. odo's @append.register) at the definition site, so the inner call
    # dispatches on type(a.data) -- confirm the decorator is present.
    return append(a.data, b, **kwargs)
def test_append_convert():
    """append() then odo() round-trips bank through an empty collection."""
    with coll([]) as c:
        append(c, bank, dshape=ds)
        expected = list(pluck(['name', 'amount'], bank))
        assert odo(c, list, dshape=ds) == expected
def test_append_chunks():
    """Appending chunked DataFrames grows the dataset by each chunk."""
    with file(df) as (path, handle, dataset):
        append(dataset, chunks(pd.DataFrame)([df, df]))
        # fixture starts with one copy of df; two chunks make three
        assert discover(dataset).shape[0] == len(df) * 3
def test_append():
    """Two appends of df triple the dataset length."""
    with file(df) as (path, handle, dataset):
        # fixture starts with one copy of df; two appends make three
        append(dataset, df)
        append(dataset, df)
        assert discover(dataset).shape == (len(df) * 3,)
def test_append_convert():
    """append() then convert() round-trips bank through an empty collection."""
    with coll([]) as target:
        append(target, bank, dshape=ds)
        result = convert(list, target, dshape=ds)
        assert result == list(pluck(["name", "amount"], bank))
def test_append_bson():
    """Data appended to a BSON file converts back to the original list."""
    with tmpfile('.bson') as fn:
        target = BSON(fn)
        append(target, dat)
        assert convert(list, target) == dat
def iterator_to_temporary_jsonlines(data, **kwargs):
    """Dump an iterator into a uniquely-named temporary JSONLines file."""
    path = '.%s.json' % uuid.uuid1()
    return append(Temp(JSONLines)(path), data, **kwargs)
def test_append():
    """Appending df twice yields a dataset three times its length."""
    with file(df) as (filename, handle, dset):
        for _ in range(2):
            append(dset, df)
        # the fixture seeds the dataset with one copy of df
        assert discover(dset).shape == (len(df) * 3,)