def test_append_sas_to_sqlite_round_trip():
    """Appending the SAS file to a SQLite table keeps the same set of rows."""
    rows_before = convert(set, sasfile)

    with tmpfile('db') as db_path:
        uri = 'sqlite:///%s::SAS' % db_path
        table = resource(uri, dshape=discover(sasfile))
        append(table, sasfile)
        rows_after = convert(set, table)

    assert rows_before == rows_after
def test_slicing_with_lists():
    """Indexing a blocked Array with lists gives the same values as NumPy."""
    reference = np.arange(20).reshape((4, 5))
    blocked = convert(Array, reference, blockshape=(2, 2))
    sym = symbol('x', discover(blocked))

    def agrees_with_numpy(expr):
        # Evaluate the same expression against both backends and compare.
        return eq(np.array(compute(expr, blocked)), compute(expr, reference))

    list_indexers = (
        [2, 0, 3],
        (slice(None, None, 2), [2, 0, 3]),
        (1, [2, 0, 3]),
        ([2, 0, 3], -2),
    )
    for indexer in list_indexers:
        assert agrees_with_numpy(sym[indexer])

    # A full slice on every axis must leave the task graph untouched.
    whole = sym[:, :]
    assert compute(whole, blocked).dask == blocked.dask

    for indexer in (0, (0, [3, 1, 4])):
        assert agrees_with_numpy(sym[indexer])
def test_select_to_iterator():
    """SQLAlchemy selects convert to lists and scalars, honouring ``dshape``."""
    engine, table = single_table_engine()
    append(table, [('Alice', 100), ('Bob', 200)])

    # Without a dshape each row comes back as a 1-tuple; with a scalar
    # measure the values are unwrapped.
    incremented = sa.select([table.c.amount + 1])
    assert convert(list, incremented) == [(101,), (201,)]
    flat = convert(list, incremented, dshape=dshape('var * int'))
    assert flat == [101, 201]

    total = sa.select([sa.sql.func.sum(table.c.amount)])
    assert convert(int, total, dshape=dshape('int')) == 300

    everything = sa.select([table])
    rows = convert(list, everything, dshape=discover(table))
    first_row = rows[0]
    assert type(first_row) is tuple
def test_into_table_iterator():
    """Tuples and dict records appended to a table read back as the same tuples."""
    engine = sa.create_engine('sqlite:///:memory:')
    metadata = sa.MetaData(engine)
    schema = '{x: int, y: int}'
    data = [(1, 1), (2, 4), (3, 9)]

    # Rows supplied as tuples.
    points = dshape_to_table('points', schema, metadata=metadata)
    points.create()
    append(points, data)
    assert convert(list, points) == data

    # The same rows supplied as dict records.
    points2 = dshape_to_table('points2', schema, metadata=metadata)
    points2.create()
    records = [{'x': x, 'y': y} for x, y in data]
    append(points2, records)
    assert convert(list, points2) == data
def test_convert_sas_to_dataframe():
    """The SAS file converts to a DataFrame whose measure matches ``ds``."""
    frame = convert(pd.DataFrame, sasfile)
    assert isinstance(frame, pd.DataFrame)

    # pandas doesn't support date, so compare against datetime instead.
    wanted = str(ds.measure).replace('date', 'datetime')
    actual = str(discover(frame).measure)
    assert actual == wanted
def test_convert_sas_to_dataframe():
    """The SAS file converts to a DataFrame whose measure matches ``ds``.

    Any ``?`` characters in the discovered measure are removed before the
    comparison.
    """
    frame = convert(pd.DataFrame, sasfile)
    assert isinstance(frame, pd.DataFrame)

    # pandas doesn't support date, so compare against datetime instead.
    wanted = str(ds.measure).replace('date', 'datetime')
    discovered = str(discover(frame).measure)
    assert discovered.replace('?', '') == wanted
def test_slicing_on_boundary_lines():
    """A list index with a repeated column on a 3x3-blocked array matches NumPy."""
    picks = (1, 3, 9, 3)
    reference = np.arange(100).reshape((10, 10))
    blocked = convert(Array, reference, blockshape=(3, 3))
    sym = symbol('x', discover(blocked))

    selection = sym[0, list(picks)]
    computed = compute(selection, blocked)
    assert eq(computed, reference[0, list(picks)])
def test_insert_to_ooc():
    """Running the ``insert_to_ooc`` graph fills the target buffer with the data."""
    source = np.arange(600).reshape((20, 30))
    target = np.empty(shape=source.shape, dtype=source.dtype)
    blocked = convert(Array, source, blockshape=(4, 5))

    store_graph = insert_to_ooc(target, blocked)
    # The store tasks depend on the array's own tasks, so run both together.
    full_graph = merge(store_graph, blocked.dask)
    core.get(full_graph, list(store_graph.keys()))

    assert eq(target, source)
def test_append_and_convert_round_trip():
    """Rows appended to an in-memory SQLite table read back unchanged."""
    engine = sa.create_engine('sqlite:///:memory:')
    metadata = sa.MetaData(engine)
    bank = sa.Table(
        'bank',
        metadata,
        sa.Column('name', sa.String, primary_key=True),
        sa.Column('balance', sa.Integer),
    )
    bank.create()

    rows = [('Alice', 1), ('Bob', 2)]
    append(bank, rows)
    round_tripped = convert(list, bank)
    assert round_tripped == rows
def test_multiple_object_ids():
    """ObjectId-valued fields are absent from the discovered shape and rows."""
    first = {'x': 1, 'y': 2, 'other': ObjectId('1' * 24)}
    second = {'x': 3, 'y': 4, 'other': ObjectId('2' * 24)}
    documents = [first, second]

    with coll(documents) as c:
        expected_shape = dshape('2 * {x: int64, y: int64}')
        assert discover(c) == expected_shape
        assert convert(list, c) == [(1, 2), (3, 4)]
def test_slicing_with_newaxis():
    """``None`` in an index adds length-1 axes with single-block dimensions."""
    reference = np.arange(20).reshape((4, 5))
    blocked = convert(Array, reference, blockshape=(2, 2))
    sym = symbol('x', discover(blocked))

    # One new axis between the two existing ones.
    middle_axis = sym[:, None, :]
    out = compute(middle_axis, blocked)
    assert out.shape == (4, 1, 5)
    assert out.blockdims == ((2, 2), (1, ), (2, 2, 1))
    assert eq(np.array(out), compute(middle_axis, reference))

    # Several new axes combined with a list index on the first real axis.
    many_axes = sym[None, [2, 1, 3], None, None, :, None]
    out = compute(many_axes, blocked)
    assert out.shape == (1, 3, 1, 1, 5, 1)
    assert out.blockdims == ((1, ), (3, ), (1, ), (1, ), (2, 2, 1), (1, ))
    assert eq(np.array(out), compute(many_axes, reference))
def test_fixed_convert():
    """A frame written with ``to_hdf`` converts back to an equal DataFrame.

    The resource's parent is closed in ``finally`` so the HDF5 handle is
    released before ``tmpfile`` cleans up, even when the assertion fails.
    """
    with tmpfile('.hdf5') as fn:
        df.to_hdf(fn, 'foo')
        r = resource('hdfstore://' + fn + '::/foo')
        try:
            assert eq(convert(pd.DataFrame, r), df)
        finally:
            # NOTE(review): r.parent is presumably the owning HDFStore --
            # confirm against the hdfstore resource implementation.
            r.parent.close()
def pre_compute(expr, data, **kwargs):
    """Return ``data`` as an ``Iterator`` when the path to the leaf is all Cheap.

    Only the first leaf of ``expr`` is considered. If any element yielded by
    ``path(expr, leaf)`` is not a ``Cheap`` instance, ``MDNotImplementedError``
    is raised instead.
    """
    leaf = expr._leaves()[0]
    if not all(isinstance(node, Cheap) for node in path(expr, leaf)):
        raise MDNotImplementedError()
    return convert(Iterator, data)
def test_sql_field_names_disagree_on_order():
    """Appended rows are matched to columns by field name, not position."""
    table_shape = dshape('{x: int, y: int}')
    incoming_shape = dshape('{y: int, x: int}')

    r = resource('sqlite:///:memory:::tb', dshape=table_shape)
    append(r, [(1, 2), (10, 20)], dshape=incoming_shape)

    # The incoming tuples are (y, x); stored rows come back as (x, y).
    stored = convert(set, r)
    assert stored == {(2, 1), (20, 10)}
def test_convert_pandas():
    """The dataset yielded by ``file(df)`` converts back to an equal DataFrame."""
    with file(df) as (_fn, _f, dataset):
        restored = convert(pd.DataFrame, dataset)
        assert eq(restored, df)
def test_extend_empty():
    """Appending an empty list to an empty table leaves it empty."""
    _engine, table = single_table_engine()

    before = convert(list, table)
    assert not before

    append(table, [])

    after = convert(list, table)
    assert not after
def test_convert_sas_to_list():
    """The SAS file converts to a list of lists with no header row."""
    rows = convert(list, sasfile)
    assert isinstance(rows, list)

    # No header: the first row must not contain any str items.
    first_row = rows[0]
    assert all(not isinstance(cell, str) for cell in first_row)

    assert all(isinstance(row, list) for row in rows)
def test_convert_chunks():
    """Reading the dataset in half-length chunks yields two chunks equal to ``df``.

    The chunk size uses floor division so it stays an integer on Python 3,
    where ``/`` would produce a float. For Python 2 ints the result is the
    same as ``/``.
    """
    with file(df) as (fn, f, dset):
        c = convert(chunks(pd.DataFrame), dset, chunksize=len(df) // 2)
        assert len(list(c)) == 2
        assert eq(convert(pd.DataFrame, c), df)
def test_chunks():
    """Chunks of DataFrames read from the dataset convert to an array equal to ``df``."""
    with file(df) as (_fn, _f, dataset):
        pieces = convert(chunks(pd.DataFrame), dataset)
        as_array = convert(np.ndarray, pieces)
        assert eq(as_array, df)
def test_convert():
    """Converting a NumPy array with a blockshape yields an ``Array``."""
    source = np.arange(600).reshape((20, 30))
    blocked = convert(Array, source, blockshape=(4, 5))
    assert isinstance(blocked, Array)
def test_fixed_convert():
    """A frame written with ``to_hdf`` converts back to an equal DataFrame.

    The resource's parent is closed in ``finally`` so the handle is released
    even when the assertion fails, before ``tmpfile`` cleans up.
    """
    with tmpfile('.hdf5') as fn:
        df.to_hdf(fn, 'foo')
        r = resource('hdfstore://' + fn + '::/foo')
        try:
            assert eq(convert(pd.DataFrame, r), df)
        finally:
            r.parent.close()
from operator import getitem


def eq(a, b):
    """Compare two values, computing any ``Array`` operand first.

    When ``==`` produces a NumPy array the result is reduced with ``.all()``;
    otherwise the raw comparison result is returned.
    """
    left = a.compute() if isinstance(a, Array) else a
    right = b.compute() if isinstance(b, Array) else b
    outcome = left == right
    if isinstance(outcome, np.ndarray):
        return outcome.all()
    return outcome


# Module-level fixtures: each group is a NumPy array (n*), the same data as a
# blocked Array (d*), and a symbol discovered from the blocked Array (s*).

# 20 x 30 matrix in 4 x 5 blocks.
nx = np.arange(600).reshape((20, 30))
dx = convert(Array, nx, blockshape=(4, 5))
sx = symbol('x', discover(dx))

# 30 x 20 matrix in 5 x 4 blocks.
ny = np.arange(600).reshape((30, 20))
dy = convert(Array, ny, blockshape=(5, 4))
sy = symbol('y', discover(dy))

# Length-20 vector in blocks of 4.
na = np.arange(20)
da = convert(Array, na, blockshape=(4, ))
sa = symbol('a', discover(da))

# 30 x 1 column in 5 x 1 blocks.
nb = np.arange(30).reshape((30, 1))
db = convert(Array, nb, blockshape=(5, 1))
sb = symbol('b', discover(db))

# Symbol -> blocked Array mapping.
dask_ns = {sx: dx, sy: dy, sa: da, sb: db}
def test__array__():
    """``np.array`` applied to a blocked Array reproduces the source data."""
    source = np.arange(600).reshape((20, 30))
    blocked = convert(Array, source, blockshape=(4, 5))
    materialised = np.array(blocked)
    assert eq(source, materialised)
def into(sc, o, **kwargs):
    """Convert ``o`` to a list and pass it to ``sc.parallelize``.

    Extra keyword arguments are forwarded to ``convert``. The return value is
    whatever ``sc.parallelize`` returns.
    """
    parallelize = sc.parallelize
    items = convert(list, o, **kwargs)
    return parallelize(items)
def test_convert_to_numpy_array():
    """A blocked Array converts back to an ndarray equal to the source."""
    source = np.arange(600).reshape((20, 30))
    blocked = convert(Array, source, blockshape=(4, 5))
    restored = convert(np.ndarray, blocked)
    assert eq(source, restored)
def test_append_convert():
    """Records appended to an empty collection read back as (name, amount) rows."""
    with coll([]) as c:
        append(c, bank, dshape=ds)
        stored = convert(list, c, dshape=ds)
        expected = list(pluck(['name', 'amount'], bank))
        assert stored == expected