def test_base():
    for expr, exclusions in expressions.items():
        if iscollection(expr.dshape):
            model = into(
                DataFrame,
                into(np.ndarray, expr._subs({t: Data(base, t.dshape)})))
        else:
            model = compute(expr._subs({t: Data(base, t.dshape)}))

        print('\nexpr: %s\n' % expr)

        for source in sources:
            if id(source) in map(id, exclusions):
                continue
            print('%s <- %s' % (typename(model), typename(source)))

            T = Data(source)
            if iscollection(expr.dshape):
                result = into(type(model), expr._subs({t: T}))
                if isscalar(expr.dshape.measure):
                    assert set(into(list, result)) == set(into(list, model))
                else:
                    assert df_eq(result, model)
            elif isrecord(expr.dshape):
                result = compute(expr._subs({t: T}))
                assert into(tuple, result) == into(tuple, model)
            else:
                result = compute(expr._subs({t: T}))
                try:
                    result = result.scalar()
                except AttributeError:
                    pass
                assert result == model
def test_pickle_roundtrip():
    ds = Data(1)
    assert ds.isidentical(pickle.loads(pickle.dumps(ds)))
    assert (ds + 1).isidentical(pickle.loads(pickle.dumps(ds + 1)))

    es = Data(np.array([1, 2, 3]))
    assert es.isidentical(pickle.loads(pickle.dumps(es)))
    assert (es + 1).isidentical(pickle.loads(pickle.dumps(es + 1)))
def test_can_trivially_create_sqlite_table():
    pytest.importorskip('sqlalchemy')
    Data('sqlite:///' + example('iris.db') + '::iris')

    # in context
    with Data('sqlite:///' + example('iris.db') + '::iris') as d:
        assert d is not None
def test_coerce_date_and_datetime():
    x = datetime.datetime.now().date()
    d = Data(x)
    assert repr(d) == repr(x)

    x = datetime.datetime.now()
    d = Data(x)
    assert repr(d) == repr(x)
def test_no_name_for_simple_data():
    d = Data([1, 2, 3])
    assert repr(d) == '    \n0  1\n1  2\n2  3'
    assert not d._name

    d = Data(1)
    assert not d._name
    assert repr(d) == '1'
def test_pickle_roundtrip():
    ds = Data(1)
    assert ds.isidentical(pickle.loads(pickle.dumps(ds)))
    assert (ds + 1).isidentical(pickle.loads(pickle.dumps(ds + 1)))

    es = Data(np.array([1, 2, 3]))
    rs = pickle.loads(pickle.dumps(es))
    assert (es.data == rs.data).all()
    assert_dshape_equal(es.dshape, rs.dshape)
def test___array__():
    x = np.ones(4)
    d = Data(x)
    assert (np.array(d + 1) == x + 1).all()

    d = Data(x[:2])
    x[2:] = d + 1
    assert x.tolist() == [1, 1, 2, 2]
def test_data_on_iterator_refies_data():
    data = [1, 2, 3]
    d = Data(iter(data))

    assert into(list, d) == data
    assert into(list, d) == data

    # in context
    with Data(iter(data)) as d:
        assert d is not None
def test_head_compute():
    data = tm.makeMixedDataFrame()
    t = symbol('t', discover(data))
    db = into('sqlite:///:memory:::t', data, dshape=t.dshape)
    n = 2
    d = Data(db)

    # skip the header and the ... at the end of the repr
    expr = d.head(n)
    s = repr(expr)
    assert '...' not in s
    result = s.split('\n')[1:]
    assert len(result) == n
def test_csv_with_trailing_commas():
    with tmpfile('.csv') as fn:
        with open(fn, 'wt') as f:
            # note the trailing space in the header
            f.write('a,b,c, \n1, 2, 3, ')
        csv = CSV(fn)
        assert repr(Data(fn))
        assert discover(csv).measure.names == ['a', 'b', 'c', '']

    with tmpfile('.csv') as fn:
        with open(fn, 'wt') as f:
            # NO trailing space in the header
            f.write('a,b,c,\n1, 2, 3, ')
        csv = CSV(fn)
        assert repr(Data(fn))
        assert discover(csv).measure.names == ['a', 'b', 'c', 'Unnamed: 3']
def test_dataframe_backed_repr_complex():
    df = pd.DataFrame([(1, 'Alice', 100),
                       (2, 'Bob', -200),
                       (3, 'Charlie', 300),
                       (4, 'Denis', 400),
                       (5, 'Edith', -500)],
                      columns=['id', 'name', 'balance'])
    t = Data(df)
    repr(t[t['balance'] < 0])
def test_asarray_fails_on_different_column_names():
    vs = {'first': [2., 5., 3.],
          'second': [4., 1., 4.],
          'third': [6., 4., 3.]}
    df = pd.DataFrame(vs)
    with pytest.raises(ValueError):
        Data(df, fields=list('abc'))
def test_coerce_date_and_datetime():
    x = datetime.datetime.now().date()
    d = Data(x)
    assert repr(d) == repr(x)

    x = pd.Timestamp.now()
    d = Data(x)
    assert repr(d) == repr(x)

    x = np.nan
    d = Data(x, dshape='datetime')
    assert repr(d) == repr(pd.NaT)

    x = float('nan')
    d = Data(x, dshape='datetime')
    assert repr(d) == repr(pd.NaT)
def test_str_does_not_repr():
    # see GH issue #1240.
    d = Data([('aa', 1), ('b', 2)],
             name="ZZZ",
             dshape='2 * {a: string, b: int64}')
    expr = transform(d, c=d.a.strlen() + d.b)
    assert str(expr) == (
        "Merge(_child=ZZZ, children=(ZZZ, "
        "label(strlen(_child=ZZZ.a) + ZZZ.b, 'c')))"
    )
def test_partially_bound_expr():
    df = pd.DataFrame([(1, 'Alice', 100),
                       (2, 'Bob', -200),
                       (3, 'Charlie', 300),
                       (4, 'Denis', 400),
                       (5, 'Edith', -500)],
                      columns=['id', 'name', 'balance'])
    data = Data(df, name='data')
    a = symbol('a', 'int')
    expr = data.name[data.balance > a]
    assert repr(expr) == 'data[data.balance > a].name'
def test_table_resource():
    with tmpfile('csv') as filename:
        ds = dshape('var * {a: int, b: int}')
        csv = CSV(filename)
        append(csv, [[1, 2], [10, 20]], dshape=ds)

        t = Data(filename)
        assert isinstance(t.data, CSV)
        assert into(list, compute(t)) == into(list, csv)
def test_incompatible_types():
    d = Data(pd.DataFrame(L, columns=['id', 'name', 'amount']))

    with pytest.raises(ValueError):
        d.id == 'foo'

    result = compute(d.id == 3)
    expected = pd.Series([False, False, True, False, False], name='id')
    tm.assert_series_equal(result, expected)
def test_all_string_infer_header():
    data = """x,tl,z
Be careful driving.,hy,en
Be careful.,hy,en
Can you translate this for me?,hy,en
Chicago is very different from Boston.,hy,en
Don't worry.,hy,en"""
    with tmpfile('.csv') as fn:
        with open(fn, 'w') as f:
            f.write(data)

        data = Data(fn, has_header=True)
        assert data.data.has_header
        assert data.fields == ['x', 'tl', 'z']
def test_repr():
    result = expr_repr(t['name'])
    print(result)
    assert isinstance(result, str)
    assert 'Alice' in result
    assert 'Bob' in result
    assert '...' not in result

    result = expr_repr(t['amount'] + 1)
    print(result)
    assert '101' in result

    t2 = Data(tuple((i, i ** 2) for i in range(100)), fields=['x', 'y'])
    assert t2.dshape == dshape('100 * {x: int64, y: int64}')

    result = expr_repr(t2)
    print(result)
    assert len(result.split('\n')) < 20
    assert '...' in result
def test_create_with_raw_data():
    t = Data(data, columns=['name', 'amount'])
    assert t.schema == dshape('{name: string, amount: int64}')
    assert t.name
    assert t.data == data
def test_create_with_schema():
    t = Data(data, schema='{name: string, amount: float32}')
    assert t.schema == dshape('{name: string, amount: float32}')
def test_compute_on_Data_gives_back_data():
    assert compute(Data([1, 2, 3])) == [1, 2, 3]
def test_scalar_sql_compute():
    t = into('sqlite:///:memory:::t', data,
             dshape=dshape('var * {name: string, amount: int}'))
    d = Data(t)
    assert repr(d.amount.sum()) == '300'
def test_highly_nested_repr():
    data = [[0, [[1, 2], [3]], 'abc']]
    d = Data(data)
    assert 'abc' in repr(d.head())
def test_table_raises_on_inconsistent_inputs():
    with pytest.raises(ValueError):
        t = Data(data, schema='{name: string, amount: float32}',
                 dshape=dshape("{name: string, amount: float32}"))
def test_iter():
    x = np.ones(4)
    d = Data(x)
    assert list(d + 1) == [2, 2, 2, 2]
def test_python_scalar_protocols():
    d = Data(1)
    assert int(d + 1) == 2
    assert float(d + 1.0) == 2.0
    assert bool(d > 0) is True
    assert complex(d + 1.0j) == 1 + 1.0j
def test_generator_reprs_concretely():
    x = [1, 2, 3, 4, 5, 6]
    d = Data(x)
    expr = d[d > 2] + 1
    assert '4' in repr(expr)
def test_repr_on_nd_array_doesnt_err():
    d = Data(np.ones((2, 2, 2)))
    repr(d + 1)
def test_mutable_backed_repr():
    mutable_backed_table = Data([[0]], fields=['col1'])
    repr(mutable_backed_table)
def test_DataFrame():
    x = np.array([(1, 2), (1., 2.)], dtype=[('a', 'i4'), ('b', 'f4')])
    d = Data(x)
    assert isinstance(pd.DataFrame(d), pd.DataFrame)