Esempio n. 1
0
def test_subterms():
    """Check the sub-expressions reported by ``_subterms`` for a symbol and
    for expressions derived from it."""
    a = Symbol('a', 'var * {x: int, y: int, z: int}')

    # The bare symbol reports exactly itself.
    assert list(a._subterms()) == [a]

    x_col = a['x']
    assert set(x_col._subterms()) == {a, a['x']}

    incremented = x_col.map(inc, 'int')
    assert set(incremented._subterms()) == {a, a['x'], a['x'].map(inc, 'int')}

    # For arithmetic only membership of the root symbol is checked.
    assert a in set((a['x'] + 1)._subterms())
Esempio n. 2
0
def test_relabel_join():
    """Join two relabelled views of one table and check the resulting
    field order."""
    names = Symbol('names', 'var * {first: string, last: string}')

    lhs = names.relabel({'last': 'left'})
    rhs = names.relabel({'last': 'right'})
    siblings = join(lhs, rhs, 'first')

    assert siblings.fields == ['first', 'left', 'right']
Esempio n. 3
0
def test_outer_join():
    """Run inner, left, right and outer joins over plain Python rows and
    compare each result set with the hand-written expectation."""
    left = [(1, 'Alice', 100), (2, 'Bob', 200), (4, 'Dennis', 400)]
    right = [('NYC', 1), ('Boston', 1), ('LA', 3), ('Moscow', 4)]

    L = Symbol('L', 'var * {id: int, name: string, amount: real}')
    R = Symbol('R', 'var * {city: string, id: int}')

    def rows_of(expr):
        # A fresh binding dict is built for every computation.
        return set(compute(expr, {L: left, R: right}))

    # Rows present on both sides; every join flavour below contains them.
    matched = {(1, 'Alice', 100, 'NYC'),
               (1, 'Alice', 100, 'Boston'),
               (4, 'Dennis', 400, 'Moscow')}
    only_left = {(2, 'Bob', 200, None)}
    only_right = {(3, None, None, 'LA')}

    assert rows_of(join(L, R)) == matched
    assert rows_of(join(L, R, how='left')) == matched | only_left
    assert rows_of(join(L, R, how='right')) == matched | only_right
    assert rows_of(join(L, R, how='outer')) == matched | only_left | only_right
Esempio n. 4
0
def test_by_columns():
    """Check how many fields a ``by`` expression has for a column grouper
    and for a whole-table grouper."""
    t = Symbol('t', 'var * {name: string, amount: int32, id: int32}')

    totals = by(t['id'], total=t['amount'].sum())
    assert len(totals.fields) == 2

    counts = by(t['id'], count=t['id'].count())
    assert len(counts.fields) == 2

    # Grouping by the full table: its three columns plus the count.
    print(by(t, count=t.count()).fields)
    whole_table = by(t, count=t.count())
    assert len(whole_table.fields) == 4
Esempio n. 5
0
def test_multi_column_join():
    """Join two DataFrames on the column pair (x, y) and compare the
    printed result and column order with the expectation."""
    left_df = DataFrame([(1, 2, 3),
                         (2, 3, 4),
                         (1, 3, 5)],
                        columns=['x', 'y', 'z'])
    right_df = DataFrame([(1, 2, 30),
                          (1, 3, 50),
                          (1, 3, 150)],
                         columns=['x', 'y', 'w'])

    L = Symbol('L', 'var * {x: int, y: int, z: int}')
    R = Symbol('R', 'var * {x: int, y: int, w: int}')

    j = join(L, R, ['x', 'y'])

    expected = DataFrame([(1, 2, 3, 30),
                          (1, 3, 5, 50),
                          (1, 3, 5, 150)],
                         columns=['x', 'y', 'z', 'w'])

    result = compute(j, {L: left_df, R: right_df})

    print(result)

    # Frames are compared through their string rendering.
    assert str(result) == str(expected)
    assert list(result.columns) == list(j.fields)
Esempio n. 6
0
def test_join_by_arcs():
    """Join an arc table to a node index, count arcs per node name, and
    compare with the equivalent pandas merge + groupby."""
    df_idx = DataFrame([['A', 1], ['B', 2], ['C', 3]],
                       columns=['name', 'node_id'])
    df_arc = DataFrame([[1, 3], [2, 3], [3, 1]],
                       columns=['node_out', 'node_id'])

    t_idx = Symbol('t_idx', 'var * {name: string, node_id: int32}')
    t_arc = Symbol('t_arc', 'var * {node_out: int32, node_id: int32}')

    joined = join(t_arc, t_idx, "node_id")
    per_name = by(joined['name'], joined['node_id'].count())

    result = compute(per_name, {t_arc: df_arc, t_idx: df_idx})

    # Reference answer computed with pandas directly.
    merged = pd.merge(df_arc, df_idx, on='node_id')
    expected = merged.groupby('name')['node_id'].count().reset_index()

    assert str(result.values) == str(expected.values)
    assert list(result.columns) == ['name', 'node_id_count']
Esempio n. 7
0
def test_distinct_name():
    """Attribute access and item access must give identical field
    expressions, including after ``distinct()``."""
    t = Symbol('t', 'var * {id: int32, name: string}')

    assert t.name.isidentical(t['name'])

    via_attribute = t.distinct().name
    via_item = t.distinct()['name']
    assert via_attribute.isidentical(via_item)

    distinct_ids = t.id.distinct()
    assert distinct_ids._name == 'id'
    assert t.name._name == 'name'
Esempio n. 8
0
def test_traverse():
    """``_traverse`` must yield the symbol itself and, for a reduction,
    the column being reduced."""
    t = Symbol('t', 'var * {name: string, amount: int}')
    assert t in list(t._traverse())

    visited = list(t.amount.sum()._traverse())
    assert builtins.any(t.amount.isidentical(node) for node in visited)
Esempio n. 9
0
def test_keepdims():
    """With ``keepdims=True`` every reduced axis stays in the shape with
    length 1."""
    x = Symbol('x', '5 * 3 * float32')

    expectations = [
        (0, '1 * 3 * float32'),
        (1, '5 * 1 * float32'),
        ((0, 1), '1 * 1 * float32'),
    ]
    for axis, expected in expectations:
        assert x.sum(axis=axis, keepdims=True).dshape == dshape(expected)

    kept = x.std(axis=0, keepdims=True)
    assert kept.shape == (1, 3)
Esempio n. 10
0
def test_symbol_name():
    """Accessing ``.name`` on these two symbols must raise AttributeError
    (neither dshape declares a ``name`` field)."""
    t = Symbol('t', '10 * {people: string, amount: int}')
    r = Symbol('r', 'var * int64')

    for sym in (t, r):
        with pytest.raises(AttributeError):
            sym.name
Esempio n. 11
0
def test_Distinct():
    """``distinct`` on a record array must agree with ``np.unique``, for a
    single column and for whole records."""
    records = [('Alice', 100), ('Alice', -200), ('Bob', 100), ('Bob', 100)]
    x = np.array(records, dtype=[('name', 'S5'), ('amount', 'i8')])

    t = Symbol('t', 'var * {name: string, amount: int64}')

    unique_names = compute(t['name'].distinct(), x)
    assert eq(unique_names, np.unique(x['name']))

    unique_rows = compute(t.distinct(), x)
    assert eq(unique_rows, np.unique(x))
Esempio n. 12
0
def test_nelements_array():
    """``nelements`` over selected axes of a 5 x 4 x 3 array returns the
    element count along those axes."""
    t = Symbol('t', '5 * 4 * 3 * float64')
    data = np.random.randn(*t.shape)

    # Axes 0 and 1 together: 5 * 4 = 20 for each of the 3 remaining slots.
    over_first_two = compute(t.nelements(axis=(0, 1)), data)
    np.testing.assert_array_equal(over_first_two, np.array([20, 20, 20]))

    # Axis 1 alone: 4 for every (5, 3) position.
    over_middle = compute(t.nelements(axis=1), data)
    np.testing.assert_array_equal(over_middle, 4 * np.ones((5, 3)))
Esempio n. 13
0
def test_symbol_projection_failures():
    """Invalid projections and unknown attributes must raise the expected
    exception types."""
    t = Symbol('t', '10 * {name: string, amount: int}')

    # Each failing operation is wrapped in a thunk so that it only runs
    # inside the matching ``pytest.raises`` context.
    failing = [
        (ValueError, lambda: t._project(['name', 'id'])),
        (AttributeError, lambda: t.foo),
        (TypeError, lambda: t._project(t.dshape)),
    ]
    for expected_error, attempt in failing:
        with pytest.raises(expected_error):
            attempt()
Esempio n. 14
0
def test_Distinct():
    """Check the dshape and name of ``distinct`` on a column, and the
    dshape of ``distinct`` on the whole table."""
    t = Symbol('t', 'var * {name: string, amount: int32}')

    unique_names = distinct(t['name'])
    print(unique_names.dshape)
    assert unique_names.dshape == dshape('var * string')
    assert unique_names._name == 'name'

    unique_rows = t.distinct()
    assert unique_rows.dshape == t.dshape
Esempio n. 15
0
def test_sort():
    """A sort expression must survive a ``str`` -> ``eval`` round trip,
    keep the table schema, and default its key to the column name."""
    # NOTE(review): the symbol is kept bound to the local name ``t``;
    # presumably the printed expression refers to it by that name when
    # it is evaluated below.
    t = Symbol('t', 'var * {name: string, amount: int32, id: int32}')
    ordered = t.sort('amount', ascending=True)

    rendered = str(ordered)
    print(rendered)
    assert eval(rendered).isidentical(ordered)

    assert ordered.schema == t.schema

    amount_sorted = t['amount'].sort()
    assert amount_sorted.key == 'amount'
Esempio n. 16
0
def test_numbers():
    """Arithmetic and math expressions over a real and an int scalar must
    have dshape ``real`` and survive a ``str`` -> ``eval`` round trip.

    The local names ``x`` and ``y`` must match the symbol names, because
    ``eval(str(expr))`` resolves the printed names in this scope.
    """
    x = Symbol('x', 'real')
    # Named 'y' (not 'x') so the two symbols stay distinguishable once an
    # expression has been rendered to text.
    y = Symbol('y', 'int')
    for expr in [x + 1, x - 1, x * 1, x + y, x - y, x / y, x * y + x + y,
                 x**y, x**2, 2**x, x % 5, -x,
                 sin(x), cos(x ** 2), exp(log(y))]:
        assert expr.dshape == dshape('real')
        # Structural comparison: ``==`` on expressions may build a (truthy)
        # relational expression rather than compare them.
        assert eval(str(expr)).isidentical(expr)

    assert (-y).dshape == dshape('int')
Esempio n. 17
0
def test_like():
    """``like`` filters rows by glob-style patterns on one or more string
    columns."""
    t = Symbol('t', 'var * {name: string, city: string}')
    data = [('Alice Smith', 'New York'),
            ('Bob Smith', 'Chicago'),
            ('Alice Walker', 'LA')]
    alice_smith, bob_smith, alice_walker = data

    def matching(expr):
        return list(compute(expr, data))

    assert matching(t.like(name='Alice*')) == [alice_smith, alice_walker]
    # Without a leading wildcard the pattern matches nothing here.
    assert matching(t.like(name='lice*')) == []
    assert matching(t.like(name='*Smith*')) == [alice_smith, bob_smith]
    assert matching(t.like(name='*Smith*', city='New York')) == [alice_smith]
Esempio n. 18
0
def test_multi_column_join():
    """A join on two columns exposes the key columns on both sides, is
    hashable, and lists the keys first in its fields."""
    a = Symbol('a', 'var * {x: int, y: int, z: int}')
    b = Symbol('b', 'var * {w: int, x: int, y: int}')
    j = join(a, b, ['x', 'y'])

    assert set(j.fields) == {'w', 'x', 'y', 'z'}

    assert j.on_left == j.on_right
    assert j.on_right == ['x', 'y']
    assert hash(j)

    assert j.fields == ['x', 'y', 'z', 'w']
Esempio n. 19
0
def test_length():
    """``len`` works on fixed-length expressions and raises ValueError on a
    ``var``-length symbol."""
    t = Symbol('t', '10 * {name: string, amount: int}')
    s = Symbol('s', 'var * {name:string, amount:int}')
    assert t.dshape == dshape('10 * {name: string, amount: int}')

    sized = [
        (t, 10),
        (t.name, 10),
        (t[['name']], 10),
        (t.sort('name'), 10),
        (t.head(5), 5),
        # head(50) of a 10-row table is still expected to have length 10
        (t.head(50), 10),
    ]
    for expr, expected in sized:
        assert len(expr) == expected

    with pytest.raises(ValueError):
        len(s)
Esempio n. 20
0
def test_serializable():
    """A symbol and a compound expression must each be identical to their
    pickle round-tripped copies."""
    import pickle

    def roundtrip(obj):
        return pickle.loads(pickle.dumps(obj))

    t = Symbol('t', 'var * {id: int, name: string, amount: int}')
    assert t.isidentical(roundtrip(t))

    s = Symbol('t', 'var * {id: int, city: string}')
    negative = t[t.amount < 0]
    expr = join(negative, s).sort('id').city.head()
    assert expr.isidentical(roundtrip(expr))
Esempio n. 21
0
def test_relabel_join():
    """Self-join a names table on last name and check which first-name
    pairs come out."""
    names = Symbol('names', 'var * {first: string, last: string}')

    lhs = names.relabel({'first': 'left'})
    rhs = names.relabel({'first': 'right'})
    siblings = join(lhs, rhs, 'last')[['left', 'right']]

    data = [('Alice', 'Smith'), ('Bob', 'Jones'), ('Charlie', 'Smith')]

    pairs = set(compute(siblings, {names: data}))
    print(pairs)
    # Alice and Charlie share a last name; Alice and Bob do not.
    assert ('Alice', 'Charlie') in pairs
    assert ('Alice', 'Bob') not in pairs
Esempio n. 22
0
def test_leaves():
    """``_leaves`` returns the root symbols of an expression, in argument
    order for joins."""
    t = Symbol('t', 'var * {id: int32, name: string}')
    v = Symbol('v', 'var * {id: int32, city: string}')
    x = symbol('x', 'int32')

    cases = [
        (t, [t]),
        (t.id, [t]),
        (by(t.name, count=t.id.nunique()), [t]),
        (join(t, v), [t, v]),
        (join(v, t), [v, t]),
        (x + 1, [x]),
    ]
    for expr, expected in cases:
        assert expr._leaves() == expected
Esempio n. 23
0
def test_Distinct():
    """Distinct values computed on a NumPy record array must match
    ``np.unique`` for one field and for full records."""
    rows = [('Alice', 100), ('Alice', -200), ('Bob', 100), ('Bob', 100)]
    record_dtype = [('name', 'S5'), ('amount', 'i8')]
    x = np.array(rows, dtype=record_dtype)

    t = Symbol('t', 'var * {name: string, amount: int64}')

    name_result = compute(t['name'].distinct(), x)
    assert eq(name_result, np.unique(x['name']))

    row_result = compute(t.distinct(), x)
    assert eq(row_result, np.unique(x))
Esempio n. 24
0
def test_relabel():
    """Relabelling renames fields in place, round-trips through
    ``str``/``eval``, and keeps a column's measure scalar."""
    # NOTE(review): the symbol is kept bound to the local name ``t``;
    # presumably the printed expression refers to it by that name when
    # it is evaluated below.
    t = Symbol('t', 'var * {name: string, amount: int32, id: int32}')

    table_relabel = t.relabel({'name': 'NAME', 'id': 'ID'})
    column_relabel = t['amount'].relabel({'amount': 'BALANCE'})

    rendered = str(table_relabel)
    assert eval(rendered).isidentical(table_relabel)

    print(table_relabel.fields)
    assert table_relabel.fields == ['NAME', 'amount', 'ID']

    assert not isscalar(table_relabel.dshape.measure)
    assert isscalar(column_relabel.dshape.measure)
Esempio n. 25
0
def test_multi_column_join():
    """Join two lists of tuples on (x, y) and check the joined rows."""
    left = [(1, 2, 3), (2, 3, 4), (1, 3, 5)]
    right = [(1, 2, 30), (1, 3, 50), (1, 3, 150)]

    L = Symbol('L', 'var * {x: int, y: int, z: int}')
    R = Symbol('R', 'var * {x: int, y: int, w: int}')

    j = join(L, R, ['x', 'y'])

    rows = list(compute(j, {L: left, R: right}))
    print(rows)

    expected = [(1, 2, 3, 30), (1, 3, 5, 50), (1, 3, 5, 150)]
    assert rows == expected
Esempio n. 26
0
def test_outer_join():
    """Compare inner, left, right and outer joins of two DataFrames with
    hand-written expectations."""
    left = DataFrame([(1, 'Alice', 100),
                      (2, 'Bob', 200),
                      (4, 'Dennis', 400)],
                     columns=['id', 'name', 'amount'])
    right = DataFrame([('NYC', 1),
                       ('Boston', 1),
                       ('LA', 3),
                       ('Moscow', 4)],
                      columns=['city', 'id'])

    L = Symbol('L', 'var * {id: int, name: string, amount: real}')
    R = Symbol('R', 'var * {city: string, id: int}')

    def run(expr):
        # A fresh binding dict is built for every computation.
        return compute(expr, {L: left, R: right})

    def row_set(frame):
        # Rows as a set of plain tuples, ignoring order and index.
        return set(frame.to_records(index=False).tolist())

    def ordered_text(frame):
        # Text form of the records after ordering by ``id``.
        return str(frame.sort('id').to_records(index=False))

    result_columns = ['id', 'name', 'amount', 'city']

    assert row_set(run(join(L, R))) == {
        (1, 'Alice', 100, 'NYC'),
        (1, 'Alice', 100, 'Boston'),
        (4, 'Dennis', 400, 'Moscow'),
    }

    assert row_set(run(join(L, R, how='left'))) == {
        (1, 'Alice', 100, 'NYC'),
        (1, 'Alice', 100, 'Boston'),
        (2, 'Bob', 200, np.nan),
        (4, 'Dennis', 400, 'Moscow'),
    }

    # Right and outer joins are compared through their sorted text form.
    df = run(join(L, R, how='right'))
    expected = DataFrame(
        [(1., 'Alice', 100., 'NYC'),
         (1., 'Alice', 100., 'Boston'),
         (3., np.nan, np.nan, 'LA'),
         (4., 'Dennis', 400., 'Moscow')],
        columns=result_columns)
    assert ordered_text(df) == ordered_text(expected)

    df = run(join(L, R, how='outer'))
    expected = DataFrame(
        [(1., 'Alice', 100., 'NYC'),
         (1., 'Alice', 100., 'Boston'),
         (2., 'Bob', 200., np.nan),
         (3., np.nan, np.nan, 'LA'),
         (4., 'Dennis', 400., 'Moscow')],
        columns=result_columns)
    assert ordered_text(df) == ordered_text(expected)
Esempio n. 27
0
def test_relabel_join():
    """Pair up people sharing a last name via a relabelled self-join and
    check membership of two candidate pairs."""
    names = Symbol('names', 'var * {first: string, last: string}')

    as_left = names.relabel({'first': 'left'})
    as_right = names.relabel({'first': 'right'})
    joined = join(as_left, as_right, 'last')
    siblings = joined[['left', 'right']]

    data = [('Alice', 'Smith'),
            ('Bob', 'Jones'),
            ('Charlie', 'Smith')]

    found = set(compute(siblings, {names: data}))
    print(found)
    assert ('Alice', 'Charlie') in found
    assert ('Alice', 'Bob') not in found
Esempio n. 28
0
def test_union():
    """The union of three tables yields their rows concatenated in
    argument order."""
    L1 = [['Alice', 100, 1], ['Bob', 200, 2], ['Alice', 50, 3]]
    L2 = [['Alice', 100, 4], ['Bob', 200, 5], ['Alice', 50, 6]]
    L3 = [['Alice', 100, 7], ['Bob', 200, 8], ['Alice', 50, 9]]

    # All three symbols share one schema.
    schema = 'var * {name: string, amount: int, id: int}'
    t1 = Symbol('t1', schema)
    t2 = Symbol('t2', schema)
    t3 = Symbol('t3', schema)

    combined = union(t1, t2, t3)
    bindings = {t1: L1, t2: L2, t3: L3}

    assert list(compute(combined, bindings)) == L1 + L2 + L3
Esempio n. 29
0
def test_merge():
    """``merge`` combines a projection with a derived column, and rejects
    the two invalid combinations tried at the end."""
    t = Symbol('t', 'int64')
    p = Symbol('p', 'var * {amount:int}')
    accounts = Symbol('accounts',
                      'var * {name: string, balance: int32, id: int32}')

    scaled = accounts.balance * 1.5
    new_amount = scaled.label('new')

    c = merge(accounts[['name', 'balance']], new_amount)
    assert c.fields == ['name', 'balance', 'new']
    assert c.schema == dshape('{name: string, balance: int32, new: float64}')

    for other in (t, p):
        with pytest.raises(ValueError):
            merge(t, other)
Esempio n. 30
0
def test_by():
    """The schema of a ``by`` expression is a record whose grouping field
    keeps its string type."""
    t = Symbol('t', 'var * {name: string, amount: int32, id: int32}')
    grouped = by(t['name'], total=sum(t['amount']))

    print(grouped.schema)
    record = grouped.schema[0]
    assert isinstance(record, Record)
    assert str(grouped.schema[0]['name']) == 'string'
Esempio n. 31
0
def test_relational():
    """Comparing a column with a constant gives a boolean-typed expression
    that has a name."""
    t = Symbol('t', 'var * {name: string, amount: int, id: int}')

    is_alice = t['name'] == 'Alice'

    assert 'bool' in str(is_alice.dshape)
    assert is_alice._name
Esempio n. 32
0
def test_by_multi_column_grouper():
    """Group by two columns and count a third; check the
    (x, y, count) rows."""
    t = Symbol('t', 'var * {x: int, y: int, z: int}')
    grouped = by(t[['x', 'y']], t['z'].count())
    data = [(1, 2, 0), (1, 2, 0), (1, 1, 0)]

    groups = set(compute(grouped, data))
    print(groups)
    # (1, 2) occurs twice in the data, (1, 1) once.
    assert groups == {(1, 2, 2), (1, 1, 1)}
Esempio n. 33
0
def test_selection_by_getattr():
    """A selection built from attribute access keeps the table schema and
    shows its constant in the string form."""
    t = Symbol('t', 'var * {name: string, amount: int, id: int}')

    predicate = t.name == 'Alice'
    alices = t[predicate]

    assert t.schema == alices.schema
    assert 'Alice' in str(alices)
Esempio n. 34
0
def test_datetime_comparison():
    """A date column compared with an ISO date string keeps only the rows
    after that date."""
    data = [
        ['Alice', date(2000, 1, 1)],
        ['Bob', date(2000, 2, 2)],
        ['Alice', date(2000, 3, 3)],
    ]

    t = Symbol('t', 'var * {name: string, when: date}')

    after_new_year = t[t.when > '2000-01-01']
    # The first row falls exactly on the cutoff and is excluded.
    assert list(compute(after_new_year, data)) == data[1:]
Esempio n. 35
0
def test_dir():
    """``dir`` on an expression lists only the methods that apply to its
    type."""
    t = Symbol('t', 'var * {name: string, amount: int, dt: datetime}')

    datetime_names = dir(t.dt)
    assert 'day' in datetime_names
    assert 'mean' not in datetime_names

    assert 'mean' in dir(t.amount)

    projection = t[['amount', 'dt']]
    assert 'like' not in dir(projection)
    assert 'any' not in dir(t.name)
Esempio n. 36
0
def test_common_subexpression():
    """``common_subexpression`` returns the deepest expression shared by
    all of its arguments."""
    a = Symbol('a', 'var * {x: int, y: int, z: int}')

    assert common_subexpression(a).isidentical(a)
    assert common_subexpression(a, a['x']).isidentical(a)

    # Different columns only share the table itself.
    shared = common_subexpression(a['y'] + 1, a['x'])
    assert shared.isidentical(a)

    # A mapped column and the plain column share that column.
    shared = common_subexpression(a['x'].map(inc, 'int'), a['x'])
    assert shared.isidentical(a['x'])
Esempio n. 37
0
def test_schema_of_complex_interaction():
    """Dividing integer columns gives a ``float64`` schema, with or without
    a label."""
    a = Symbol('a', 'var * {x: int, y: int, z: int}')

    ratio = (a['x'] + a['y']) / a['z']
    assert ratio.schema == dshape('float64')

    labelled = ratio.label('foo')
    assert labelled.schema == dshape('float64')
Esempio n. 38
0
def test_reduction():
    """Check the dshapes that sum, count, nunique, max and min produce."""
    t = Symbol('t', 'var * {name: string, amount: int32}')

    # Any of these three spellings of the summed dshape is accepted.
    accepted = [dshape(text) for text in ('int64',
                                          '{amount: int64}',
                                          '{amount_sum: int64}')]
    r = sum(t['amount'])
    assert r.dshape in accepted

    assert 'amount' not in str(t.count().dshape)

    count_type = t.count().dshape[0]
    assert count_type in (int32, int64)

    assert 'int' in str(t.count().dshape)
    assert 'int' in str(t.nunique().dshape)
    for extreme in (t['name'].max(), t['name'].min()):
        assert 'string' in str(extreme.dshape)
    assert 'string' not in str(t.count().dshape)

    # Second table: the sum of a real column must not be typed as int.
    t = Symbol('t', 'var * {name: string, amount: real, id: int}')

    id_total = t['id'].sum()
    amount_total = t['amount'].sum()
    assert 'int' in str(id_total.dshape)
    assert 'int' not in str(amount_total.dshape)
Esempio n. 39
0
import numpy as np
from pandas import DataFrame
import numpy as np
import bcolz
from datashape.predicates import isscalar, iscollection, isrecord
from blaze.expr import Symbol, by
from blaze.api import Data, into
from blaze.compute import compute
from blaze.expr.functions import sin, exp
from blaze.sql import SQL


# NOTE(review): `sources` starts empty and nothing in the visible lines
# appends to it - presumably it is filled elsewhere; confirm.
sources = []

# Symbolic table with the three columns used by every fixture below.
t = Symbol('t', 'var * {amount: int64, id: int64, name: string}')

# Five sample rows, listed in the same (amount, id, name) order as `t`.
L = [[100, 1, 'Alice'],
     [200, 2, 'Bob'],
     [300, 3, 'Charlie'],
     [400, 4, 'Dan'],
     [500, 5, 'Edith']]

# The sample rows as a pandas DataFrame.
df = DataFrame(L, columns=['amount', 'id', 'name'])

# The DataFrame converted to a NumPy array with `into`.
x = into(np.ndarray, df)

# The DataFrame converted to a bcolz ctable with `into`.
bc = into(bcolz.ctable, df)

# In-memory SQLite table 'accounts' using t's schema, loaded with the rows.
sql = SQL('sqlite:///:memory:', 'accounts', schema=t.schema)
sql.extend(L)
Esempio n. 40
0
import numpy as np
from pandas import DataFrame
import numpy as np
import bcolz
from datashape.predicates import isscalar, iscollection, isrecord
from blaze.expr import Symbol, by
from blaze.api import Data, into
from blaze.compute import compute
from blaze.expr.functions import sin, exp
from blaze.sql import SQL


# NOTE(review): `sources` is created empty and is not populated in the
# visible lines - presumably other code appends to it; confirm.
sources = []

# Symbol describing the (amount, id, name) table shared by the fixtures.
t = Symbol('t', 'var * {amount: int64, id: int64, name: string}')

# Raw sample data: five rows in (amount, id, name) order.
L = [[100, 1, 'Alice'],
     [200, 2, 'Bob'],
     [300, 3, 'Charlie'],
     [400, 4, 'Dan'],
     [500, 5, 'Edith']]

# pandas view of the sample data.
df = DataFrame(L, columns=['amount', 'id', 'name'])

# NumPy array built from the DataFrame via `into`.
x = into(np.ndarray, df)

# bcolz ctable built from the DataFrame via `into`.
bc = into(bcolz.ctable, df)

# SQLite (in-memory) 'accounts' table with t's schema; rows added by extend.
sql = SQL('sqlite:///:memory:', 'accounts', schema=t.schema)
sql.extend(L)
Esempio n. 41
0
def test_nelements_records(recdata):
    """``nelements`` on the ``recdata`` fixture matches NumPy's own shape
    arithmetic, both in total and along axis 0."""
    s = Symbol('s', discover(recdata))

    total = compute(s.nelements(), recdata)
    assert total == np.prod(recdata.shape)

    along_axis0 = compute(s.nelements(axis=0), recdata)
    # One entry per position on axis 1, each equal to the axis-0 length.
    expected = np.zeros(recdata.shape[1]) + recdata.shape[0]
    np.testing.assert_array_equal(along_axis0, expected)
Esempio n. 42
0
def test_count_nan():
    """``count`` over an optional-real array skips the NaN entry."""
    t = Symbol('t', '3 * ?real')
    values = np.array([1.0, np.nan, 2.0])

    counted = compute(t.count(), values)
    assert counted == 2
Esempio n. 43
0
def test_improper_selection():
    """Indexing a selection with an expression that is not a valid
    predicate for it must raise."""
    t = Symbol('t', 'var * {x: int, y: int, z: int}')

    # The inner predicate is built from the column: ``t['y'] > 0``.
    # Writing ``t['y' > 0]`` would compare the string 'y' with 0 before any
    # expression is built, so the test would raise for the wrong reason.
    assert raises(Exception, lambda: t[t['x'] > 0][t.sort()[t['y'] > 0]])
Esempio n. 44
0
def test_errors():
    """Computing a ``by`` expression against the integer ``1`` must raise
    NotImplementedError."""
    tbl = Symbol('t', 'var * {foo: int}')
    with raises(NotImplementedError):
        # The expression is built inside the context, so an error raised
        # during construction is caught as well.
        expr = by(tbl, tbl.count())
        compute_up(expr, 1)
Esempio n. 45
0
def test_summary_keepdims():
    """``summary`` gives a bare record by default and a 1 x 1 array of
    records with ``keepdims=True``."""
    x = Symbol('x', '5 * 3 * float32')

    collapsed = summary(a=x.min(), b=x.max())
    assert collapsed.dshape == dshape('{a: float32, b: float32}')

    kept = summary(a=x.min(), b=x.max(), keepdims=True)
    assert kept.dshape == dshape('1 * 1 * {a: float32, b: float32}')
Esempio n. 46
0
def test_axis_kwarg_is_normalized_to_tuple():
    """Whatever form ``axis`` is given in (missing, int or list), the
    reduction stores it as a tuple."""
    x = Symbol('x', '5 * 3 * float32')

    reductions = (
        x.sum(),
        x.sum(axis=1),
        x.sum(axis=[1]),
        x.std(),
        x.mean(axis=1),
    )
    for reduction in reductions:
        axis = reduction.axis
        assert isinstance(axis, tuple)
Esempio n. 47
0
def test_reduction_dshape():
    """Reducing over an axis removes that axis from the dshape; reducing
    over everything leaves the scalar schema."""
    x = Symbol('x', '5 * 3 * float32')

    assert x.sum().dshape == x.schema

    by_axis = [
        (0, '3 * float32'),
        (1, '5 * float32'),
        ((0, 1), 'float32'),
    ]
    for axis, expected in by_axis:
        assert x.sum(axis=axis).dshape == dshape(expected)
Esempio n. 48
0
def test_count():
    """``count`` over a Python list skips the ``None`` entry."""
    t = Symbol('t', '3 * int')
    values = [1, None, 2]

    counted = compute(t.count(), values)
    assert counted == 2
Esempio n. 49
0
def test_head():
    """A ``head`` expression round-trips through ``str``/``eval`` and keeps
    the table schema."""
    # NOTE(review): the symbol is kept bound to the local name ``t``;
    # presumably the printed expression refers to it by that name when
    # it is evaluated below.
    t = Symbol('t', 'var * {name: string, amount: int32, id: int32}')
    first_rows = t.head(10)

    rendered = str(first_rows)
    assert eval(rendered).isidentical(first_rows)

    assert first_rows.schema == t.schema