Beispiel #1
0
def test_distinct_name():
    """Column access by attribute and by item agree, and names are kept."""
    t = TableSymbol("t", "{id: int32, name: string}")

    # Attribute-style and item-style column access must be interchangeable.
    by_attr = t.name
    by_item = t["name"]
    assert by_attr.isidentical(by_item)

    # The same must hold when the column is taken from a distinct() table.
    distinct_by_attr = t.distinct().name
    distinct_by_item = t.distinct()["name"]
    assert distinct_by_attr.isidentical(distinct_by_item)

    assert t.id.distinct()._name == "id"
    assert t.name._name == "name"
Beispiel #2
0
def test_subterms():
    """``_subterms`` returns an expression together with its sub-expressions."""
    a = TableSymbol('a', '{x: int, y: int, z: int}')

    # A bare symbol has only itself as a subterm.
    assert list(a._subterms()) == [a]

    assert set(a['x']._subterms()) == {a, a['x']}

    mapped_subterms = set(a['x'].map(inc, 'int')._subterms())
    assert mapped_subterms == {a, a['x'], a['x'].map(inc, 'int')}

    arithmetic_subterms = set((a['x'] + 1)._subterms())
    assert a in arithmetic_subterms
Beispiel #3
0
def test_distinct_name():
    """``t.name`` and ``t['name']`` are identical, with or without distinct."""
    t = TableSymbol('t', '{id: int32, name: string}')

    attr_column, item_column = t.name, t['name']
    assert attr_column.isidentical(item_column)

    attr_distinct, item_distinct = t.distinct().name, t.distinct()['name']
    assert attr_distinct.isidentical(item_distinct)

    # ``_name`` reports the originating column name.
    assert t.id.distinct()._name == 'id'
    assert t.name._name == 'name'
Beispiel #4
0
def test_relabel_join():
    """Joining two relabelled copies of a table keeps both new labels."""
    names = TableSymbol('names', '{first: string, last: string}')

    left_side = names.relabel({'last': 'left'})
    right_side = names.relabel({'last': 'right'})
    siblings = join(left_side, right_side, 'first')

    assert siblings.fields == ['first', 'left', 'right']
Beispiel #5
0
def test_by_columns():
    """Check how many fields ``by`` yields for column and table groupers."""
    t = TableSymbol('t', '{name: string, amount: int32, id: int32}')

    # Grouping on a single column with a single named reduction.
    totals_by_id = by(t['id'], total=t['amount'].sum())
    assert len(totals_by_id.fields) == 2

    counts_by_id = by(t['id'], count=t['id'].count())
    assert len(counts_by_id.fields) == 2

    # Grouping on the whole table.
    print(by(t, count=t.count()).fields)
    counts_by_table = by(t, count=t.count())
    assert len(counts_by_table.fields) == 4
Beispiel #6
0
def test_by_columns():
    """Field counts of ``by`` results for single-column and table groupers."""
    t = TableSymbol("t", "{name: string, amount: int32, id: int32}")
    id_column = t["id"]

    grouped_total = by(id_column, total=t["amount"].sum())
    assert len(grouped_total.fields) == 2

    grouped_count = by(id_column, count=t["id"].count())
    assert len(grouped_count.fields) == 2

    # Grouping by the full table.
    print(by(t, count=t.count()).fields)
    grouped_table = by(t, count=t.count())
    assert len(grouped_table.fields) == 4
Beispiel #7
0
def test_table_name():
    """``.name`` raises ``AttributeError`` on both symbols built here."""
    t = TableSymbol('t', '10 * {people: string, amount: int}')
    r = TableSymbol('r', 'int64')

    # Neither schema has a field called ``name``.
    for symbol_under_test in (t, r):
        with pytest.raises(AttributeError):
            symbol_under_test.name
Beispiel #8
0
def test_broadcast():
    """Check ``broadcast`` results and the string form of binary operators."""
    from blaze.expr.arithmetic import Add, Eq, Mult, Le

    t = TableSymbol('t', '{x: int, y: int, z: int}')
    t2 = TableSymbol('t', '{a: int, b: int, c: int}')
    x, y, z = t['x'], t['y'], t['z']
    a, b, c = t2['a'], t2['b'], t2['c']

    # Broadcasting two columns of the same table.
    assert str(broadcast(Add, x, y)._expr) == 'x + y'
    assert broadcast(Add, x, y)._child.isidentical(t)

    # Broadcasting over operands that are themselves broadcasts.
    total = broadcast(Add, x, y)
    product = broadcast(Mult, x, z)

    equality = broadcast(Eq, total, product)
    assert eval_str(equality._expr) == '(x + y) == (x * z)'
    assert broadcast(Eq, total, product)._child.isidentical(t)

    # A column combined with a plain Python scalar.
    assert str(broadcast(Add, x, 1)._expr) == 'x + 1'

    # String rendering of operators, including the reflected variants.
    rendered = [
        (x <= y, "t.x <= t.y"),
        (x >= y, "t.x >= t.y"),
        (x | y, "t.x | t.y"),
        (x.__ror__(y), "t.y | t.x"),
        (x.__rand__(y), "t.y & t.x"),
    ]
    for expression, text in rendered:
        assert str(expression) == text

    # ``a`` comes from the second table symbol, so broadcasting must fail.
    with pytest.raises(ValueError):
        broadcast(Add, x, a)
Beispiel #9
0
def test_by_columns():
    """Field counts of ``by`` results when the reduction is positional."""
    t = TableSymbol('t', '{name: string, amount: int32, id: int32}')

    summed = by(t['id'], t['amount'].sum())
    assert len(summed.fields) == 2

    counted = by(t['id'], t['id'].count())
    assert len(counted.fields) == 2

    # Grouping by the whole table.
    print(by(t, t.count()).fields)
    table_counted = by(t, t.count())
    assert len(table_counted.fields) == 4
Beispiel #10
0
def test_subterms():
    """Subterms of symbols, columns, mapped columns and arithmetic."""
    a = TableSymbol('a', '{x: int, y: int, z: int}')

    symbol_subterms = list(a._subterms())
    assert symbol_subterms == [a]

    column_subterms = set(a['x']._subterms())
    assert column_subterms == {a, a['x']}

    map_subterms = set(a['x'].map(inc, 'int')._subterms())
    assert map_subterms == {a, a['x'], a['x'].map(inc, 'int')}

    # The root symbol is reachable from an arithmetic expression.
    assert a in set((a['x'] + 1)._subterms())
Beispiel #11
0
def test_relabel_join():
    """A self-join on ``first`` exposes both relabelled ``last`` columns."""
    names = TableSymbol('names', '{first: string, last: string}')

    relabelled = [names.relabel({'last': label}) for label in ('left', 'right')]
    siblings = join(relabelled[0], relabelled[1], 'first')

    assert siblings.fields == ['first', 'left', 'right']
Beispiel #12
0
def test_distinct_name():
    """Attribute and item column access agree; ``_name`` is preserved."""
    t = TableSymbol('t', '{id: int32, name: string}')

    # Plain table.
    plain_attr = t.name
    assert plain_attr.isidentical(t['name'])

    # Table after distinct().
    distinct_attr = t.distinct().name
    assert distinct_attr.isidentical(t.distinct()['name'])

    distinct_id = t.id.distinct()
    assert distinct_id._name == 'id'
    assert t.name._name == 'name'
Beispiel #13
0
def test_traverse():
    """``_traverse`` visits the symbol itself and intermediate column nodes."""
    t = TableSymbol('t', '{name: string, amount: int}')
    assert t in list(t._traverse())

    visited = list(t.amount.sum()._traverse())
    # Compare with isidentical rather than ``in``/``==``.
    assert builtins.any(t.amount.isidentical(node) for node in visited)
Beispiel #14
0
def test_traverse():
    """The traversal of a reduction includes the column it reduces."""
    t = TableSymbol('t', '{name: string, amount: int}')

    own_nodes = list(t._traverse())
    assert t in own_nodes

    reduction = t.amount.sum()
    reduction_nodes = list(reduction._traverse())
    assert builtins.any(
        t.amount.isidentical(candidate) for candidate in reduction_nodes
    )
Beispiel #15
0
def test_Distinct():
    """Check the dshape of ``distinct`` on a column and on a whole table."""
    t = TableSymbol('t', '{name: string, amount: int32}')

    distinct_names = distinct(t['name'])
    print(distinct_names.dshape)
    assert distinct_names.dshape == dshape('var * {name: string}')

    # distinct() on the table leaves its dshape unchanged.
    distinct_table = t.distinct()
    assert distinct_table.dshape == t.dshape
Beispiel #16
0
def test_traverse():
    """``traverse`` visits both the column and the predicate of a selection."""
    t = TableSymbol('t', '{name: string, amount: int}')
    assert t in list(t.traverse())

    selected_names = t[t['amount'] < 0]['name']
    visited = list(selected_names.traverse())

    assert any(t['amount'].isidentical(node) for node in visited)
    assert any((t['amount'] < 0).isidentical(node) for node in visited)
Beispiel #17
0
def test_subs():
    """``_subs`` replaces scalars, field names and whole symbols."""
    from blaze.expr import TableSymbol

    t = TableSymbol('t', '{name: string, amount: int, id: int}')

    # Substitute both the literal and the column name in one call.
    original = t['amount'] + 3
    substituted = original._subs({3: 4, 'amount': 'id'})
    assert substituted.isidentical(t['id'] + 4)

    # Substitute the underlying table symbol.
    t2 = TableSymbol('t', '{name: string, amount: int}')
    swapped = t['amount']._subs({t: t2})
    assert swapped.isidentical(t2['amount'])
Beispiel #18
0
def test_discover_dshape_symbol(ds):
    """A symbol built from ``ds`` matches one built from ``ds.subshape[0]``.

    ``ds`` is supplied by the caller (presumably a pytest fixture -- confirm
    against the surrounding test module).
    """
    from_dshape = TableSymbol('t', dshape=ds)
    assert from_dshape.fields is not None

    from_subshape = TableSymbol('t', dshape=ds.subshape[0])
    assert from_subshape.fields is not None

    assert from_dshape.isidentical(from_subshape)
Beispiel #19
0
def test_symbol_projection_failures():
    """Invalid projections and attribute lookups raise the expected errors."""
    t = TableSymbol('t', '10 * {name: string, amount: int}')

    failing_cases = [
        # 'id' is not a field of the table.
        (ValueError, lambda: t._project(['name', 'id'])),
        (AttributeError, lambda: t.foo),
        # A dshape is passed where field names are expected.
        (TypeError, lambda: t._project(t.dshape)),
    ]
    for expected_error, action in failing_cases:
        with pytest.raises(expected_error):
            action()
Beispiel #20
0
def test_discover_dshape_symbol(ds):
    """Symbols built from ``ds`` and from its first subshape are identical."""
    t_ds = TableSymbol('t', dshape=ds)
    t_sch = TableSymbol('t', dshape=ds.subshape[0])

    # Both construction routes must expose fields.
    assert t_ds.fields is not None
    assert t_sch.fields is not None

    assert t_ds.isidentical(t_sch)
Beispiel #21
0
def test_symbol_projection_failures():
    """Bad projections and unknown attributes raise specific exceptions."""
    t = TableSymbol("t", "10 * {name: string, amount: int}")

    def project_unknown_field():
        return t._project(["name", "id"])

    def read_unknown_attribute():
        return t.foo

    def project_with_dshape():
        return t._project(t.dshape)

    expectations = (
        (ValueError, project_unknown_field),
        (AttributeError, read_unknown_attribute),
        (TypeError, project_with_dshape),
    )
    for error_type, trigger in expectations:
        with pytest.raises(error_type):
            trigger()
Beispiel #22
0
def test_Distinct():
    """``distinct`` on a column keeps its name; on a table, its dshape."""
    t = TableSymbol("t", "{name: string, amount: int32}")

    unique_names = distinct(t["name"])
    print(unique_names.dshape)
    assert unique_names.dshape == dshape("var * string")
    assert unique_names._name == "name"

    unique_rows = t.distinct()
    assert unique_rows.dshape == t.dshape
Beispiel #23
0
def test_length():
    """``len`` of fixed-length expressions follows the leading dimension."""
    t = TableSymbol('t', '10 * {name: string, amount: int}')
    assert t.dshape == dshape('10 * {name: string, amount: int}')

    expected_lengths = [
        (t, 10),
        (t.name, 10),
        (t[['name']], 10),
        (t.sort('name'), 10),
        (t.head(5), 5),
        # head() beyond the table size is capped at the table length.
        (t.head(50), 10),
    ]
    for expression, length in expected_lengths:
        assert len(expression) == length
Beispiel #24
0
def test_sort():
    """A sort expression round-trips through ``str``/``eval`` and keeps schema."""
    # NOTE: eval() below resolves names from this function's scope; the repr
    # presumably refers to the symbol as ``t``, so keep this local name.
    t = TableSymbol("t", "{name: string, amount: int32, id: int32}")

    by_amount = t.sort("amount", ascending=True)
    print(str(by_amount))
    rebuilt = eval(str(by_amount))
    assert rebuilt.isidentical(by_amount)

    assert by_amount.schema == t.schema

    # Sorting a single column with no key uses the column's own name.
    column_sort = t["amount"].sort()
    assert column_sort.key == "amount"
Beispiel #25
0
def test_relabel():
    """``relabel`` renames the chosen columns and round-trips through eval."""
    # NOTE: eval() resolves names from this scope; keep the symbol bound to
    # ``t`` (presumably the name the repr refers to).
    t = TableSymbol('t', '{name: string, amount: int32, id: int32}')

    renamed = t.relabel({'name': 'NAME', 'id': 'ID'})

    rebuilt = eval(str(renamed))
    assert rebuilt.isidentical(renamed)

    print(renamed.columns)
    assert renamed.columns == ['NAME', 'amount', 'ID']
Beispiel #26
0
def test_Distinct():
    """Check dshape and name of ``distinct`` results."""
    t = TableSymbol('t', '{name: string, amount: int32}')

    # Column-level distinct.
    column_result = distinct(t['name'])
    print(column_result.dshape)
    assert column_result.dshape == dshape('var * string')
    assert column_result._name == 'name'

    # Table-level distinct.
    table_result = t.distinct()
    assert table_result.dshape == t.dshape
Beispiel #27
0
def test_sort():
    """Sorting keeps the schema and its repr evaluates back to the same expr."""
    # NOTE: the eval() call looks names up in this function's locals, so the
    # symbol stays bound to ``t`` (presumably the name used in the repr).
    t = TableSymbol('t', '{name: string, amount: int32, id: int32}')

    sorted_table = t.sort('amount', ascending=True)
    text = str(sorted_table)
    print(text)
    assert eval(str(sorted_table)).isidentical(sorted_table)

    assert sorted_table.schema == t.schema

    assert t['amount'].sort().key == 'amount'
Beispiel #28
0
def test_sort():
    """Check repr round-trip, schema and default key of sort expressions."""
    # NOTE: keep the name ``t``; eval() below evaluates the repr in this
    # scope and the repr presumably refers to the symbol by that name.
    t = TableSymbol('t', '{name: string, amount: int32, id: int32}')
    s = t.sort('amount', ascending=True)

    print(str(s))
    round_tripped = eval(str(s))
    assert round_tripped.isidentical(s)

    assert s.schema == t.schema

    default_key_sort = t['amount'].sort()
    assert default_key_sort.key == 'amount'
Beispiel #29
0
def test_like():
    """``like`` filters rows of Python data by glob patterns."""
    t = TableSymbol('t', '{name: string, city: string}')
    data = [('Alice Smith', 'New York'),
            ('Bob Smith', 'Chicago'),
            ('Alice Walker', 'LA')]

    def matching_rows(**patterns):
        # Compute the ``like`` expression against the in-memory rows.
        return list(compute(t.like(**patterns), data))

    assert matching_rows(name='Alice*') == [data[0], data[2]]
    # With no leading wildcard, 'lice*' matches none of the names.
    assert matching_rows(name='lice*') == []
    assert matching_rows(name='*Smith*') == [data[0], data[1]]
    assert matching_rows(name='*Smith*', city='New York') == [data[0]]
Beispiel #30
0
def test_union():
    """``union`` keeps the shared schema and rejects mismatched tables."""
    schema = '{x: int, y: int, z: int}'
    a, b, c = (TableSymbol(name, schema) for name in ('a', 'b', 'c'))

    combined = union(a, b, c)
    assert combined.schema == a.schema

    # A table with a different schema cannot take part in the union.
    assert raises(Exception,
                  lambda: union(a, TableSymbol('q', '{name: string}')))
Beispiel #31
0
def test_multi_column_join():
    """Joining on two columns gives the expected fields and join keys."""
    a = TableSymbol('a', '{x: int, y: int, z: int}')
    b = TableSymbol('b', '{w: int, x: int, y: int}')
    join_keys = ['x', 'y']
    j = join(a, b, join_keys)

    assert set(j.fields) == set('wxyz')

    assert j.on_left == j.on_right == ['x', 'y']
    # The join expression must be hashable.
    assert hash(j)

    # Join keys first, then the remaining left and right fields.
    assert j.fields == ['x', 'y', 'z', 'w']
Beispiel #32
0
def test_length():
    """``len`` works for fixed-length tables and raises for ``s``."""
    t = TableSymbol("t", "10 * {name: string, amount: int}")
    s = TableSymbol("s", "{name:string, amount:int}")
    assert t.dshape == dshape("10 * {name: string, amount: int}")

    ten_long = (t, t.name, t[["name"]], t.sort("name"))
    for expression in ten_long:
        assert len(expression) == 10

    assert len(t.head(5)) == 5
    assert len(t.head(50)) == 10

    # ``s`` was declared without a fixed leading dimension.
    with pytest.raises(ValueError):
        len(s)
Beispiel #33
0
def test_leaves():
    """``_leaves`` returns the root symbols of an expression, in order."""
    t = TableSymbol("t", "{id: int32, name: string}")
    v = TableSymbol("v", "{id: int32, city: string}")
    x = symbol("x", "int32")

    # Expressions built from a single table.
    assert t._leaves() == [t]
    assert t.id._leaves() == [t]
    grouped = by(t.name, count=t.id.nunique())
    assert grouped._leaves() == [t]

    # Joins report their leaves in argument order.
    assert join(t, v)._leaves() == [t, v]
    assert join(v, t)._leaves() == [v, t]

    incremented = x + 1
    assert incremented._leaves() == [x]
Beispiel #34
0
def test_leaves():
    """Leaves of columns, groupings, joins and scalar arithmetic."""
    t = TableSymbol('t', '{id: int32, name: string}')
    v = TableSymbol('v', '{id: int32, city: string}')
    x = Symbol('x', 'int32')

    assert t._leaves() == [t]
    assert t.id._leaves() == [t]

    name_groups = by(t.name, t.id.nunique())
    assert name_groups._leaves() == [t]

    # Leaf order follows the order of the join arguments.
    forward, backward = join(t, v), join(v, t)
    assert forward._leaves() == [t, v]
    assert backward._leaves() == [v, t]

    assert (x + 1)._leaves() == [x]
Beispiel #35
0
def test_serializable():
    """Symbols and compound expressions survive a pickle round trip."""
    import pickle

    t = TableSymbol('t', '{id: int, name: string, amount: int}')
    restored_symbol = pickle.loads(pickle.dumps(t))

    assert t.isidentical(restored_symbol)

    # A deeper expression: selection, join, sort, column access and head.
    s = TableSymbol('t', '{id: int, city: string}')
    negative_rows = t[t.amount < 0]
    expr = join(negative_rows, s).sort('id').city.head()
    restored_expr = pickle.loads(pickle.dumps(expr))

    assert expr.isidentical(restored_expr)
Beispiel #36
0
def test_length():
    """Check ``len`` on a 10-row table and its derived expressions."""
    t = TableSymbol('t', '10 * {name: string, amount: int}')
    s = TableSymbol('s', '{name:string, amount:int}')
    assert t.dshape == dshape('10 * {name: string, amount: int}')

    cases = [
        (t, 10),
        (t.name, 10),
        (t[['name']], 10),
        (t.sort('name'), 10),
        (t.head(5), 5),
        (t.head(50), 10),
    ]
    for expression, expected in cases:
        assert len(expression) == expected

    # ``s`` has no fixed leading dimension, so len() must fail.
    with pytest.raises(ValueError):
        len(s)
Beispiel #37
0
def test_serializable():
    """``pickle.dumps``/``loads`` return expressions identical to the input."""
    import pickle

    def round_trip(obj):
        # Serialise and immediately deserialise.
        return pickle.loads(pickle.dumps(obj))

    t = TableSymbol('t', '{id: int, name: string, amount: int}')
    t2 = round_trip(t)

    assert t.isidentical(t2)

    s = TableSymbol('t', '{id: int, city: string}')
    expr = join(t[t.amount < 0], s).sort('id').city.head()
    expr2 = round_trip(expr)

    assert expr.isidentical(expr2)
Beispiel #38
0
def test_leaves():
    """``_leaves`` lists the symbols an expression is ultimately built on."""
    t = TableSymbol('t', '{id: int32, name: string}')
    v = TableSymbol('v', '{id: int32, city: string}')
    x = symbol('x', 'int32')

    single_table_exprs = [
        t,
        t.id,
        by(t.name, count=t.id.nunique()),
    ]
    for expression in single_table_exprs:
        assert expression._leaves() == [t]

    # For joins the leaf order mirrors the argument order.
    assert join(t, v)._leaves() == [t, v]
    assert join(v, t)._leaves() == [v, t]

    assert (x + 1)._leaves() == [x]
Beispiel #39
0
def test_path():
    """``path`` yields the chain of nodes from an expression down to a target.

    Covers a reduction over a single table and a column taken from a join,
    where either side of the join may be the target.
    """
    from blaze.expr import TableSymbol, join
    t = TableSymbol('t', '{name: string, amount: int, id: int}')
    v = TableSymbol('v', '{city: string, id: int}')
    expr = t['amount'].sum()

    # Path to the root symbol and to an intermediate node.
    assert list(path(expr, t)) == [t.amount.sum(), t.amount, t]
    assert list(path(expr, t.amount)) == [t.amount.sum(), t.amount]

    # With a join in the middle, the path ends at the requested side.
    expr = join(t, v).amount
    assert list(path(expr, t)) == [join(t, v).amount, join(t, v), t]
    assert list(path(expr, v)) == [join(t, v).amount, join(t, v), v]
Beispiel #40
0
def test_relabel():
    """``relabel`` works on tables and on single columns."""
    # NOTE: eval() below resolves names in this scope; keep the symbol bound
    # to ``t`` (presumably the name the repr refers to).
    t = TableSymbol("t", "{name: string, amount: int32, id: int32}")

    table_relabel = t.relabel({"name": "NAME", "id": "ID"})
    column_relabel = t["amount"].relabel({"amount": "BALANCE"})

    rebuilt = eval(str(table_relabel))
    assert rebuilt.isidentical(table_relabel)

    print(table_relabel.fields)
    assert table_relabel.fields == ["NAME", "amount", "ID"]

    # Table relabel keeps a record measure; column relabel stays scalar.
    assert not isscalar(table_relabel.dshape.measure)
    assert isscalar(column_relabel.dshape.measure)
Beispiel #41
0
def test_relabel():
    """Relabelled tables keep field order; relabelled columns stay scalar."""
    # NOTE: the symbol must stay bound to ``t`` because eval() evaluates the
    # repr in this function's scope (the repr presumably names it ``t``).
    t = TableSymbol('t', '{name: string, amount: int32, id: int32}')

    rl = t.relabel({'name': 'NAME', 'id': 'ID'})
    rlc = t['amount'].relabel({'amount': 'BALANCE'})

    round_tripped = eval(str(rl))
    assert round_tripped.isidentical(rl)

    print(rl.fields)
    assert rl.fields == ['NAME', 'amount', 'ID']

    table_measure = rl.dshape.measure
    column_measure = rlc.dshape.measure
    assert not isscalar(table_measure)
    assert isscalar(column_measure)
Beispiel #42
0
def test_relabel():
    """Check repr round-trip, fields and measure kind after ``relabel``."""
    # NOTE: keep the local name ``t``; eval() looks it up in this scope and
    # the repr presumably refers to the symbol by that name.
    t = TableSymbol('t', '{name: string, amount: int32, id: int32}')

    whole_table = t.relabel({'name': 'NAME', 'id': 'ID'})
    single_column = t['amount'].relabel({'amount': 'BALANCE'})

    assert eval(str(whole_table)).isidentical(whole_table)

    new_fields = whole_table.fields
    print(new_fields)
    assert new_fields == ['NAME', 'amount', 'ID']

    # Only the single-column relabel has a scalar measure.
    assert not isscalar(whole_table.dshape.measure)
    assert isscalar(single_column.dshape.measure)
def test_Distinct():
    """``distinct`` computed on a NumPy record array matches ``np.unique``."""
    records = [('Alice', 100),
               ('Alice', -200),
               ('Bob', 100),
               ('Bob', 100)]
    x = np.array(records, dtype=[('name', 'S5'), ('amount', 'i8')])

    t = TableSymbol('t', '{name: string, amount: int64}')

    # Distinct over one column.
    distinct_names = compute(t['name'].distinct(), x)
    assert eq(distinct_names, np.unique(x['name']))

    # Distinct over whole rows.
    distinct_rows = compute(t.distinct(), x)
    assert eq(distinct_rows, np.unique(x))
Beispiel #44
0
def test_scalar_expr():
    """The ``_expr`` of a column carries the column's own scalar dshape."""
    t = TableSymbol('t', '{x: int64, y: int32, z: int64}')
    x_scalar = t.x._expr
    y_scalar = t.y._expr
    assert 'int64' in str(x_scalar.dshape)
    assert 'int32' in str(y_scalar.dshape)

    # The first input of ``x + 1`` is the scalar form of ``x``.
    first_input = (t.x + 1)._expr._inputs[0]
    assert first_input.dshape == x_scalar.dshape
    assert first_input.isidentical(x_scalar)

    # Same check with extra whitespace in the schema string.
    accounts = TableSymbol('t', '{ amount : int64, id : int64, name : string }')
    amount_plus_one = (accounts.amount + 1)._expr
    assert 'int64' in str(amount_plus_one._inputs[0].dshape)
Beispiel #45
0
def test_relabel_join():
    """A self-join on ``last`` pairs up people sharing a last name."""
    names = TableSymbol('names', '{first: string, last: string}')

    left_names = names.relabel({'first': 'left'})
    right_names = names.relabel({'first': 'right'})
    siblings = join(left_names, right_names, 'last')[['left', 'right']]

    data = [('Alice', 'Smith'),
            ('Bob', 'Jones'),
            ('Charlie', 'Smith')]

    print(set(compute(siblings, {names: data})))

    pairs = set(compute(siblings, {names: data}))
    # Alice and Charlie share 'Smith'; Alice and Bob share nothing.
    assert ('Alice', 'Charlie') in pairs
    assert ('Alice', 'Bob') not in pairs
Beispiel #46
0
def test_merge():
    """``merge`` combines columns into a table and rejects these bad inputs."""
    t = TableSymbol('t', 'int64')
    p = TableSymbol('p', '{amount:int}')
    accounts = TableSymbol('accounts',
                           '{name: string, balance: int32, id: int32}')

    projection = accounts[['name', 'balance']]
    new_amount = (accounts.balance * 1.5).label('new')

    c = merge(projection, new_amount)
    assert c.fields == ['name', 'balance', 'new']
    assert c.schema == dshape('{name: string, balance: int32, new: float64}')

    # Both combinations below must be refused with ValueError.
    for left, right in ((t, t), (t, p)):
        with pytest.raises(ValueError):
            merge(left, right)
Beispiel #47
0
def test_common_subexpression():
    """``common_subexpression`` finds the deepest node shared by its inputs."""
    a = TableSymbol('a', '{x: int, y: int, z: int}')

    cases = [
        ((a,), a),
        ((a, a['x']), a),
        ((a['y'] + 1, a['x']), a),
        # A mapped column and the column itself share that column.
        ((a['x'].map(inc), a['x']), a['x']),
    ]
    for arguments, expected in cases:
        assert common_subexpression(*arguments).isidentical(expected)
Beispiel #48
0
def test_selection_by_getattr():
    """A predicate built via attribute access can be used for selection."""
    t = TableSymbol('t', '{name: string, amount: int, id: int}')

    is_alice = t.name == 'Alice'
    result = t[is_alice]

    # Selection keeps the schema and mentions the literal in its repr.
    assert t.schema == result.schema
    assert 'Alice' in str(result)
Beispiel #49
0
def test_columns_attribute_for_backwards_compatibility():
    """Tables expose ``columns`` as an alias of ``fields``; columns do not."""
    t = TableSymbol('t', '{name: string, amount: int, dt: datetime}')

    assert t.columns == t.fields

    table_attrs = dir(t)
    column_attrs = dir(t.name)
    assert 'columns' in table_attrs
    assert 'columns' not in column_attrs
Beispiel #50
0
def test_dir():
    """``dir`` on an expression lists only methods that suit its dshape."""
    t = TableSymbol('t', '{name: string, amount: int, dt: datetime}')

    # Datetime column.
    datetime_attrs = dir(t.dt)
    assert 'day' in datetime_attrs
    assert 'mean' not in datetime_attrs

    # Integer column.
    assert 'mean' in dir(t.amount)

    # Projection without the string column, then the string column itself.
    assert 'like' not in dir(t[['amount', 'dt']])
    assert 'any' not in dir(t.name)
Beispiel #51
0
def test_by():
    """The schema of a ``by`` result is a record containing the grouper."""
    t = TableSymbol('t', '{name: string, amount: int32, id: int32}')
    totals = by(t['name'], total=sum(t['amount']))

    print(totals.schema)
    record = totals.schema[0]
    assert isinstance(record, Record)
    assert str(totals.schema[0]['name']) == 'string'
Beispiel #52
0
def test_schema_of_complex_interaction():
    """Integer addition followed by division has a ``real`` schema."""
    a = TableSymbol('a', '{x: int, y: int, z: int}')

    ratio = (a['x'] + a['y']) / a['z']
    assert ratio.schema == dshape('real')

    # Labelling the expression must not change its schema.
    labelled = ratio.label('foo')
    assert labelled.schema == dshape('real')
Beispiel #53
0
def test_relational():
    """Comparing a column to a literal gives a named boolean expression."""
    t = TableSymbol('t', '{name: string, amount: int, id: int}')

    is_alice = t['name'] == 'Alice'

    assert 'bool' in str(is_alice.dshape)
    # The comparison must carry a non-empty name.
    assert is_alice._name
Beispiel #54
0
def test_arithmetic():
    """Build every binary arithmetic expression form without raising.

    Each operator is exercised column-with-column, column-with-scalar and
    scalar-with-column (the reflected form).  There are no assertions; the
    test only checks that constructing the expressions does not raise.
    """
    t = TableSymbol('t', '{x: int, y: int, z: int}')
    x, y = t['x'], t['y']
    exprs = [
        x + 1, x + y, 1 + y,
        x - y, 1 - x, x - 1,
        x**y, x**2, 2**x,
        # The multiplication row used to repeat the power expressions, so
        # scalar multiplication was never built.
        x * y, x * 2, 2 * x,
        x / y, x / 2, 2 / x,
        x % y, x % 2, 2 % x,
    ]
Beispiel #55
0
def test_path_issue():
    """A column added by ``transform`` appears among the result's children.

    Membership is checked with ``isidentical`` rather than ``in``.  ``in``
    falls back to ``==``, which for expressions may build a (truthy)
    comparison expression instead of returning a bool, so the check could
    pass regardless of what ``children`` contains.
    """
    t = TableSymbol('t', "{topic: string, word: string, result: ?float64}")
    t2 = transform(t,
                   sizes=t.result.map(lambda x: (x - MIN) * 10 / (MAX - MIN),
                                      schema='float64',
                                      name='size'))

    # A list comprehension avoids relying on the name ``any``, which may be
    # shadowed by an expression-level ``any`` in this module.
    matching_children = [node for node in t2.children
                         if t2.sizes.isidentical(node)]
    assert matching_children
Beispiel #56
0
def test_path_issue():
    """The ``sizes`` column of a transformed table is one of its children."""
    t = TableSymbol('t', "{topic: string, word: string, result: ?float64}")

    # Rescale ``result`` using the MIN/MAX constants of this module.
    scaled = t.result.map(lambda x: (x - MIN) * 10 / (MAX - MIN),
                          schema='float64',
                          name='size')
    t2 = transform(t, sizes=scaled)

    assert builtins.any(
        t2.sizes.isidentical(child) for child in t2.children
    )
Beispiel #57
0
def test_like():
    """A ``like`` expression round-trips through eval and keeps the schema."""
    # NOTE: eval() below resolves names in this scope; keep the symbol bound
    # to ``t`` (presumably the name the repr refers to).
    t = TableSymbol('t', '{name: string, amount: int, city: string}')

    filtered = like(t, name='Alice*')

    rebuilt = eval(str(filtered))
    assert rebuilt.isidentical(filtered)
    assert filtered.schema == t.schema
    # The leading dimension of the filtered table is variable-length.
    assert filtered.dshape[0] == datashape.var
Beispiel #58
0
def test_reduction():
    """Check the dshapes produced by sum, count, nunique, max and min."""
    t = TableSymbol("t", "{name: string, amount: int32}")
    r = sum(t["amount"])
    accepted_sum_dshapes = (
        dshape("int64"),
        dshape("{amount: int64}"),
        dshape("{amount_sum: int64}"),
    )
    assert r.dshape in accepted_sum_dshapes

    # count() is an integer and mentions neither column names nor strings.
    assert "amount" not in str(t.count().dshape)
    assert t.count().dshape[0] in (int32, int64)
    assert "int" in str(t.count().dshape)

    assert "int" in str(t.nunique().dshape)

    # max/min of a string column stay strings.
    assert "string" in str(t["name"].max().dshape)
    assert "string" in str(t["name"].min().dshape)
    assert "string" not in str(t.count().dshape)

    t = TableSymbol("t", "{name: string, amount: real, id: int}")

    # Summing keeps integers integral and reals non-integral.
    assert "int" in str(t["id"].sum().dshape)
    assert "int" not in str(t["amount"].sum().dshape)
Beispiel #59
0
def test_reduction():
    """Reductions produce dshapes consistent with the reduced column."""
    t = TableSymbol('t', '{name: string, amount: int32}')

    amount_sum = sum(t['amount'])
    assert amount_sum.dshape in (dshape('int64'),
                                 dshape('{amount: int64}'),
                                 dshape('{amount_sum: int64}'))

    # Table-level count.
    assert 'amount' not in str(t.count().dshape)
    assert t.count().dshape[0] in (int32, int64)
    assert 'int' in str(t.count().dshape)

    assert 'int' in str(t.nunique().dshape)

    # String reductions.
    name_column = t['name']
    assert 'string' in str(name_column.max().dshape)
    assert 'string' in str(name_column.min().dshape)
    assert 'string' not in str(t.count().dshape)

    t = TableSymbol('t', '{name: string, amount: real, id: int}')

    # Sum of an int column is integral; sum of a real column is not.
    assert 'int' in str(t['id'].sum().dshape)
    assert 'int' not in str(t['amount'].sum().dshape)
Beispiel #60
0
def test_improper_selection():
    """Selecting with something that is not a matching predicate must raise.

    The inner predicate is ``t['y'] > 0``, not ``t['y' > 0]``.  The latter
    compares a ``str`` with an ``int``, which on Python 3 raises
    ``TypeError`` before any selection is built; the test would then pass
    without exercising the selection logic at all.
    """
    t = TableSymbol('t', '{x: int, y: int, z: int}')

    # The predicate comes from ``t`` but is applied to ``t.sort()``, and the
    # result (a table) is then used as a predicate for another selection.
    assert raises(Exception, lambda: t[t['x'] > 0][t.sort()[t['y'] > 0]])