def test_transform_then_project_single_column():
    """Project a transformed table onto ``foo`` and ``id`` and check the SQL.

    The derived column ``foo`` (``id + 1``) and the original ``id`` column
    are the only columns expected in the generated SELECT.
    """
    with_foo = transform(t, foo=t.id + 1)
    projection = with_foo[['foo', 'id']]

    rendered = str(compute(projection, s))
    result = normalize(rendered)

    expected = normalize("""SELECT accounts.id + :id_1 as foo, accounts.id FROM accounts""")
    assert result == expected
def test_normalize_reduction():
    """Divide per-group counts by their maximum and check the generated SQL.

    The expected query wraps the grouped counts in a ``WITH alias AS (...)``
    clause and selects the normalised column from that alias.
    """
    grouped = by(t.name, counts=t.count())
    normed = transform(
        grouped,
        normed_counts=grouped.counts / grouped.counts.max(),
    )

    actual_sql = str(compute(normed, s))
    wanted_sql = """WITH alias AS (SELECT count(accounts.id) AS counts FROM accounts GROUP BY accounts.name) SELECT alias.counts / max(alias.counts) AS normed_counts FROM alias"""

    assert normalize(actual_sql) == normalize(wanted_sql)
def test_transform_order():
    """Check the column order of a transform with two derived columns.

    The keywords are passed as ``sin_amount`` then ``cos_id``; the expected
    SQL lists ``cos_id`` before ``sin_amount``, after the original columns.
    """
    derived = transform(t, sin_amount=sin(t.amount), cos_id=cos(t.id))
    compiled = compute(derived, s)

    wanted_sql = """SELECT accounts.name, accounts.amount, accounts.id, cos(accounts.id) as cos_id, sin(accounts.amount) as sin_amount FROM accounts """

    actual = normalize(str(compiled))
    assert actual == normalize(wanted_sql)
def test_transform_filter_by_single_column():
    """Filter, then transform, then group on one derived column.

    Only ``abs_amt`` feeds the grouping; the ``sine`` column created by the
    transform is not referenced and does not appear in the expected SQL.
    """
    negative = t[t.amount < 0]
    enriched = transform(
        negative,
        abs_amt=abs(negative.amount),
        sine=sin(negative.id),
    )
    grouped = by(enriched.name, avg_amt=enriched.abs_amt.mean())

    compiled = compute(grouped, s)
    wanted_sql = normalize("""SELECT accounts.name, avg(abs(accounts.amount)) AS avg_amt FROM accounts WHERE accounts.amount < :amount_1 GROUP BY accounts.name """)

    actual_sql = normalize(str(compiled))
    assert actual_sql == wanted_sql
def test_path_split():
    """Check ``path_split`` against a table of (expression, expected) pairs.

    Each case asserts that ``path_split(t, expression)`` is identical to the
    expected sub-expression listed next to it.
    """
    doubled_id = transform(t, id=t.id * 2)
    grouped_total = by(doubled_id.id, amount=doubled_id.amount.sum())
    whole_summary = summary(total=t.amount.sum())

    cases = [
        # (full expression, expected split point)
        (t.amount.sum() + 1, t.amount.sum()),
        (t.amount.distinct().sort(), t.amount.distinct()),
        (grouped_total.amount + 1, by(doubled_id.id, amount=doubled_id.amount.sum())),
        (count(t.amount.distinct()), t.amount.distinct()),
        # A bare summary is expected to split at itself.
        (whole_summary, whole_summary),
    ]

    for full_expr, split_point in cases:
        assert path_split(t, full_expr).isidentical(split_point)
def test_transform_filter_by_different_order():
    """Transform first, then filter, then group on both derived columns.

    The expected SQL inlines the derived ``abs``/``sin`` expressions into the
    aggregates and keeps the filter as a WHERE clause on ``amount``.
    """
    enriched = transform(t, abs_amt=abs(t.amount), sine=sin(t.id))
    negative = enriched[enriched.amount < 0]

    hand_rolled_mean = negative.sine.sum() / negative.sine.count()
    grouped = by(
        negative.name,
        avg_amt=negative.abs_amt.mean(),
        avg_sine=hand_rolled_mean,
    )

    compiled = compute(grouped, s)
    wanted_sql = normalize("""SELECT accounts.name, avg(abs(accounts.amount)) AS avg_amt, sum(sin(accounts.id)) / count(sin(accounts.id)) AS avg_sine FROM accounts WHERE accounts.amount < :amount_1 GROUP BY accounts.name """)

    actual_sql = normalize(str(compiled))
    assert actual_sql == wanted_sql
def test_transform_where():
    """Transform a filtered table and check the WHERE clause is preserved.

    The expected SQL selects the three original columns plus the two derived
    ones, restricted by ``accounts.id = :id_1``.
    """
    only_first = t[t.id == 1]
    derived = transform(
        only_first,
        abs_amt=abs(only_first.amount),
        sine=sin(only_first.id),
    )
    compiled = compute(derived, s)

    wanted_sql = """SELECT accounts.name, accounts.amount, accounts.id, abs(accounts.amount) as abs_amt, sin(accounts.id) as sine FROM accounts WHERE accounts.id = :id_1 """

    actual = normalize(str(compiled))
    assert actual == normalize(wanted_sql)
def test_merge_compute():
    """Add a derived ``amount10`` column and check the resulting rows.

    A temporary SQLite table is populated with the rows, the transform is
    computed, and the result is compared with the rows extended by
    ``amount * 10``.
    """
    data = [
        (1, 'Alice', 100),
        (2, 'Bob', 200),
        (4, 'Dennis', 400),
    ]
    ds = datashape.dshape('var * {id: int, name: string, amount: real}')
    s = symbol('s', ds)

    with tmpfile('db') as fn:
        uri = 'sqlite:///' + fn
        into(uri + '::table', data, dshape=ds)

        # NOTE(review): the compute below binds ``s`` to the in-memory list,
        # not to the SQLite table written above -- confirm this is intended.
        with_scaled = transform(s, amount10=s.amount * 10)
        computed = compute(with_scaled, {s: data})
        result = into(list, computed)

        wanted_rows = [
            (1, 'Alice', 100, 1000),
            (2, 'Bob', 200, 2000),
            (4, 'Dennis', 400, 4000),
        ]
        assert result == wanted_rows
def test_path_issue():
    """A mapped column added via ``transform`` is one of the result's children.

    Membership is checked structurally with ``isidentical`` rather than with
    ``==``.
    """
    t = symbol('t', "{topic: string, word: string, result: ?float64}")

    # The lambda refers to MIN and MAX, which are not defined in this test;
    # nothing visible here calls it.
    scaled = t.result.map(lambda x: (x - MIN)*10/(MAX - MIN),
                          schema='float64',
                          name='size')
    t2 = transform(t, sizes=scaled)

    assert builtins.any(t2.sizes.isidentical(child) for child in t2.children)
def test_transform():
    """Compute a transform adding ``x = amount / id`` and compare the rows."""
    with_ratio = transform(t, x=t.amount / t.id)
    rows = list(compute(with_ratio, data))

    wanted_rows = [
        ('Alice', 100, 1, 100),
        ('Bob', 200, 2, 100),
        ('Alice', 50, 3, 50 / 3),
    ]
    assert rows == wanted_rows
def test_coalesce():
    """Exercise ``coalesce`` across optional and non-optional operand dshapes.

    Three groups of cases are checked:

    * lhs is not optional -> the lhs expression itself is returned;
    * lhs has the ``null`` dshape -> the rhs expression itself is returned;
    * lhs is optional -> a binary node is built whose dshape is asserted and
      whose ``lhs``/``rhs`` are the given operands.
    """
    s = symbol("s", "int32")
    t = symbol("t", "int32")
    a = symbol("a", "string")
    b = symbol("b", "string")
    c = symbol("c", "{a: int32, b: int32}")
    d = symbol("d", "{a: int32, b: int32}")

    # lhs is not optional: the lhs comes straight back.
    non_optional_pairs = [
        (s, t),
        (s + s, t * 3),
        (a, b),
        (a + a, b * 3),
        (c, d),
        (transform(c, a=c.a + 1), transform(d, a=d.a * 3)),
    ]
    for lhs, rhs in non_optional_pairs:
        assert coalesce(lhs, rhs).isidentical(lhs)

    # lhs has the null dshape: the rhs comes straight back.
    u = symbol("u", "null")
    for rhs in (s, a, c):
        assert coalesce(u, rhs).isidentical(rhs)

    v = symbol("v", "?int32")
    e = symbol("e", "?string")
    f = symbol("f", "?{a: int32, b: int32}")
    w = symbol("w", "int64")
    x = symbol("x", "?int32")
    y = symbol("y", "?int64")

    binary_cases = [
        # optional lhs, non-optional rhs: the result cannot be null
        (v, s, "int32"),
        (e, a, "string"),
        (f, c, "{a: int32, b: int32}"),
        # optional lhs, non-optional rhs with promotion: the expected dshape
        # is the wider of the two integer types
        (v, w, "int64"),
        # optional lhs and rhs: the result might be null
        (v, x, "?int32"),
        # optional lhs and rhs with promotion: might be null, wider type
        (v, y, "?int64"),
    ]
    for lhs, rhs, measure in binary_cases:
        merged = coalesce(lhs, rhs)
        assert_dshape_equal(merged.dshape, dshape(measure))
        assert merged.lhs.isidentical(lhs)
        assert merged.rhs.isidentical(rhs)
def test_path_issue():
    """A mapped column added via ``transform`` must be among the children.

    The check is structural: at least one entry of ``t2.children`` has to be
    ``isidentical`` to ``t2.sizes``.
    """
    t = TableSymbol('t', "{topic: string, word: string, result: ?float64}")

    # The lambda refers to MIN and MAX, which are not defined in this test;
    # nothing visible here calls it.
    t2 = transform(t, sizes=t.result.map(lambda x: (x - MIN)*10/(MAX - MIN),
                                         schema='float64', name='size'))

    # ``x in seq`` falls back to ``==`` for non-identical objects.  On
    # expression objects ``==`` is presumably overloaded to build a comparison
    # expression (TODO confirm), which would make a plain membership test pass
    # for any non-empty ``children``.  Compare structurally instead.
    matching = [node for node in t2.children if t2.sizes.isidentical(node)]
    assert matching
def test_coalesce():
    """Check what ``coalesce`` returns for each combination of optionality.

    Non-optional or ``null`` left-hand sides are expected to collapse to one
    of the operands; optional left-hand sides are expected to yield a node
    exposing ``lhs``, ``rhs`` and a dshape that is asserted per case.
    """

    def collapses_to(lhs, rhs, kept):
        # coalesce should hand back ``kept`` itself, not a new node.
        assert coalesce(lhs, rhs).isidentical(kept)

    def stays_binary(lhs, rhs, result_dshape):
        # coalesce should build a node over both operands with this dshape.
        node = coalesce(lhs, rhs)
        assert_dshape_equal(node.dshape, dshape(result_dshape))
        assert node.lhs.isidentical(lhs)
        assert node.rhs.isidentical(rhs)

    # check case where lhs is not optional
    s = symbol('s', 'int32')
    t = symbol('t', 'int32')
    collapses_to(s, t, s)

    s_expr = s + s
    t_expr = t * 3
    collapses_to(s_expr, t_expr, s_expr)

    a = symbol('a', 'string')
    b = symbol('b', 'string')
    collapses_to(a, b, a)

    a_expr = a + a
    b_expr = b * 3
    collapses_to(a_expr, b_expr, a_expr)

    c = symbol('c', '{a: int32, b: int32}')
    d = symbol('d', '{a: int32, b: int32}')
    collapses_to(c, d, c)

    c_expr = transform(c, a=c.a + 1)
    d_expr = transform(d, a=d.a * 3)
    collapses_to(c_expr, d_expr, c_expr)

    # check case where lhs is null dshape
    u = symbol('u', 'null')
    collapses_to(u, s, s)
    collapses_to(u, a, a)
    collapses_to(u, c, c)

    # check optional lhs non-optional rhs:
    # rhs is not optional so the expression cannot be null
    v = symbol('v', '?int32')
    stays_binary(v, s, 'int32')

    e = symbol('e', '?string')
    stays_binary(e, a, 'string')

    f = symbol('f', '?{a: int32, b: int32}')
    stays_binary(f, c, '{a: int32, b: int32}')

    # check optional lhs non-optional rhs with promotion:
    # still cannot be null; the expected dshape is the wider integer type
    w = symbol('w', 'int64')
    stays_binary(v, w, 'int64')

    # check optional lhs and rhs: both optional, so this might be null
    x = symbol('x', '?int32')
    stays_binary(v, x, '?int32')

    # check optional lhs and rhs with promotion:
    # might be null, and the expected dshape is the wider integer type
    y = symbol('y', '?int64')
    stays_binary(v, y, '?int64')