def test_distinct_count_on_projection():
    """Check the SQL rendered for ``count`` over a distinct projection."""
    # One-column projection: either of two renderings is accepted.
    single = compute(t[['amount']].distinct().count(), {t: s})
    rendered = normalize(str(single))
    inline_sql = (
        " SELECT count(DISTINCT accounts.amount)"
        " FROM accounts"
    )
    subquery_sql = (
        " SELECT count(alias.amount) as count"
        " FROM (SELECT DISTINCT accounts.amount AS amount"
        " FROM accounts) as alias"
    )
    assert any(rendered == normalize(sql)
               for sql in (inline_sql, subquery_sql))

    # note that id is the primary key
    pair = compute(t[['amount', 'id']].distinct().count(), {t: s})
    pair_sql = (
        " SELECT count(alias.id) as count"
        " FROM (SELECT DISTINCT accounts.amount AS amount,"
        " accounts.id AS id"
        " FROM accounts) as alias"
    )
    assert normalize(str(pair)) == normalize(pair_sql)
def test_clean_join():
    """A single join and a chained join both render as flat JOIN clauses."""
    metadata = sa.MetaData()
    name_tbl = sa.Table(
        'name', metadata,
        sa.Column('id', sa.Integer),
        sa.Column('name', sa.String),
    )
    place_tbl = sa.Table(
        'place', metadata,
        sa.Column('id', sa.Integer),
        sa.Column('city', sa.String),
        sa.Column('country', sa.String),
    )
    friends_tbl = sa.Table(
        'friends', metadata,
        sa.Column('a', sa.Integer),
        sa.Column('b', sa.Integer),
    )

    # Symbols are discovered from the tables, then bound back to them.
    city_sym = symbol('city', discover(place_tbl))
    friends_sym = symbol('friends', discover(friends_tbl))
    name_sym = symbol('name', discover(name_tbl))
    ns = {name_sym: name_tbl, friends_sym: friends_tbl, city_sym: place_tbl}

    # friends JOIN name
    one_join = join(friends_sym, name_sym, 'a', 'id')
    one_join_sql = (
        " SELECT friends.a, friends.b, name.name"
        " FROM friends JOIN name on friends.a = name.id"
    )
    assert normalize(str(compute(one_join, ns))) == normalize(one_join_sql)

    # (friends JOIN name) JOIN place
    two_joins = join(join(friends_sym, name_sym, 'a', 'id'),
                     city_sym, 'a', 'id')
    two_joins_sql = (
        " SELECT friends.a, friends.b, name.name, place.city, place.country"
        " FROM friends"
        " JOIN name ON friends.a = name.id"
        " JOIN place ON friends.a = place.id "
    )
    assert normalize(str(compute(two_joins, ns))) == normalize(two_joins_sql)
def test_join():
    """Join two tables on ``name``; check SQL text, schema and sorting."""
    metadata = sa.MetaData()
    amounts = sa.Table('amounts', metadata,
                       sa.Column('name', sa.String),
                       sa.Column('amount', sa.Integer))
    ids = sa.Table('ids', metadata,
                   sa.Column('name', sa.String),
                   sa.Column('id', sa.Integer))

    # Hand-built SQLAlchemy reference: join on name, with the joined columns
    # de-duplicated by column name through ``unique``.
    sa_join = amounts.join(ids, amounts.c.name == ids.c.name)
    deduped = list(unique(sa_join.columns, key=lambda col: col.name))
    expected = select(deduped).select_from(sa_join)

    left = symbol('L', 'var * {name: string, amount: int}')
    right = symbol('R', 'var * {name: string, id: int}')
    joined = join(left, right, 'name')

    result = compute(joined, {left: amounts, right: ids})
    join_sql = (
        " SELECT amounts.name, amounts.amount, ids.id"
        " FROM amounts JOIN ids ON amounts.name = ids.name"
    )
    assert normalize(str(result)) == normalize(join_sql)
    assert str(select(result)) == str(select(expected))

    # Schemas match
    assert list(result.c.keys()) == list(joined.fields)

    # test sort on join
    sorted_result = compute(joined.sort('amount'),
                            {left: amounts, right: ids})
    sorted_sql = (
        " SELECT amounts.name, amounts.amount, ids.id"
        " FROM amounts JOIN ids ON amounts.name = ids.name"
        " ORDER BY amounts.amount"
    )
    assert normalize(str(sorted_result)) == normalize(sorted_sql)
def test_arithmetic():
    """Column arithmetic renders like the matching SQLAlchemy expression."""
    amount, ident = t['amount'], t['id']
    sa_amount, sa_id = s.c.amount, s.c.id

    # computefull results are compared against a full SELECT statement
    full_sum = computefull(amount + ident, s)
    assert str(full_sum) == str(sa.select([sa_amount + sa_id]))

    # compute results are compared against the bare column expression
    assert str(compute(amount + ident, s)) == str(sa_amount + sa_id)
    assert str(compute(amount * ident, s)) == str(sa_amount * sa_id)

    full_mixed = computefull(amount + ident * 2, s)
    assert str(full_mixed) == str(sa.select([sa_amount + sa_id * 2]))
def test_reductions():
    """sum/mean/count render as SQL aggregates; the sum is named amount_sum."""
    def raw(expr):
        # Every check here calls compute with post_compute disabled.
        return compute(expr, s, post_compute=False)

    amount = s.c.amount

    assert str(raw(sum(t['amount']))) == str(sa.sql.functions.sum(amount))
    assert str(raw(mean(t['amount']))) == str(sa.sql.func.avg(amount))
    assert str(raw(count(t['amount']))) == str(sa.sql.func.count(amount))

    assert 'amount_sum' == raw(sum(t['amount'])).name
def test_outer_join():
    """Run inner, left and right joins against SQLite and check the rows.

    Two tables are created on an in-memory SQLite engine, filled with a few
    rows, and joined with each ``how`` mode. The fetched rows are compared as
    sets, so row order is not checked.

    The connection is closed in a ``finally`` clause so that a failing
    assertion does not leave it open.
    """
    L = Symbol('L', 'var * {id: int, name: string, amount: real}')
    R = Symbol('R', 'var * {city: string, id: int}')

    from blaze.sql import SQL
    engine = sa.create_engine('sqlite:///:memory:')

    _left = [(1, 'Alice', 100),
             (2, 'Bob', 200),
             (4, 'Dennis', 400)]
    left = SQL(engine, 'left', schema=L.schema)
    left.extend(_left)

    _right = [('NYC', 1),
              ('Boston', 1),
              ('LA', 3),
              ('Moscow', 4)]
    right = SQL(engine, 'right', schema=R.schema)
    right.extend(_right)

    tables = {L: left.table, R: right.table}

    conn = engine.connect()
    try:
        def fetch(how):
            # Compile the join for this mode, execute it, return row tuples.
            query = compute(join(L, R, how=how), tables)
            return set(map(tuple, conn.execute(query).fetchall()))

        assert fetch('inner') == {
            (1, 'Alice', 100, 'NYC'),
            (1, 'Alice', 100, 'Boston'),
            (4, 'Dennis', 400, 'Moscow'),
        }

        assert fetch('left') == {
            (1, 'Alice', 100, 'NYC'),
            (1, 'Alice', 100, 'Boston'),
            (2, 'Bob', 200, None),
            (4, 'Dennis', 400, 'Moscow'),
        }

        assert fetch('right') == {
            (1, 'Alice', 100, 'NYC'),
            (1, 'Alice', 100, 'Boston'),
            (3, None, None, 'LA'),
            (4, 'Dennis', 400, 'Moscow'),
        }

        # SQLAlchemy doesn't support full outer join, so how='outer' is not
        # exercised. The rows it would be expected to return are:
        #     (1, 'Alice', 100, 'NYC'),
        #     (1, 'Alice', 100, 'Boston'),
        #     (2, 'Bob', 200, None),
        #     (3, None, None, 'LA'),
        #     (4, 'Dennis', 400, 'Moscow')
    finally:
        conn.close()
def test_count_on_table():
    """Counting a table, plain or filtered, counts ``accounts.id``."""
    # Whole table
    plain = select(compute(t.count(), s))
    plain_sql = (
        " SELECT count(accounts.id) as count_1"
        " FROM accounts"
    )
    assert normalize(str(plain)) == normalize(plain_sql)

    # Filtered table: the predicate is rendered as a WHERE clause
    filtered = select(compute(t[t.amount > 0].count(), s))
    filtered_sql = (
        " SELECT count(accounts.id) as count_1"
        " FROM accounts"
        " WHERE accounts.amount > :amount_1"
    )
    assert normalize(str(filtered)) == normalize(filtered_sql)
def test_sort_on_distinct():
    """Sorting a column, with and without ``distinct``, adds ORDER BY."""
    sorted_sql = (
        " SELECT accounts.amount"
        " FROM accounts"
        " ORDER BY accounts.amount"
    )
    sorted_col = compute(t.amount.sort(), s)
    assert normalize(str(sorted_col)) == normalize(sorted_sql)

    distinct_sorted_sql = (
        " SELECT DISTINCT accounts.amount as amount"
        " FROM accounts"
        " ORDER BY amount"
    )
    distinct_sorted = compute(t.amount.distinct().sort(), s)
    assert normalize(str(distinct_sorted)) == normalize(distinct_sorted_sql)
def test_join_on_single_column(): expr = join(cities[['name']], bank) result = compute(expr, {bank: sql_bank, cities: sql_cities}) assert normalize(str(result)) == """ SELECT bank.id, bank.name, bank.amount FROM bank join cities ON bank.name = cities.name""" expr = join(bank, cities.name) result = compute(expr, {bank: sql_bank, cities: sql_cities}) assert normalize(str(result)) == """
def test_arithmetic():
    """Column arithmetic and negation render like SQLAlchemy expressions."""
    amount, ident = t['amount'], t['id']
    sa_amount, sa_id = s.c.amount, s.c.id

    # computefull results are compared against a full SELECT statement
    full_sum = computefull(amount + ident, s)
    assert str(full_sum) == str(sa.select([sa_amount + sa_id]))

    # Column-with-column operations
    assert str(compute(amount + ident, s)) == str(sa_amount + sa_id)
    assert str(compute(amount * ident, s)) == str(sa_amount * sa_id)

    # Column-with-scalar operations, scalar on either side
    assert str(compute(amount * 2, s)) == str(sa_amount * 2)
    assert str(compute(2 * amount, s)) == str(2 * sa_amount)

    # Negated comparison
    negated = compute(~(amount > 10), s)
    assert str(negated) == "~(accounts.amount > :amount_1)"

    full_mixed = computefull(amount + ident * 2, s)
    assert str(full_mixed) == str(sa.select([sa_amount + sa_id * 2]))
def test_aliased_views_more():
    """Join a grouped subquery to a table, then group the join again."""
    metadata = sa.MetaData()
    aaa = sa.Table('aaa', metadata,
                   sa.Column('x', sa.Integer),
                   sa.Column('y', sa.Integer),
                   sa.Column('z', sa.Integer))
    bbb = sa.Table('bbb', metadata,
                   sa.Column('w', sa.Integer),
                   sa.Column('x', sa.Integer),
                   sa.Column('y', sa.Integer))

    left = symbol('L', 'var * {x: int, y: int, z: int}')
    right = symbol('R', 'var * {w: int, x: int, y: int}')
    ns = {left: aaa, right: bbb}

    # Step 1: the grouped left side is joined to the right table.
    grouped_join = join(by(left.x, y_total=left.y.sum()), right)
    joined_sql = (
        " SELECT alias.x, alias.y_total, bbb.w, bbb.y"
        " FROM (SELECT aaa.x as x, sum(aaa.y) as y_total"
        " FROM aaa"
        " GROUP BY aaa.x) AS alias"
        " JOIN bbb ON alias.x = bbb.x "
    )
    assert normalize(str(compute(grouped_join, ns))) == normalize(joined_sql)

    # Step 2: group the join result; a nested and a flat rendering are both
    # accepted.
    regrouped = by(grouped_join.w,
                   count=grouped_join.x.count(),
                   total2=grouped_join.y_total.sum())
    rendered = normalize(str(compute(regrouped, ns)))
    nested_sql = (
        " SELECT alias_2.w, count(alias_2.x) as count,"
        " sum(alias_2.y_total) as total2"
        " FROM (SELECT alias.x, alias.y_total, bbb.w, bbb.y"
        " FROM (SELECT aaa.x as x, sum(aaa.y) as y_total"
        " FROM aaa"
        " GROUP BY aaa.x) AS alias"
        " JOIN bbb ON alias.x = bbb.x) AS alias_2"
        " GROUP BY alias_2.w"
    )
    flat_sql = (
        " SELECT bbb.w, count(alias.x) as count,"
        " sum(alias.y_total) as total2"
        " FROM (SELECT aaa.x as x, sum(aaa.y) as y_total"
        " FROM aaa"
        " GROUP BY aaa.x) as alias"
        " JOIN bbb ON alias.x = bbb.x"
        " GROUP BY bbb.w"
    )
    assert any(rendered == normalize(sql) for sql in (nested_sql, flat_sql))
def test_join():
    """Join two tables on ``name``; check SQL text, schema and sorting."""
    metadata = sa.MetaData()
    amounts = sa.Table('amounts', metadata,
                       sa.Column('name', sa.String),
                       sa.Column('amount', sa.Integer))
    ids = sa.Table('ids', metadata,
                   sa.Column('name', sa.String),
                   sa.Column('id', sa.Integer))

    # Hand-built SQLAlchemy reference: join on name, with the joined columns
    # de-duplicated by column name through ``unique``.
    sa_join = amounts.join(ids, amounts.c.name == ids.c.name)
    deduped = list(unique(sa_join.columns, key=lambda col: col.name))
    expected = select(deduped).select_from(sa_join)

    left = symbol('L', 'var * {name: string, amount: int}')
    right = symbol('R', 'var * {name: string, id: int}')
    joined = join(left, right, 'name')

    result = compute(joined, {left: amounts, right: ids})
    join_sql = (
        " SELECT amounts.name, amounts.amount, ids.id"
        " FROM amounts JOIN ids ON amounts.name = ids.name"
    )
    assert normalize(str(result)) == normalize(join_sql)
    assert str(select(result)) == str(select(expected))

    # Schemas match
    assert list(result.c.keys()) == list(joined.fields)

    # test sort on join: here the sorted form is expected to wrap the join
    # in an anonymous subquery
    sorted_result = compute(joined.sort('amount'),
                            {left: amounts, right: ids})
    sorted_sql = (
        " select anon_1.name, anon_1.amount, anon_1.id"
        " from (select amounts.name as name, amounts.amount as amount,"
        " ids.id as id"
        " from amounts join ids on amounts.name = ids.name) as anon_1"
        " order by anon_1.amount asc"
    )
    assert normalize(str(sorted_result)) == normalize(sorted_sql)
def test_math():
    """sin, floor and floor-division on a column render as SQL functions."""
    sin_sql = (
        " SELECT sin(accounts.amount) as amount"
        " FROM accounts"
    )
    assert normalize(str(compute(sin(t.amount), s))) == normalize(sin_sql)

    floor_sql = (
        " SELECT floor(accounts.amount) as amount"
        " FROM accounts"
    )
    assert normalize(str(compute(floor(t.amount), s))) == normalize(floor_sql)

    # ``//`` is expected to render as floor() around a division
    floordiv_sql = (
        " SELECT floor(accounts.amount / :amount_1) AS amount"
        " FROM accounts"
    )
    assert normalize(str(compute(t.amount // 2, s))) == normalize(floordiv_sql)
def test_multi_column_join():
    """Joining on two columns matches a hand-built SQLAlchemy join."""
    metadata = sa.MetaData()
    aaa = sa.Table('aaa', metadata,
                   sa.Column('x', sa.Integer),
                   sa.Column('y', sa.Integer),
                   sa.Column('z', sa.Integer))
    bbb = sa.Table('bbb', metadata,
                   sa.Column('w', sa.Integer),
                   sa.Column('x', sa.Integer),
                   sa.Column('y', sa.Integer))

    left = symbol('L', 'var * {x: int, y: int, z: int}')
    right = symbol('R', 'var * {w: int, x: int, y: int}')
    joined = join(left, right, ['x', 'y'])

    # Reference query: join on both x and y, with the joined columns
    # de-duplicated by column name through ``unique``.
    on_both = (aaa.c.x == bbb.c.x) & (aaa.c.y == bbb.c.y)
    sa_join = aaa.join(bbb, on_both)
    deduped = list(unique(sa_join.columns, key=lambda col: col.name))
    expected = select(deduped).select_from(sa_join)

    result = compute(joined, {left: aaa, right: bbb})

    assert str(result) == str(expected)
    assert str(select(result)) == str(select(expected))

    # Schemas match
    print(result.c.keys())
    print(joined.fields)
    assert list(result.c.keys()) == list(joined.fields)
def test_clean_complex_join():
    """Joining a filtered table renders as one of two accepted queries."""
    metadata = sa.MetaData()
    amounts = sa.Table('amounts', metadata,
                       sa.Column('name', sa.String),
                       sa.Column('amount', sa.Integer))
    ids = sa.Table('ids', metadata,
                   sa.Column('name', sa.String),
                   sa.Column('id', sa.Integer))

    left = symbol('L', 'var * {name: string, amount: int}')
    right = symbol('R', 'var * {name: string, id: int}')

    joined = join(left[left.amount > 0], right, 'name')
    rendered = normalize(str(compute(joined, {left: amounts, right: ids})))

    # Accepted form 1: flat join with the predicate in a WHERE clause
    where_sql = (
        " SELECT amounts.name, amounts.amount, ids.id"
        " FROM amounts JOIN ids ON amounts.name = ids.name"
        " WHERE amounts.amount > :amount_1"
    )
    # Accepted form 2: the filtered selection appears as a subquery
    subquery_sql = (
        " SELECT amounts.name, amounts.amount, ids.id"
        " FROM amounts, (SELECT amounts.name AS name,"
        " amounts.amount AS amount"
        " FROM amounts"
        " WHERE amounts.amount > :amount_1) JOIN ids"
        " ON amounts.name = ids.name"
    )
    assert any(rendered == normalize(sql)
               for sql in (where_sql, subquery_sql))
def test_count_on_table():
    """Counting a table, plain or filtered, renders an accepted COUNT query."""
    # Whole table
    plain_sql = (
        " SELECT count(accounts.id) as count_1"
        " FROM accounts"
    )
    assert normalize(str(compute(t.count(), s))) == normalize(plain_sql)

    # Filtered table: a WHERE clause or a count over a subquery is accepted
    rendered = normalize(str(compute(t[t.amount > 0].count(), s)))
    where_sql = (
        " SELECT count(accounts.id) as count_1"
        " FROM accounts"
        " WHERE accounts.amount > :amount_1"
    )
    subquery_sql = (
        " SELECT count(alias.id) as count"
        " FROM (SELECT accounts.name AS name, accounts.amount AS amount,"
        " accounts.id AS id"
        " FROM accounts"
        " WHERE accounts.amount > :amount_1) as alias"
    )
    assert any(rendered == normalize(sql)
               for sql in (where_sql, subquery_sql))
def test_join_count():
    """Counting a join over a filtered table renders an accepted query."""
    ds = datashape.dshape(
        '{t1: var * {x: int, y: int}, t2: var * {a: int, b: int}}')
    engine = resource('sqlite:///:memory:', dshape=ds)
    db = symbol('db', ds)

    filtered_t1 = db.t1[db.t1.x > -1]
    counted = join(filtered_t1, db.t2, 'x', 'a').count()
    rendered = normalize(
        str(compute(counted, {db: engine}, post_compute=False)))

    # Accepted form 1: a single aliased subquery holding join and filter
    expected1 = (
        " SELECT count(alias.x) as count"
        " FROM (SELECT t1.x AS x, t1.y AS y, t2.b AS b"
        " FROM t1 JOIN t2 ON t1.x = t2.a"
        " WHERE t1.x > ?) as alias "
    )
    # Accepted form 2: the filter in its own subquery, nested inside the join
    expected2 = (
        " SELECT count(alias2.x) AS count"
        " FROM (SELECT alias1.x AS x, alias1.y AS y, t2.b AS b"
        " FROM (SELECT t1.x AS x, t1.y AS y"
        " FROM t1"
        " WHERE t1.x > ?) AS alias1"
        " JOIN t2 ON alias1.x = t2.a) AS alias2"
    )
    assert any(rendered == normalize(sql) for sql in (expected1, expected2))
def test_join():
    """Joining two TableSymbols on ``name`` matches a hand-built join."""
    metadata = sa.MetaData()
    amounts = sa.Table('amounts', metadata,
                       sa.Column('name', sa.String),
                       sa.Column('amount', sa.Integer))
    ids = sa.Table('ids', metadata,
                   sa.Column('name', sa.String),
                   sa.Column('id', sa.Integer))

    # Hand-built SQLAlchemy reference: join on name, with the joined columns
    # de-duplicated by column name through ``unique``.
    sa_join = amounts.join(ids, amounts.c.name == ids.c.name)
    deduped = list(unique(sa_join.columns, key=lambda col: col.name))
    expected = select(deduped).select_from(sa_join)

    left = TableSymbol('L', '{name: string, amount: int}')
    right = TableSymbol('R', '{name: string, id: int}')
    joined = join(left, right, 'name')

    result = compute(joined, {left: amounts, right: ids})

    assert str(result) == str(expected)
    assert str(select(result)) == str(select(expected))

    # Schemas match
    assert list(result.c.keys()) == list(joined.columns)
def test_sort_compose():
    """Slice-then-sort and sort-then-slice must not render the same SQL."""
    expected = (
        "select anon_1.name"
        " from (select accounts.name as name"
        " from accounts"
        " limit :param_1"
        " offset :param_2) as anon_1"
        " order by anon_1.name asc"
    )

    # Slicing first: the sort applies to the limited subquery.
    slice_then_sort = compute(t.name[:5].sort(), s)
    assert normalize(str(slice_then_sort)) == normalize(expected)

    # Sorting first has to produce a different query.
    sort_then_slice = compute(t.sort('name').name[:5], s)
    assert normalize(str(sort_then_slice)) != normalize(expected)
def test_columnwise_on_complex_selection():
    """Arithmetic on a column of a filtered table keeps the WHERE clause."""
    shifted = t[t.amount > 0].amount + 1
    rendered = normalize(str(select(compute(shifted, s))))
    expected = (
        " SELECT accounts.amount + :amount_1 AS anon_1"
        " FROM accounts"
        " WHERE accounts.amount > :amount_2 "
    )
    assert rendered == normalize(expected)
def test_reductions_on_complex_selections():
    """Summing a column of a filtered table keeps the WHERE clause."""
    total = t[t.amount > 0].id.sum()
    rendered = normalize(str(select(compute(total, s))))
    expected = (
        " SELECT sum(accounts.id) as id_sum"
        " FROM accounts"
        " WHERE accounts.amount > :amount_1 "
    )
    assert rendered == normalize(expected)
def test_join_complex_clean():
    """Joining a filtered table to another renders an accepted query.

    Two renderings are accepted: a flat join with the predicate in a WHERE
    clause, or a join against an aliased subquery that holds the predicate.
    """
    metadata = sa.MetaData()
    name = sa.Table('name', metadata,
                    sa.Column('id', sa.Integer),
                    sa.Column('name', sa.String),
                    )
    city = sa.Table('place', metadata,
                    sa.Column('id', sa.Integer),
                    sa.Column('city', sa.String),
                    sa.Column('country', sa.String),
                    )

    tname = symbol('name', discover(name))
    tcity = symbol('city', discover(city))
    ns = {tname: name, tcity: city}

    expr = join(tname[tname.id > 0], tcity, 'id')
    result = compute(expr, ns)

    expected1 = (
        " SELECT name.id, name.name, place.city, place.country"
        " FROM name JOIN place ON name.id = place.id"
        " WHERE name.id > :id_1"
    )
    expected2 = (
        " SELECT alias.id, alias.name, place.city, place.country"
        " FROM (SELECT name.id as id, name.name AS name"
        " FROM name"
        " WHERE name.id > :id_1) AS alias"
        " JOIN place ON alias.id = place.id"
    )
    assert (normalize(str(result)) == normalize(expected1) or
            normalize(str(result)) == normalize(expected2))
def test_join_count():
    """Counting a join over a filtered table renders an accepted query."""
    ds = datashape.dshape(
        '{t1: var * {x: int, y: int}, t2: var * {a: int, b: int}}')
    engine = resource('sqlite:///:memory:', dshape=ds)
    db = symbol('db', ds)

    filtered_t1 = db.t1[db.t1.x > -1]
    counted = join(filtered_t1, db.t2, 'x', 'a').count()
    rendered = normalize(
        str(compute(counted, {db: engine}, post_compute=False)))

    # Accepted form 1: a single aliased subquery holding join and filter
    expected1 = (
        " SELECT count(alias.x) as count"
        " FROM (SELECT t1.x AS x, t1.y AS y, t2.b AS b"
        " FROM t1 JOIN t2 ON t1.x = t2.a"
        " WHERE t1.x > ?) as alias "
    )
    # Accepted form 2: the filter in its own subquery, nested inside the join
    expected2 = (
        " SELECT count(alias2.x) AS __count"
        " FROM (SELECT alias1.x AS x, alias1.y AS y, t2.b AS b"
        " FROM (SELECT t1.x AS x, t1.y AS y"
        " FROM t1"
        " WHERE t1.x > ?) AS alias1"
        " JOIN t2 ON alias1.x = t2.a) AS alias2"
    )
    assert any(rendered == normalize(sql) for sql in (expected1, expected2))
def test_selection_of_join():
    """Filtering a join and projecting one column renders as a flat query."""
    metadata = sa.MetaData()
    name_tbl = sa.Table(
        'name', metadata,
        sa.Column('id', sa.Integer),
        sa.Column('name', sa.String),
    )
    place_tbl = sa.Table(
        'place', metadata,
        sa.Column('id', sa.Integer),
        sa.Column('city', sa.String),
        sa.Column('country', sa.String),
    )

    name_sym = symbol('name', discover(name_tbl))
    city_sym = symbol('city', discover(place_tbl))
    ns = {name_sym: name_tbl, city_sym: place_tbl}

    # Select rows of the join by city, then keep only the name column.
    joined = join(name_sym, city_sym, 'id')
    nyc_names = joined[joined.city == 'NYC'].name

    expected = (
        " SELECT name.name"
        " FROM name JOIN place ON name.id = place.id"
        " WHERE place.city = :city_1"
    )
    assert normalize(str(compute(nyc_names, ns))) == normalize(expected)
def test_join_complex_clean():
    """Joining a filtered table to another renders as a flat join.

    The filter predicate is expected to appear as a WHERE clause on the
    joined query.
    """
    metadata = sa.MetaData()
    name = sa.Table('name', metadata,
                    sa.Column('id', sa.Integer),
                    sa.Column('name', sa.String),
                    )
    city = sa.Table('place', metadata,
                    sa.Column('id', sa.Integer),
                    sa.Column('city', sa.String),
                    sa.Column('country', sa.String),
                    )

    tname = symbol('name', discover(name))
    tcity = symbol('city', discover(city))
    ns = {tname: name, tcity: city}

    expr = join(tname[tname.id > 0], tcity, 'id')
    result = compute(expr, ns)

    expected = (
        " SELECT name.id, name.name, place.city, place.country"
        " FROM name JOIN place ON name.id = place.id"
        " WHERE name.id > :id_1"
    )
    assert normalize(str(result)) == normalize(expected)
def test_coerce():
    """Coercing a column to int64 renders as a CAST to BIGINT."""
    coerced = compute(t.amount.coerce(to='int64'), s)
    cast_sql = (
        "SELECT cast(accounts.amount AS BIGINT) AS amount"
        " FROM accounts"
    )
    assert normalize(str(coerced)) == normalize(cast_sql)
def test_clean_complex_join():
    """Joining a filtered table renders as one of two accepted queries."""
    metadata = sa.MetaData()
    amounts = sa.Table('amounts', metadata,
                       sa.Column('name', sa.String),
                       sa.Column('amount', sa.Integer))
    ids = sa.Table('ids', metadata,
                   sa.Column('name', sa.String),
                   sa.Column('id', sa.Integer))

    left = symbol('L', 'var * {name: string, amount: int}')
    right = symbol('R', 'var * {name: string, id: int}')

    joined = join(left[left.amount > 0], right, 'name')
    rendered = normalize(str(compute(joined, {left: amounts, right: ids})))

    # Accepted form 1: flat join with the predicate in a WHERE clause
    expected1 = (
        " SELECT amounts.name, amounts.amount, ids.id"
        " FROM amounts JOIN ids ON amounts.name = ids.name"
        " WHERE amounts.amount > :amount_1"
    )
    # Accepted form 2: join against an aliased, pre-filtered subquery
    expected2 = (
        " SELECT alias.name, alias.amount, ids.id"
        " FROM (SELECT amounts.name AS name, amounts.amount AS amount"
        " FROM amounts"
        " WHERE amounts.amount > :amount_1) AS alias"
        " JOIN ids ON alias.name = ids.name"
    )
    assert any(rendered == normalize(sql) for sql in (expected1, expected2))
def test_projection_of_join():
    """Projecting columns out of a join with a filtered side stays flat."""
    metadata = sa.MetaData()
    name_tbl = sa.Table(
        'name', metadata,
        sa.Column('id', sa.Integer),
        sa.Column('name', sa.String),
    )
    place_tbl = sa.Table(
        'place', metadata,
        sa.Column('id', sa.Integer),
        sa.Column('city', sa.String),
        sa.Column('country', sa.String),
    )

    name_sym = symbol('name', discover(name_tbl))
    city_sym = symbol('city', discover(place_tbl))

    # Filter the city side, join on id, then pick two columns.
    nyc_only = city_sym[city_sym.city == 'NYC']
    projected = join(name_sym, nyc_only, 'id')[['country', 'name']]

    ns = {name_sym: name_tbl, city_sym: place_tbl}
    expected = (
        " SELECT place.country, name.name"
        " FROM name JOIN place ON name.id = place.id"
        " WHERE place.city = :city_1"
    )
    assert normalize(str(compute(projected, ns))) == normalize(expected)
def test_transform_then_project_single_column():
    """Projecting a transformed column next to its source column."""
    with_foo = transform(t, foo=t.id + 1)
    projected = with_foo[['foo', 'id']]

    expected = normalize(
        "SELECT accounts.id + :id_1 as foo, accounts.id"
        " FROM accounts"
    )
    assert normalize(str(compute(projected, s))) == expected
def test_by():
    """Grouping by one column matches a hand-built GROUP BY select."""
    grouped = by(t['name'], total=t['amount'].sum())
    result = compute(grouped, s)

    # Reference: name plus the labelled sum, grouped by name
    total = sa.sql.functions.sum(s.c.amount).label('total')
    expected = sa.select([s.c.name, total]).group_by(s.c.name)

    assert str(result) == str(expected)
def test_distinct():
    """Distinct over a column renders like ``sa.distinct`` on that column."""
    rendered = str(compute(Distinct(t['amount']), s, post_compute=False))

    # Loose checks first: both keywords occur, ignoring case
    lowered = rendered.lower()
    assert 'distinct' in lowered
    assert 'amount' in lowered
    print(rendered)

    # Then the exact text
    assert rendered == str(sa.distinct(s.c.amount))
def test_slice():
    """Slicing a table renders as a SELECT with LIMIT and OFFSET.

    The result is compared against a SQLAlchemy reference query and against
    the expected SQL text. A slice with a step of 1 must also compile.
    """
    start, stop, step = 50, 100, 1
    result = str(compute(t[start:stop], s))

    # Verifies that compute is translating the query correctly. The slice
    # [start:stop] skips ``start`` rows and then takes ``stop - start`` rows;
    # the rendered text only contains bound-parameter placeholders, so the
    # limit value itself is not part of the comparison.
    reference = select(s).offset(start).limit(stop - start)
    assert result == str(reference)

    # Verifies the query against expected SQL query
    expected = (
        " SELECT accounts.name, accounts.amount, accounts.id"
        " FROM accounts"
        " LIMIT :param_1 OFFSET :param_2 "
    )
    assert normalize(result) == normalize(expected)

    # Step size of 1 should be alright
    compute(t[start:stop:step], s)
def test_by_on_count():
    """Grouping with a table count counts ``accounts.id`` per group."""
    grouped = by(t.name, count=t.count())
    expected = (
        " SELECT accounts.name, count(accounts.id) AS count"
        " FROM accounts"
        " GROUP BY accounts.name "
    )
    assert normalize(str(compute(grouped, s))) == normalize(expected)
def test_datetime_to_date():
    """The ``.date`` attribute of a datetime column renders as DATE(...)."""
    rendered = str(compute(tdate.occurred_on.date, sdate))
    date_sql = (
        "SELECT DATE(accdate.occurred_on) as occurred_on_date"
        " FROM accdate "
    )
    assert normalize(rendered) == normalize(date_sql)
def test_by_two():
    """Grouping by two columns matches a hand-built GROUP BY select."""
    grouped = by(tbig[['name', 'sex']], total=tbig['amount'].sum())
    result = compute(grouped, sbig)

    # Reference: both key columns plus the labelled sum, grouped by both keys
    total = sa.sql.functions.sum(sbig.c.amount).label('total')
    selected = sa.select([sbig.c.name, sbig.c.sex, total])
    expected = selected.group_by(sbig.c.name, sbig.c.sex)

    assert str(result) == str(expected)
def test_summary_by():
    """A grouped summary renders both aggregates and the GROUP BY clause."""
    grouped = by(t.name, summary(a=t.amount.sum(), b=t.id.count()))
    lowered = str(compute(grouped, s)).lower()

    # Substring checks on the lower-cased SQL text
    assert 'sum(accounts.amount) as a' in lowered
    assert 'count(accounts.id) as b' in lowered
    assert 'group by accounts.name' in lowered
def test_summary_clean():
    """A summary over a filtered table renders one SELECT with WHERE."""
    positive = t[t.amount > 0]
    totals = summary(a=positive.amount.sum(), b=positive.id.count())

    expected = (
        " SELECT sum(accounts.amount) as a, count(accounts.id) as b"
        " FROM accounts"
        " WHERE accounts.amount > :amount_1"
    )
    assert normalize(str(compute(totals, s))) == normalize(expected)