def test_selection_of_join():
    """Filtering a join and projecting one column compiles to a flat query.

    The expected SQL has a single ``SELECT`` over the joined tables with the
    ``city`` filter expressed as a ``WHERE`` clause.
    """
    meta = sa.MetaData()
    name_table = sa.Table(
        'name',
        meta,
        sa.Column('id', sa.Integer),
        sa.Column('name', sa.String),
    )
    place_table = sa.Table(
        'place',
        meta,
        sa.Column('id', sa.Integer),
        sa.Column('city', sa.String),
        sa.Column('country', sa.String),
    )

    name_sym = symbol('name', discover(name_table))
    city_sym = symbol('city', discover(place_table))
    scope = {name_sym: name_table, city_sym: place_table}

    joined = join(name_sym, city_sym, 'id')
    in_nyc = joined.city == 'NYC'
    expr = joined[in_nyc].name

    actual_sql = normalize(str(compute(expr, scope)))
    expected_sql = (
        " SELECT name.name FROM name JOIN place ON name.id = place.id"
        " WHERE place.city = :city_1"
    )
    assert actual_sql == normalize(expected_sql)
def test_join_complex_clean():
    """Joining a filtered table compiles to one of two accepted SQL shapes.

    Either the filter appears as a ``WHERE`` on the flat join, or the
    filtered table appears as an aliased subquery that is then joined.
    """
    meta = sa.MetaData()
    name_table = sa.Table(
        'name',
        meta,
        sa.Column('id', sa.Integer),
        sa.Column('name', sa.String),
    )
    place_table = sa.Table(
        'place',
        meta,
        sa.Column('id', sa.Integer),
        sa.Column('city', sa.String),
        sa.Column('country', sa.String),
    )

    name_sym = symbol('name', discover(name_table))
    city_sym = symbol('city', discover(place_table))
    scope = {name_sym: name_table, city_sym: place_table}

    positive_ids = name_sym[name_sym.id > 0]
    expr = join(positive_ids, city_sym, 'id')
    result = compute(expr, scope)

    flat_sql = (
        " SELECT name.id, name.name, place.city, place.country"
        " FROM name JOIN place ON name.id = place.id"
        " WHERE name.id > :id_1"
    )
    subquery_sql = (
        " SELECT alias.id, alias.name, place.city, place.country"
        " FROM (SELECT name.id as id, name.name AS name FROM name"
        " WHERE name.id > :id_1) AS alias"
        " JOIN place ON alias.id = place.id"
    )

    # Lazily compare against each accepted form, stopping at the first match.
    assert any(
        normalize(str(result)) == normalize(candidate)
        for candidate in (flat_sql, subquery_sql)
    )
def test_selection_of_join():
    """Filtering a join and projecting one column compiles to a flat query.

    The expected SQL has a single ``SELECT`` over the joined tables with the
    ``city`` filter expressed as a ``WHERE`` clause.
    """
    meta = sa.MetaData()
    name_table = sa.Table(
        'name',
        meta,
        sa.Column('id', sa.Integer),
        sa.Column('name', sa.String),
    )
    place_table = sa.Table(
        'place',
        meta,
        sa.Column('id', sa.Integer),
        sa.Column('city', sa.String),
        sa.Column('country', sa.String),
    )

    name_sym = symbol('name', discover(name_table))
    city_sym = symbol('city', discover(place_table))
    scope = {name_sym: name_table, city_sym: place_table}

    joined = join(name_sym, city_sym, 'id')
    in_nyc = joined.city == 'NYC'
    expr = joined[in_nyc].name

    actual_sql = normalize(str(compute(expr, scope)))
    expected_sql = (
        " SELECT name.name FROM name JOIN place ON name.id = place.id"
        " WHERE place.city = :city_1"
    )
    assert actual_sql == normalize(expected_sql)
def test_lower_column():
    """``lower_column`` maps derived columns back to their table column.

    Three cases are checked: a plain table column is returned as-is, a
    column of a ``select`` over the table resolves to the table column, and
    a column taken from a join resolves to the column of the joined table.
    """
    metadata = sa.MetaData()
    name = sa.Table(
        'name',
        metadata,
        sa.Column('id', sa.Integer),
        sa.Column('name', sa.String),
    )
    city = sa.Table(
        'place',
        metadata,
        sa.Column('id', sa.Integer),
        sa.Column('city', sa.String),
        sa.Column('country', sa.String),
    )

    # No blaze symbols are needed here: lower_column operates directly on
    # SQLAlchemy columns.
    assert lower_column(name.c.id) is name.c.id
    assert lower_column(select(name).c.id) is name.c.id

    joined = name.join(city, name.c.id == city.c.id)
    # Pick the join's 'country' column without building a throwaway list.
    country = next(c for c in joined.columns if c.name == 'country')

    assert lower_column(country) is city.c.country
def test_clean_join():
    """Single and nested joins compile to the expected SQL.

    A two-table join must produce one flat ``JOIN``. A join of that join
    with a third table may be rendered either as a chain of ``JOIN``s or as
    an aliased subquery joined to the third table.
    """
    meta = sa.MetaData()
    name_table = sa.Table(
        'name',
        meta,
        sa.Column('id', sa.Integer),
        sa.Column('name', sa.String),
    )
    place_table = sa.Table(
        'place',
        meta,
        sa.Column('id', sa.Integer),
        sa.Column('city', sa.String),
        sa.Column('country', sa.String),
    )
    friends_table = sa.Table(
        'friends',
        meta,
        sa.Column('a', sa.Integer),
        sa.Column('b', sa.Integer),
    )

    city_sym = symbol('city', discover(place_table))
    friends_sym = symbol('friends', discover(friends_table))
    name_sym = symbol('name', discover(name_table))
    scope = {
        name_sym: name_table,
        friends_sym: friends_table,
        city_sym: place_table,
    }

    # Two-table join.
    pair = join(friends_sym, name_sym, 'a', 'id')
    pair_sql = (
        " SELECT friends.a, friends.b, name.name"
        " FROM friends JOIN name on friends.a = name.id"
    )
    assert normalize(str(compute(pair, scope))) == normalize(pair_sql)

    # Join of a join with a third table.
    inner = join(friends_sym, name_sym, 'a', 'id')
    triple = join(inner, city_sym, 'a', 'id')
    result = compute(triple, scope)

    flat_sql = (
        " SELECT friends.a, friends.b, name.name, place.city, place.country"
        " FROM friends JOIN name ON friends.a = name.id"
        " JOIN place ON friends.a = place.id "
    )
    subquery_sql = (
        " SELECT alias.a, alias.b, alias.name, place.city, place.country"
        " FROM (SELECT friends.a AS a, friends.b AS b, name.name AS name"
        " FROM friends JOIN name ON friends.a = name.id) AS alias"
        " JOIN place ON alias.a = place.id "
    )

    assert any(
        normalize(str(result)) == normalize(candidate)
        for candidate in (flat_sql, subquery_sql)
    )
def test_clean_join():
    """Single and nested joins compile to the expected SQL.

    A two-table join must produce one flat ``JOIN``. A join of that join
    with a third table may be rendered either as a chain of ``JOIN``s or as
    an aliased subquery joined to the third table.
    """
    meta = sa.MetaData()
    name_table = sa.Table(
        'name',
        meta,
        sa.Column('id', sa.Integer),
        sa.Column('name', sa.String),
    )
    place_table = sa.Table(
        'place',
        meta,
        sa.Column('id', sa.Integer),
        sa.Column('city', sa.String),
        sa.Column('country', sa.String),
    )
    friends_table = sa.Table(
        'friends',
        meta,
        sa.Column('a', sa.Integer),
        sa.Column('b', sa.Integer),
    )

    city_sym = symbol('city', discover(place_table))
    friends_sym = symbol('friends', discover(friends_table))
    name_sym = symbol('name', discover(name_table))
    scope = {
        name_sym: name_table,
        friends_sym: friends_table,
        city_sym: place_table,
    }

    # Two-table join.
    pair = join(friends_sym, name_sym, 'a', 'id')
    pair_sql = (
        " SELECT friends.a, friends.b, name.name"
        " FROM friends JOIN name on friends.a = name.id"
    )
    assert normalize(str(compute(pair, scope))) == normalize(pair_sql)

    # Join of a join with a third table.
    inner = join(friends_sym, name_sym, 'a', 'id')
    triple = join(inner, city_sym, 'a', 'id')
    result = compute(triple, scope)

    flat_sql = (
        " SELECT friends.a, friends.b, name.name, place.city, place.country"
        " FROM friends JOIN name ON friends.a = name.id"
        " JOIN place ON friends.a = place.id "
    )
    subquery_sql = (
        " SELECT alias.a, alias.b, alias.name, place.city, place.country"
        " FROM (SELECT friends.a AS a, friends.b AS b, name.name AS name"
        " FROM friends JOIN name ON friends.a = name.id) AS alias"
        " JOIN place ON alias.a = place.id "
    )

    assert any(
        normalize(str(result)) == normalize(candidate)
        for candidate in (flat_sql, subquery_sql)
    )
def data():
    """Create an in-memory SQLite database holding ``name`` and ``city`` tables.

    Both tables are created but no rows are inserted. Returns a dict with
    the engine, the metadata, both table objects and a symbol ``s`` whose
    shape is discovered from the engine.

    NOTE(review): tests receive this as a ``data`` argument, so it is
    presumably registered as a pytest fixture by a decorator outside this
    block -- confirm.
    """
    # Engine and metadata bound to it.
    engine = sa.create_engine('sqlite:///:memory:')
    metadata = sa.MetaData(engine)

    # "name" table.
    name_table = sa.Table(
        'name',
        metadata,
        sa.Column('id', sa.Integer),
        sa.Column('name', sa.String),
    )
    name_table.create()

    # "city" table.
    city_table = sa.Table(
        'city',
        metadata,
        sa.Column('id', sa.Integer),
        sa.Column('city', sa.String),
        sa.Column('country', sa.String),
    )
    city_table.create()

    db_symbol = symbol('s', discover(engine))

    return dict(
        engine=engine,
        metadata=metadata,
        name=name_table,
        city=city_table,
        s=db_symbol,
    )
def test_aliased_views_with_computation(): engine = sa.create_engine('sqlite:///:memory:') df_aaa = DataFrame({ 'x': [1, 2, 3, 2, 3], 'y': [2, 1, 2, 3, 1], 'z': [3, 3, 3, 1, 2] }) df_bbb = DataFrame({ 'w': [1, 2, 3, 2, 3], 'x': [2, 1, 2, 3, 1], 'y': [3, 3, 3, 1, 2] }) df_aaa.to_sql('aaa', engine) df_bbb.to_sql('bbb', engine) metadata = sa.MetaData(engine) metadata.reflect() sql_aaa = metadata.tables['aaa'] sql_bbb = metadata.tables['bbb'] L = symbol('aaa', discover(df_aaa)) R = symbol('bbb', discover(df_bbb)) expr = join(by(L.x, y_total=L.y.sum()), R) a = compute(expr, {L: df_aaa, R: df_bbb}) b = compute(expr, {L: sql_aaa, R: sql_bbb}) assert into(set, a) == into(set, b) expr2 = by(expr.w, count=expr.x.count(), total2=expr.y_total.sum()) a = compute(expr2, {L: df_aaa, R: df_bbb}) b = compute(expr2, {L: sql_aaa, R: sql_bbb}) assert into(set, a) == into(set, b) expr3 = by(expr.x, count=expr.y_total.count()) a = compute(expr3, {L: df_aaa, R: df_bbb}) b = compute(expr3, {L: sql_aaa, R: sql_bbb}) assert into(set, a) == into(set, b) expr4 = join(expr2, R) a = compute(expr4, {L: df_aaa, R: df_bbb}) b = compute(expr4, {L: sql_aaa, R: sql_bbb}) assert into(set, a) == into(set, b) """ # Takes a while
def test_aliased_views_with_computation(): engine = sa.create_engine('sqlite:///:memory:') df_aaa = DataFrame({'x': [1, 2, 3, 2, 3], 'y': [2, 1, 2, 3, 1], 'z': [3, 3, 3, 1, 2]}) df_bbb = DataFrame({'w': [1, 2, 3, 2, 3], 'x': [2, 1, 2, 3, 1], 'y': [3, 3, 3, 1, 2]}) df_aaa.to_sql('aaa', engine) df_bbb.to_sql('bbb', engine) metadata = sa.MetaData(engine) metadata.reflect() sql_aaa = metadata.tables['aaa'] sql_bbb = metadata.tables['bbb'] L = symbol('aaa', discover(df_aaa)) R = symbol('bbb', discover(df_bbb)) expr = join(by(L.x, y_total=L.y.sum()), R) a = compute(expr, {L: df_aaa, R: df_bbb}) b = compute(expr, {L: sql_aaa, R: sql_bbb}) assert into(set, a) == into(set, b) expr2 = by(expr.w, count=expr.x.count(), total2=expr.y_total.sum()) a = compute(expr2, {L: df_aaa, R: df_bbb}) b = compute(expr2, {L: sql_aaa, R: sql_bbb}) assert into(set, a) == into(set, b) expr3 = by(expr.x, count=expr.y_total.count()) a = compute(expr3, {L: df_aaa, R: df_bbb}) b = compute(expr3, {L: sql_aaa, R: sql_bbb}) assert into(set, a) == into(set, b) expr4 = join(expr2, R) a = compute(expr4, {L: df_aaa, R: df_bbb}) b = compute(expr4, {L: sql_aaa, R: sql_bbb}) assert into(set, a) == into(set, b) """ # Takes a while
def test_projection_of_join():
    """Projecting columns from a join with a filtered table compiles cleanly.

    Two SQL renderings are accepted: a flat join with the filter as a
    ``WHERE`` clause, or a join against an aliased subquery that carries the
    filter.
    """
    metadata = sa.MetaData()
    name = sa.Table(
        'name',
        metadata,
        sa.Column('id', sa.Integer),
        sa.Column('name', sa.String),
    )
    city = sa.Table(
        'place',
        metadata,
        sa.Column('id', sa.Integer),
        sa.Column('city', sa.String),
        sa.Column('country', sa.String),
    )

    tname = symbol('name', discover(name))
    tcity = symbol('city', discover(city))

    expr = join(tname, tcity[tcity.city == 'NYC'], 'id')[['country', 'name']]
    ns = {tname: name, tcity: city}
    result = compute(expr, ns)

    expected1 = (
        " SELECT place.country, name.name"
        " FROM name JOIN place ON name.id = place.id"
        " WHERE place.city = :city_1"
    )
    # The subquery is declared "AS alias", so the ON clause must refer to the
    # same name; the previous text referenced "alias_6", which is not defined
    # anywhere in the statement.
    expected2 = (
        " SELECT alias.country, name.name"
        " FROM name JOIN (SELECT place.id AS id, place.city AS city,"
        " place.country AS country FROM place"
        " WHERE place.city = :city_1) AS alias"
        " ON name.id = alias.id"
    )

    assert (normalize(str(result)) == normalize(expected1) or
            normalize(str(result)) == normalize(expected2))
def test_lower_column():
    """``lower_column`` maps derived columns back to their table column.

    Three cases are checked: a plain table column is returned as-is, a
    column of a ``select`` over the table resolves to the table column, and
    a column taken from a join resolves to the column of the joined table.
    """
    metadata = sa.MetaData()
    name = sa.Table(
        'name',
        metadata,
        sa.Column('id', sa.Integer),
        sa.Column('name', sa.String),
    )
    city = sa.Table(
        'place',
        metadata,
        sa.Column('id', sa.Integer),
        sa.Column('city', sa.String),
        sa.Column('country', sa.String),
    )

    # No blaze symbols are needed here: lower_column operates directly on
    # SQLAlchemy columns.
    assert lower_column(name.c.id) is name.c.id
    assert lower_column(select(name).c.id) is name.c.id

    joined = name.join(city, name.c.id == city.c.id)
    # Pick the join's 'country' column without building a throwaway list.
    country = next(c for c in joined.columns if c.name == 'country')

    assert lower_column(country) is city.c.country
def test_join_complex_clean():
    """Joining a filtered table compiles to one of two accepted SQL shapes.

    Either the filter appears as a ``WHERE`` on the flat join, or the
    filtered table appears as an aliased subquery that is then joined.
    """
    meta = sa.MetaData()
    name_table = sa.Table(
        'name',
        meta,
        sa.Column('id', sa.Integer),
        sa.Column('name', sa.String),
    )
    place_table = sa.Table(
        'place',
        meta,
        sa.Column('id', sa.Integer),
        sa.Column('city', sa.String),
        sa.Column('country', sa.String),
    )

    name_sym = symbol('name', discover(name_table))
    city_sym = symbol('city', discover(place_table))
    scope = {name_sym: name_table, city_sym: place_table}

    positive_ids = name_sym[name_sym.id > 0]
    expr = join(positive_ids, city_sym, 'id')
    result = compute(expr, scope)

    flat_sql = (
        " SELECT name.id, name.name, place.city, place.country"
        " FROM name JOIN place ON name.id = place.id"
        " WHERE name.id > :id_1"
    )
    subquery_sql = (
        " SELECT alias.id, alias.name, place.city, place.country"
        " FROM (SELECT name.id as id, name.name AS name FROM name"
        " WHERE name.id > :id_1) AS alias"
        " JOIN place ON alias.id = place.id"
    )

    # Lazily compare against each accepted form, stopping at the first match.
    assert any(
        normalize(str(result)) == normalize(candidate)
        for candidate in (flat_sql, subquery_sql)
    )
def test_projection_of_join():
    """Projecting columns from a join with a filtered table compiles cleanly.

    Two SQL renderings are accepted: a flat join with the filter as a
    ``WHERE`` clause, or a join against an aliased subquery that carries the
    filter.
    """
    metadata = sa.MetaData()
    name = sa.Table(
        'name',
        metadata,
        sa.Column('id', sa.Integer),
        sa.Column('name', sa.String),
    )
    city = sa.Table(
        'place',
        metadata,
        sa.Column('id', sa.Integer),
        sa.Column('city', sa.String),
        sa.Column('country', sa.String),
    )

    tname = symbol('name', discover(name))
    tcity = symbol('city', discover(city))

    expr = join(tname, tcity[tcity.city == 'NYC'], 'id')[['country', 'name']]
    ns = {tname: name, tcity: city}
    result = compute(expr, ns)

    expected1 = (
        " SELECT place.country, name.name"
        " FROM name JOIN place ON name.id = place.id"
        " WHERE place.city = :city_1"
    )
    # The subquery is declared "AS alias", so the ON clause must refer to the
    # same name; the previous text referenced "alias_6", which is not defined
    # anywhere in the statement.
    expected2 = (
        " SELECT alias.country, name.name"
        " FROM name JOIN (SELECT place.id AS id, place.city AS city,"
        " place.country AS country FROM place"
        " WHERE place.city = :city_1) AS alias"
        " ON name.id = alias.id"
    )

    assert (normalize(str(result)) == normalize(expected1) or
            normalize(str(result)) == normalize(expected2))
def test_date_grouper_repeats():
    """Grouping by the year of a timestamp column reuses the ``ds_year`` label.

    The expected SQL selects ``EXTRACT(year ...)`` under the label
    ``ds_year`` and groups by that label.
    """
    table = sa.Table(
        't',
        sa.MetaData(),
        sa.Column('amount', sa.REAL),
        sa.Column('ds', sa.TIMESTAMP),
    )
    t = symbol('t', discover(table))

    yearly_mean = by(t.ds.year, avg_amt=t.amount.mean())
    actual_sql = str(compute(yearly_mean, table))

    # FYI spark sql isn't able to parse this correctly
    expected_sql = (
        "SELECT EXTRACT(year FROM t.ds) as ds_year,"
        " AVG(t.amount) as avg_amt FROM t GROUP BY ds_year "
    )
    assert normalize(actual_sql) == normalize(expected_sql)
def test_join_suffixes():
    """Custom ``suffixes`` on a self-join are used to name the table aliases."""
    tab = sa.Table(
        'tab',
        sa.MetaData(),
        sa.Column('a', sa.Integer),
        sa.Column('b', sa.Integer),
    )
    t = symbol('tab', discover(tab))

    suffixes = ('_l', '_r')
    left_suffix, right_suffix = suffixes

    self_join = join(t, t, 'a', suffixes=suffixes)
    actual_sql = normalize(str(compute(self_join, {t: tab})))

    template = (
        " SELECT tab{l}.a, tab{l}.b, tab{r}.b"
        " FROM tab AS tab{l} JOIN tab AS tab{r}"
        " ON tab{l}.a = tab{r}.a "
    )
    expected_sql = template.format(l=left_suffix, r=right_suffix)
    assert actual_sql == normalize(expected_sql)
def test_join_on_same_table():
    """Self-joins alias the table as ``tab_left`` / ``tab_right``.

    Covers the plain self-join, a reduction over one suffixed column of the
    join, and a ``summary`` with two reductions over the join.
    """
    tab = sa.Table(
        'tab',
        sa.MetaData(),
        sa.Column('a', sa.Integer),
        sa.Column('b', sa.Integer),
    )
    t = symbol('tab', discover(tab))

    def compiled(expression):
        # Normalized SQL text the expression compiles to against ``tab``.
        return normalize(str(compute(expression, {t: tab})))

    # Plain self-join.
    plain_sql = (
        " SELECT tab_left.a, tab_left.b, tab_right.b"
        " FROM tab AS tab_left JOIN tab AS tab_right"
        " ON tab_left.a = tab_right.a "
    )
    assert compiled(join(t, t, 'a')) == normalize(plain_sql)

    # Reduction over a suffixed column of the join.
    reduction_sql = (
        " with alias as (select tab_left.b as b"
        " from tab as tab_left join tab as tab_right"
        " on tab_left.a = tab_right.a)"
        " select sum(alias.b) as b_left_sum from alias"
    )
    assert compiled(join(t, t, 'a').b_left.sum()) == normalize(reduction_sql)

    # Summary with two reductions over the join.
    joined = join(t, t, 'a')
    stats = summary(total=joined.a.sum(), smallest=joined.b_right.min())
    summary_sql = (
        " SELECT min(tab_right.b) as smallest, sum(tab_left.a) as total"
        " FROM tab AS tab_left JOIN tab AS tab_right"
        " ON tab_left.a = tab_right.a "
    )
    assert compiled(stats) == normalize(summary_sql)
def test_join_suffixes():
    """Custom ``suffixes`` on a self-join are used to name the table aliases."""
    tab = sa.Table(
        'tab',
        sa.MetaData(),
        sa.Column('a', sa.Integer),
        sa.Column('b', sa.Integer),
    )
    t = symbol('tab', discover(tab))

    suffixes = ('_l', '_r')
    left_suffix, right_suffix = suffixes

    self_join = join(t, t, 'a', suffixes=suffixes)
    actual_sql = normalize(str(compute(self_join, {t: tab})))

    template = (
        " SELECT tab{l}.a, tab{l}.b, tab{r}.b"
        " FROM tab AS tab{l} JOIN tab AS tab{r}"
        " ON tab{l}.a = tab{r}.a "
    )
    expected_sql = template.format(l=left_suffix, r=right_suffix)
    assert actual_sql == normalize(expected_sql)
def test_join_on_same_table():
    """Self-joins alias the table as ``tab_left`` / ``tab_right``.

    Covers the plain self-join, a reduction over one suffixed column of the
    join, and a ``summary`` with two reductions over the join.
    """
    tab = sa.Table(
        'tab',
        sa.MetaData(),
        sa.Column('a', sa.Integer),
        sa.Column('b', sa.Integer),
    )
    t = symbol('tab', discover(tab))

    def compiled(expression):
        # Normalized SQL text the expression compiles to against ``tab``.
        return normalize(str(compute(expression, {t: tab})))

    # Plain self-join.
    plain_sql = (
        " SELECT tab_left.a, tab_left.b, tab_right.b"
        " FROM tab AS tab_left JOIN tab AS tab_right"
        " ON tab_left.a = tab_right.a "
    )
    assert compiled(join(t, t, 'a')) == normalize(plain_sql)

    # Reduction over a suffixed column of the join.
    reduction_sql = (
        " with alias as (select tab_left.b as b"
        " from tab as tab_left join tab as tab_right"
        " on tab_left.a = tab_right.a)"
        " select sum(alias.b) as b_left_sum from alias"
    )
    assert compiled(join(t, t, 'a').b_left.sum()) == normalize(reduction_sql)

    # Summary with two reductions over the join.
    joined = join(t, t, 'a')
    stats = summary(total=joined.a.sum(), smallest=joined.b_right.min())
    summary_sql = (
        " SELECT min(tab_right.b) as smallest, sum(tab_left.a) as total"
        " FROM tab AS tab_left JOIN tab AS tab_right"
        " ON tab_left.a = tab_right.a "
    )
    assert compiled(stats) == normalize(summary_sql)
def data():
    """Create an in-memory SQLite database holding ``name`` and ``city`` tables.

    Both tables are created but no rows are inserted. Returns a dict with
    the engine, the metadata, both table objects and a symbol ``s`` whose
    shape is discovered from the engine.

    NOTE(review): tests receive this as a ``data`` argument, so it is
    presumably registered as a pytest fixture by a decorator outside this
    block -- confirm.
    """
    # Engine and metadata bound to it.
    engine = sa.create_engine('sqlite:///:memory:')
    metadata = sa.MetaData(engine)

    # "name" table.
    name_table = sa.Table(
        'name',
        metadata,
        sa.Column('id', sa.Integer),
        sa.Column('name', sa.String),
    )
    name_table.create()

    # "city" table.
    city_table = sa.Table(
        'city',
        metadata,
        sa.Column('id', sa.Integer),
        sa.Column('city', sa.String),
        sa.Column('country', sa.String),
    )
    city_table.create()

    db_symbol = symbol('s', discover(engine))

    return dict(
        engine=engine,
        metadata=metadata,
        name=name_table,
        city=city_table,
        s=db_symbol,
    )
def test_computation_directly_on_sqlalchemy_Tables(data):
    """Computing against a bare ``sa.Table`` yields data, not a SQL selectable.

    The ``name`` table from ``data`` has no rows, so the materialized result
    is expected to be empty.
    """
    name_table = data['name']
    sym = symbol('s', discover(name_table))

    computed = compute(sym.id + 1, name_table)
    rows = into(list, computed)

    assert not isinstance(rows, sa.sql.Selectable)
    assert list(rows) == []
metadata = data['metadata'] name = data['name'] s = symbol('s', discover(metadata)) result = compute(s.name, {s: metadata}, post_compute=False) assert result == name sql_bank = sa.Table('bank', sa.MetaData(), sa.Column('id', sa.Integer), sa.Column('name', sa.String), sa.Column('amount', sa.Integer)) sql_cities = sa.Table('cities', sa.MetaData(), sa.Column('name', sa.String), sa.Column('city', sa.String)) bank = symbol('bank', discover(sql_bank)) cities = symbol('cities', discover(sql_cities)) def test_aliased_views_with_two_group_bys(): expr = by(bank.name, total=bank.amount.sum()) expr2 = by(expr.total, count=expr.name.count()) result = compute(expr2, {bank: sql_bank, cities: sql_cities}) assert normalize(str(result)) == normalize(""" SELECT alias.total, count(alias.name) as count FROM (SELECT bank.name AS name, sum(bank.amount) AS total FROM bank GROUP BY bank.name) as alias GROUP BY alias.total
def test_computation_directly_on_metadata(data):
    """Selecting a table field from a metadata symbol returns that table.

    ``post_compute=False`` is passed so the raw computed object is compared
    against the ``name`` table.
    """
    meta = data['metadata']
    name_table = data['name']

    meta_sym = symbol('s', discover(meta))
    scope = {meta_sym: meta}

    result = compute(meta_sym.name, scope, post_compute=False)
    assert result == name_table
def test_discover():
    """``discover`` on a symbol returns the datashape it was declared with."""
    shape_text = 'var * {x: int, y: int, z: int}'
    sym = symbol('a', shape_text)

    discovered = discover(sym)
    assert discovered == dshape(shape_text)
def test_discover():
    """``discover`` on a ``TableSymbol`` returns ``var *`` its record schema."""
    record_schema = '{x: int, y: int, z: int}'
    table_sym = TableSymbol('a', record_schema)

    discovered = discover(table_sym)
    assert discovered == var * record_schema
def test_computation_directly_on_metadata(data): metadata = data['metadata'] name = data['name'] s = symbol('s', discover(metadata)) result = compute(s.name, {s: metadata}, post_compute=False) assert result == name sql_bank = sa.Table('bank', sa.MetaData(), sa.Column('id', sa.Integer), sa.Column('name', sa.String), sa.Column('amount', sa.Integer)) sql_cities = sa.Table('cities', sa.MetaData(), sa.Column('name', sa.String), sa.Column('city', sa.String)) bank = symbol('bank', discover(sql_bank)) cities = symbol('cities', discover(sql_cities)) def test_aliased_views_with_two_group_bys(): expr = by(bank.name, total=bank.amount.sum()) expr2 = by(expr.total, count=expr.name.count()) result = compute(expr2, {bank: sql_bank, cities: sql_cities}) assert normalize(str(result)) == normalize(""" SELECT alias.total, count(alias.name) as count FROM (SELECT bank.name AS name, sum(bank.amount) AS total FROM bank GROUP BY bank.name) as alias GROUP BY alias.total
def test_discover():
    """``discover`` on a ``TableSymbol`` returns ``var *`` its record schema."""
    record_schema = "{x: int, y: int, z: int}"
    table_sym = TableSymbol("a", record_schema)

    discovered = discover(table_sym)
    assert discovered == var * record_schema