def test_unsupported_aggregate_functions(alltypes, column, op):
    """Check that windowing the requested aggregate fails to translate.

    The method named ``op`` is looked up on ``alltypes[column]`` and called,
    the result is applied over a window ordered by column ``d`` and
    projected under the name ``foo``.  Compiling that projection with
    ``to_sql`` must raise ``com.TranslationError``.
    """
    window = ibis.window(order_by=alltypes.d)
    reduction = getattr(alltypes[column], op)
    windowed = reduction().over(window).name('foo')
    projection = alltypes.projection([windowed])
    with pytest.raises(com.TranslationError):
        to_sql(projection)
def test_window_rows_with_max_lookback(con):
    """Check that a trailing window with a max lookback is rejected.

    A ``rows_with_max_lookback(3, <3-day interval>)`` spec is used as the
    preceding bound of a trailing window ordered by column ``i``; compiling
    ``a.sum()`` over that window with ``to_sql`` must raise
    ``NotImplementedError``.
    """
    table = con.table('alltypes')
    lookback = rows_with_max_lookback(3, ibis.interval(days=3))
    window = ibis.trailing_window(lookback, order_by=table.i)
    windowed_sum = table.a.sum().over(window)
    with pytest.raises(NotImplementedError):
        to_sql(windowed_sum)
def test_unsupported_aggregate_functions(con, column, op):
    """Check that windowing the requested aggregate fails to translate.

    Uses the ``alltypes`` table from ``con``.  The method named ``op`` is
    looked up on the given ``column`` and called, applied over a window
    ordered by column ``d`` and projected as ``foo``; compiling the
    projection with ``to_sql`` must raise ``com.TranslationError``.
    """
    table = con.table('alltypes')
    window = ibis.window(order_by=table.d)
    reduction = getattr(table[column], op)
    windowed = reduction().over(window).name('foo')
    projection = table.projection([windowed])
    with pytest.raises(com.TranslationError):
        to_sql(projection)
def test_cumulative_functions(alltypes, cumulative, static):
    """Check that a cumulative expression compiles like its static twin.

    ``cumulative`` and ``static`` are callables taking ``(table, window)``.
    The static result is additionally placed over
    ``ibis.cumulative_window()``; both are named ``foo``, projected from
    ``alltypes`` and must compile to identical SQL.
    """
    window = ibis.window(order_by=alltypes.d)

    cumulative_expr = cumulative(alltypes, window).name('foo')
    static_expr = static(alltypes, window)
    static_expr = static_expr.over(ibis.cumulative_window()).name('foo')

    cumulative_proj = alltypes.projection(cumulative_expr)
    static_proj = alltypes.projection(static_expr)

    cumulative_sql = to_sql(cumulative_proj)
    static_sql = to_sql(static_proj)
    assert cumulative_sql == static_sql
def test_unsupported_aggregate_functions(self):
    """Check that certain aggregates cannot be compiled as window functions.

    For ``approx_nunique``, ``approx_median`` and ``group_concat``, building
    the windowed projection and compiling it with ``to_sql`` must raise
    ``com.TranslationError``.
    """
    table = self.con.table('alltypes')
    window = ibis.window(order_by=table.d)

    unsupported = (
        table.f.approx_nunique(),
        table.f.approx_median(),
        table.g.group_concat(),
    )

    for reduction in unsupported:
        # Both building the projection and compiling it stay inside the
        # context manager, so an error from either step satisfies the check.
        with self.assertRaises(com.TranslationError):
            windowed = reduction.over(window).name('foo')
            to_sql(table.projection([windowed]))
# Checks the SQL produced by to_sql for self.table filtered on column `g`:
# first with isin(["foo", "bar"]), expecting an `IN ('foo', 'bar')` WHERE
# clause, then with notin(...), expecting `NOT IN ('foo', 'bar')`.
# NOTE(review): the expected SQL literals appear flattened onto one line in
# this view; confirm their real line breaks before reformatting this test.
def test_isin_notin_in_select(self): filtered = self.table[self.table.g.isin(["foo", "bar"])] result = to_sql(filtered) expected = """SELECT * FROM alltypes WHERE `g` IN ('foo', 'bar')""" assert result == expected filtered = self.table[self.table.g.notin(["foo", "bar"])] result = to_sql(filtered) expected = """SELECT * FROM alltypes WHERE `g` NOT IN ('foo', 'bar')""" assert result == expected
# Builds unbound `ratings` and `movies` tables, projects ratings with the
# `timestamp` column cast and renamed to `datetime`, joins to movies on
# `movieid`, filters the join (userid == 118205, year > 2001), then filters
# again (year < 2009) and keeps only `movieid`.  The first filtered join is
# restricted to rows whose movieid isin that column, and the compiled SQL is
# compared with an expected query that uses two CTEs (t0, t1) and an IN
# subquery.
# NOTE(review): the expected SQL literal appears flattened onto one line in
# this view; confirm its real line breaks before reformatting this test.
def test_nested_join_multiple_ctes(): ratings = ibis.table( [ ('userid', 'int64'), ('movieid', 'int64'), ('rating', 'int8'), ('timestamp', 'string'), ], name='ratings', ) movies = ibis.table( [('movieid', 'int64'), ('title', 'string')], name='movies' ) expr = ratings.timestamp.cast('timestamp') ratings2 = ratings['userid', 'movieid', 'rating', expr.name('datetime')] joined2 = ratings2.join(movies, ['movieid'])[ratings2, movies['title']] joined3 = joined2.filter( [joined2.userid == 118205, joined2.datetime.year() > 2001] ) top_user_old_movie_ids = joined3.filter( [joined3.userid == 118205, joined3.datetime.year() < 2009] )[['movieid']] # projection from a filter was hiding an insidious bug, so we're disabling # that for now see issue #1295 cond = joined3.movieid.isin(top_user_old_movie_ids.movieid) result = joined3[cond] expected = """\ WITH t0 AS ( SELECT `userid`, `movieid`, `rating`, CAST(`timestamp` AS timestamp) AS `datetime` FROM ratings ), t1 AS ( SELECT t0.*, t5.`title` FROM t0 INNER JOIN movies t5 ON t0.`movieid` = t5.`movieid` ) SELECT t2.* FROM ( SELECT t1.* FROM t1 WHERE (t1.`userid` = 118205) AND (extract(t1.`datetime`, 'year') > 2001) ) t2 WHERE t2.`movieid` IN ( SELECT `movieid` FROM ( SELECT t1.* FROM t1 WHERE (t1.`userid` = 118205) AND (extract(t1.`datetime`, 'year') > 2001) AND (t1.`userid` = 118205) AND (extract(t1.`datetime`, 'year') < 2009) ) t4 )""" compiled_result = to_sql(result) assert compiled_result == expected
# Builds a chain of derived tables from `test_table` (a mutate adding
# d = a + 20, a projection, two group-by aggregations, a self-join of the
# aggregate with its per-`d` totals, and a filter), joins a separately
# aggregated `.view()` of the mutated table to that chain on `d`, and
# compares the compiled SQL with an expected query that uses CTEs t0-t2 and
# subquery aliases t3, t4, t5 and t8.
# NOTE(review): the expected SQL literal appears flattened onto one line in
# this view; confirm its real line breaks before reformatting this test.
def test_join_aliasing(): test = ibis.table([ ('a', 'int64'), ('b', 'int64'), ('c', 'int64'), ], name='test_table') test = test.mutate(d=test.a + 20) test2 = test[test.d, test.c] idx = (test2.d / 15).cast('int64').name('idx') test3 = (test2.groupby([test2.d, idx, test2.c]).aggregate(row_count=test2.count())) test3_totals = test3.groupby( test3.d).aggregate(total=test3.row_count.sum()) test4 = test3.join(test3_totals, test3.d == test3_totals.d)[test3, test3_totals.total] test5 = test4[test4.row_count < test4.total / 2] agg = test.groupby([test.d, test.b]).aggregate(count=test.count(), unique=test.c.nunique()).view() joined = agg.join(test5, agg.d == test5.d)[agg, test5.total] result = joined result = to_sql(result) expected = """\ WITH t0 AS ( SELECT *, `a` + 20 AS `d` FROM test_table ), t1 AS ( SELECT `d`, `c` FROM t0 ), t2 AS ( SELECT `d`, CAST(`d` / 15 AS bigint) AS `idx`, `c`, count(*) AS `row_count` FROM t1 GROUP BY 1, 2, 3 ) SELECT t3.*, t4.`total` FROM ( SELECT `d`, `b`, count(*) AS `count`, count(DISTINCT `c`) AS `unique` FROM t0 GROUP BY 1, 2 ) t3 INNER JOIN ( SELECT t5.* FROM ( SELECT t2.*, t8.`total` FROM t2 INNER JOIN ( SELECT `d`, sum(`row_count`) AS `total` FROM t2 GROUP BY 1 ) t8 ON t2.`d` = t8.`d` ) t5 WHERE t5.`row_count` < (t5.`total` / 2) ) t4 ON t3.`d` = t4.`d`""" assert result == expected
# Checks the SQL produced by to_sql for self.table filtered on column `g`
# with isin(values) and notin(values).  The expected text is built by
# formatting `tuple(set(values))` into an `IN {}` / `NOT IN {}` template.
# NOTE(review): set iteration order is not guaranteed for strings, so the
# formatted tuple may list the values in a different order than the query
# to_sql emits; confirm this comparison is stable across runs.
# NOTE(review): the expected SQL literals appear flattened onto one line in
# this view; confirm their real line breaks before reformatting this test.
def test_isin_notin_in_select(self): values = ['foo', 'bar'] values_formatted = tuple(set(values)) filtered = self.table[self.table.g.isin(values)] result = to_sql(filtered) expected = """SELECT * FROM alltypes WHERE `g` IN {}""" assert result == expected.format(values_formatted) filtered = self.table[self.table.g.notin(values)] result = to_sql(filtered) expected = """SELECT * FROM alltypes WHERE `g` NOT IN {}""" assert result == expected.format(values_formatted)
# Compiles `tinyint_col.identical_to(double_col)` on the
# `functional_alltypes` table and expects a SELECT that renders the
# comparison as `IS NOT DISTINCT FROM`, aliased as `tmp`.
# NOTE(review): the expected SQL literal appears flattened onto one line in
# this view; confirm its real line breaks before reformatting this test.
def test_identical_to(self): t = self.con.table('functional_alltypes') expr = t.tinyint_col.identical_to(t.double_col) result = to_sql(expr) expected = """\ SELECT `tinyint_col` IS NOT DISTINCT FROM `double_col` AS `tmp` FROM functional_alltypes""" assert result == expected
# Filters an unbound two-string-column table on
# `a.identical_to(None) == b.identical_to(None)` and expects each
# `IS NOT DISTINCT FROM NULL` operand to be wrapped in parentheses in the
# compiled WHERE clause.
# NOTE(review): the expected SQL literal appears flattened onto one line in
# this view; confirm its real line breaks before reformatting this test.
def test_is_parens_identical_to(): t = ibis.table([('a', 'string'), ('b', 'string')], 'table') expr = t[t.a.identical_to(None) == t.b.identical_to(None)] result = to_sql(expr) expected = """\ SELECT * FROM `table` WHERE (`a` IS NOT DISTINCT FROM NULL) = (`b` IS NOT DISTINCT FROM NULL)""" assert result == expected
def test_cumulative_functions(con):
    """Check that cumulative reductions compile like windowed plain ones.

    For sum/min/max/mean on column ``f`` of ``alltypes``, the ``cum*``
    form over a window ordered by ``d`` must produce the same SQL as the
    plain reduction over that window, further placed over
    ``ibis.cumulative_window()``.  Both sides are named ``foo`` and
    projected from the table before compilation.
    """
    table = con.table('alltypes')
    window = ibis.window(order_by=table.d)

    # (cumulative form, plain form) pairs, all built before any compilation.
    pairs = [
        (table.f.cumsum().over(window), table.f.sum().over(window)),
        (table.f.cummin().over(window), table.f.min().over(window)),
        (table.f.cummax().over(window), table.f.max().over(window)),
        (table.f.cummean().over(window), table.f.mean().over(window)),
    ]

    for cumulative_expr, plain_expr in pairs:
        named_cumulative = cumulative_expr.name('foo')
        named_plain = plain_expr.over(ibis.cumulative_window()).name('foo')

        cumulative_proj = table.projection(named_cumulative)
        plain_proj = table.projection(named_plain)

        cumulative_sql = to_sql(cumulative_proj)
        plain_sql = to_sql(plain_proj)
        assert cumulative_sql == plain_sql
def compile(expr, params=None):
    """Compile ``expr`` to SQL text with the Impala compiler.

    Parameters
    ----------
    expr
        The expression to compile.
    params : optional
        Forwarded to ``dialect.make_context`` as ``params``.

    Returns
    -------
    str
    """
    # Imported at call time, as in the original implementation.
    from ibis.impala.compiler import to_sql

    context = dialect.make_context(params=params)
    return to_sql(expr, context)
# Joins `star1` to `star2` without any predicates and projects the left
# table.  The explicit cross_join and each of inner_join / left_join /
# outer_join called with no predicates are all expected to compile to the
# same CROSS JOIN query.
# NOTE(review): the inline comment and the expected SQL literal appear
# flattened onto one line in this view; confirm their real line breaks
# before reformatting this test.
def test_join_no_predicates_for_impala(self): # Impala requires that joins without predicates be written explicitly # as CROSS JOIN, since result sets can accidentally get too large if a # query is executed before predicates are written t1 = self.con.table('star1') t2 = self.con.table('star2') joined2 = t1.cross_join(t2)[[t1]] expected = """SELECT t0.* FROM star1 t0 CROSS JOIN star2 t1""" result2 = to_sql(joined2) assert result2 == expected for jtype in ['inner_join', 'left_join', 'outer_join']: joined = getattr(t1, jtype)(t2)[[t1]] result = to_sql(joined) assert result == expected
def compile(expr, params=None):
    """Compile ``expr`` as though it were an Impala-backed expression.

    The same result is also available through ``expr.compile()``.

    Parameters
    ----------
    expr
        The expression to compile.
    params : optional
        Forwarded to ``dialect.make_context`` as ``params``.

    Returns
    -------
    compiled : string
    """
    # Imported at call time, as in the original implementation.
    from ibis.impala.compiler import to_sql

    context = dialect.make_context(params=params)
    return to_sql(expr, context)
# Relabels columns `foo` -> `one` and `baz` -> `three` on an unbound
# three-column table and expects the compiled SELECT to alias exactly those
# two columns while leaving `bar` untouched.
# NOTE(review): the inline comment and the expected SQL literal appear
# flattened onto one line in this view; confirm their real line breaks
# before reformatting this test.
def test_relabel_projection(self): # GH #551 types = ['int32', 'string', 'double'] table = ibis.table(zip(['foo', 'bar', 'baz'], types), 'table') relabeled = table.relabel({'foo': 'one', 'baz': 'three'}) result = to_sql(relabeled) expected = """\ SELECT `foo` AS `one`, `bar`, `baz` AS `three` FROM `table`""" assert result == expected
# Relabels columns `foo` -> `one` and `baz` -> `three` on an unbound
# three-column table (table name passed by keyword) and expects the compiled
# SELECT to alias exactly those two columns while leaving `bar` untouched.
# NOTE(review): the inline comment and the expected SQL literal appear
# flattened onto one line in this view; confirm their real line breaks
# before reformatting this test.
def test_relabel_projection(self): # GH #551 types = ['int32', 'string', 'double'] table = ibis.table(zip(['foo', 'bar', 'baz'], types), name='table') relabeled = table.relabel({'foo': 'one', 'baz': 'three'}) result = to_sql(relabeled) expected = """\ SELECT `foo` AS `one`, `bar`, `baz` AS `three` FROM `table`""" assert result == expected
# Calls the method named `method` (via operator.methodcaller) on columns
# `a` and `b` of an unbound table, filters on the equality of the two
# results, and expects each `<col> {sql} NULL` operand to be parenthesised
# in the compiled WHERE clause, with `sql` substituted into the template.
# NOTE(review): the expected SQL literal appears flattened onto one line in
# this view; confirm its real line breaks before reformatting this test.
def test_is_parens(method, sql): t = ibis.table([('a', 'string'), ('b', 'string')], 'table') func = operator.methodcaller(method) expr = t[func(t.a) == func(t.b)] result = to_sql(expr) expected = """\ SELECT * FROM `table` WHERE (`a` {sql} NULL) = (`b` {sql} NULL)""".format(sql=sql) assert result == expected
# Calls the method named `method` (via operator.methodcaller) on columns
# `a` and `b` of an unbound table, filters on the equality of the two
# results, and expects each `<col> {sql} NULL` operand to be parenthesised
# in the compiled WHERE clause, with `sql` substituted into the template.
# NOTE(review): the expected SQL literal appears flattened onto one line in
# this view; confirm its real line breaks before reformatting this test.
def test_is_parens(method, sql): t = ibis.table([('a', 'string'), ('b', 'string')], 'table') func = operator.methodcaller(method) expr = t[func(t.a) == func(t.b)] result = to_sql(expr) expected = """\ SELECT * FROM `table` WHERE (`a` {sql} NULL) = (`b` {sql} NULL)""".format( sql=sql ) assert result == expected
# Self-joins table `t` with a view of itself on two predicates, the second
# being an XOR (`^`) of two inequalities, and projects the left side.  The
# expected SQL spells the XOR out as `(x OR y) AND NOT (x AND y)` inside the
# ON clause.
# NOTE(review): the expected SQL literal and the trailing `# noqa` comment
# appear flattened onto one line in this view; confirm the real line breaks
# before reformatting this test.
def test_join_with_nested_xor_condition(): t1 = ibis.table([('a', 'string'), ('b', 'string')], 't') t2 = t1.view() joined = t1.join(t2, [t1.a == t2.a, (t1.a != t2.b) ^ (t1.b != t2.a)]) expr = joined[t1] expected = """\ SELECT t0.* FROM t t0 INNER JOIN t t1 ON (t0.`a` = t1.`a`) AND (((t0.`a` != t1.`b`) OR (t0.`b` != t1.`a`)) AND NOT ((t0.`a` != t1.`b`) AND (t0.`b` != t1.`a`)))""" # noqa: E501 assert to_sql(expr) == expected
# Builds `a.isin(['foo']) & c.notnull()` through a local helper `f`,
# negates it with `~`, and expects the compiled SELECT to wrap the whole
# conjunction in a single `NOT (...)`, aliased as `tmp`.
# NOTE(review): the expected SQL literal appears flattened onto one line in
# this view; confirm its real line breaks before reformatting this test.
def test_logically_negate_complex_boolean_expr(): t = ibis.table( [('a', 'string'), ('b', 'double'), ('c', 'int64'), ('d', 'string')], name='t', ) def f(t): return t.a.isin(['foo']) & t.c.notnull() expr = f(t) result = to_sql(~expr) expected = """\ SELECT NOT (`a` IN ('foo') AND (`c` IS NOT NULL)) AS `tmp` FROM t""" assert result == expected
def _check_impala_output_types_match(self, table):
    """Fail the test if the executed query's types differ from ``table``'s.

    ``table`` is compiled with ``to_sql`` and the resulting query text is
    passed to ``self.con.sql``.  The schema of that result (left) is then
    compared, column by column, with ``table.schema()`` (right).

    ``Category`` types on either side are converted with
    ``to_integer_type()`` before the comparison.  The first mismatching
    column triggers ``pytest.fail`` with a message naming the column (taken
    from the left schema) and both types.
    """
    def _clean_type(x):
        # Compare categories by their integer representation.
        if isinstance(x, Category):
            x = x.to_integer_type()
        return x

    query = to_sql(table)
    t = self.con.sql(query)

    left, right = t.schema(), table.schema()
    # NOTE(review): zip stops at the shorter sequence, so a difference in
    # column count between the two schemas is not reported here.
    for name, left_type, right_type in zip(
        left.names, left.types, right.types
    ):
        left_type = _clean_type(left_type)
        right_type = _clean_type(right_type)

        if left_type != right_type:
            pytest.fail("Value for {0} had left type {1}"
                        " and right type {2}".format(
                            name, left_type, right_type))
# For each of year/month/day/hour/minute/second/millisecond, pairs the
# corresponding method call on column `i` with the text
# "extract(`i`, '<field>')" and hands the pairs to self._check_expr_cases.
# Then projects year, month and day as named columns and compares the
# compiled SELECT with the expected query.
# NOTE(review): the inline comment and the expected SQL literal appear
# flattened onto one line in this view; confirm their real line breaks
# before reformatting this test.
def test_timestamp_extract_field(self): fields = ["year", "month", "day", "hour", "minute", "second", "millisecond"] cases = [(getattr(self.table.i, field)(), "extract(`i`, '{0}')".format(field)) for field in fields] self._check_expr_cases(cases) # integration with SQL translation expr = self.table[ self.table.i.year().name("year"), self.table.i.month().name("month"), self.table.i.day().name("day") ] result = to_sql(expr) expected = """SELECT extract(`i`, 'year') AS `year`, extract(`i`, 'month') AS `month`, extract(`i`, 'day') AS `day` FROM alltypes""" assert result == expected
# From table `t`, builds per-uuid row counts, per-uuid last visit (max ts)
# and per-uuid max count; left-joins max counts to counts (on uuid and
# max_count == count), then left-joins that result to the last-visit
# aggregate on uuid.  The expected SQL uses a single CTE (t0) for the
# counts and inline subqueries for the other pieces.
# NOTE(review): the expected SQL literal appears flattened onto one line in
# this view; confirm its real line breaks before reformatting this test.
def test_nested_joins_single_cte(): t = ibis.table([('uuid', 'string'), ('ts', 'timestamp')], name='t') counts = t.group_by('uuid').size() last_visit = t.group_by('uuid').aggregate(last_visit=t.ts.max()) max_counts = counts.group_by('uuid').aggregate( max_count=counts['count'].max() ) main_kw = max_counts.left_join( counts, ['uuid', max_counts.max_count == counts['count']] ).projection([counts]) result = main_kw.left_join(last_visit, 'uuid').projection([ main_kw, last_visit.last_visit, ]) expected = """\ WITH t0 AS ( SELECT `uuid`, count(*) AS `count` FROM t GROUP BY 1 ) SELECT t1.*, t2.`last_visit` FROM ( SELECT t0.* FROM ( SELECT `uuid`, max(`count`) AS `max_count` FROM t0 GROUP BY 1 ) t3 LEFT OUTER JOIN t0 ON t3.`uuid` = t0.`uuid` AND t3.`max_count` = t0.`count` ) t1 LEFT OUTER JOIN ( SELECT `uuid`, max(`ts`) AS `last_visit` FROM t GROUP BY 1 ) t2 ON t1.`uuid` = t2.`uuid`""" compiled_result = to_sql(result) assert compiled_result == expected
def _check_impala_output_types_match(self, table):
    """Fail the test if the executed query's types differ from ``table``'s.

    ``table`` is compiled with ``to_sql`` and the resulting query text is
    passed to ``self.con.sql``.  The schema of that result (left) is then
    compared, column by column, with ``table.schema()`` (right).

    ``Category`` types on either side are converted with
    ``to_integer_type()`` before the comparison.  The first mismatching
    column triggers ``pytest.fail`` with a message naming the column (taken
    from the left schema) and both types.
    """
    def _clean_type(x):
        # Compare categories by their integer representation.
        if isinstance(x, Category):
            x = x.to_integer_type()
        return x

    query = to_sql(table)
    t = self.con.sql(query)

    left, right = t.schema(), table.schema()
    # NOTE(review): zip stops at the shorter sequence, so a difference in
    # column count between the two schemas is not reported here.
    for name, left_type, right_type in zip(
        left.names, left.types, right.types
    ):
        left_type = _clean_type(left_type)
        right_type = _clean_type(right_type)

        if left_type != right_type:
            pytest.fail('Value for {0} had left type {1}'
                        ' and right type {2}'.format(
                            name, left_type, right_type))
# Filters table `t0` on `a < 100`, then filters that result on `a` being
# equal to its own max.  The expected SQL nests the first filter as a
# subquery and repeats it inside the scalar max(`a`) subquery.
# NOTE(review): the expected SQL literal appears flattened onto one line in
# this view; confirm its real line breaks before reformatting this test.
def test_multiple_filters(): t = ibis.table([('a', 'int64'), ('b', 'string')], name='t0') filt = t[t.a < 100] expr = filt[filt.a == filt.a.max()] result = to_sql(expr) expected = """\ SELECT * FROM ( SELECT * FROM t0 WHERE `a` < 100 ) t0 WHERE `a` = ( SELECT max(`a`) AS `max` FROM t0 WHERE `a` < 100 )""" assert result == expected
# For each of year/month/day/hour/minute/second/millisecond, pairs the
# corresponding method call on column `i` with the text
# "extract(`i`, '<field>')" and hands the pairs to self._check_expr_cases.
# Then projects year, month and day as named columns and compares the
# compiled SELECT with the expected query.
# NOTE(review): the inline comment, the backslash continuation and the
# expected SQL literal appear flattened onto one line in this view; confirm
# their real line breaks before reformatting this test.
def test_timestamp_extract_field(self): fields = ['year', 'month', 'day', 'hour', 'minute', 'second', 'millisecond'] cases = [(getattr(self.table.i, field)(), "extract(`i`, '{0}')".format(field)) for field in fields] self._check_expr_cases(cases) # integration with SQL translation expr = self.table[self.table.i.year().name('year'), self.table.i.month().name('month'), self.table.i.day().name('day')] result = to_sql(expr) expected = \ """SELECT extract(`i`, 'year') AS `year`, extract(`i`, 'month') AS `month`, extract(`i`, 'day') AS `day` FROM alltypes""" assert result == expected
# Buckets column `f` at [0, 10, 25, 50] with include_under=True, counts
# rows per bucket (`tier`), then labels the tiers with four strings and
# nulls='error' as `tier2`.  The compiled SQL is compared with an expected
# query using a simple CASE over `tier` on top of a searched CASE over `f`.
# Finally asserts that label() raises ValueError for lists of three and
# five labels (presumably because neither matches the four buckets).
# NOTE(review): the expected SQL literal appears flattened onto one line in
# this view; confirm its real line breaks before reformatting this test.
def test_bucket_assign_labels(self): buckets = [0, 10, 25, 50] bucket = self.table.f.bucket(buckets, include_under=True) size = self.table.group_by(bucket.name('tier')).size() labelled = size.tier.label(['Under 0', '0 to 10', '10 to 25', '25 to 50'], nulls='error').name('tier2') expr = size[labelled, size['count']] expected = """\ SELECT CASE `tier` WHEN 0 THEN 'Under 0' WHEN 1 THEN '0 to 10' WHEN 2 THEN '10 to 25' WHEN 3 THEN '25 to 50' ELSE 'error' END AS `tier2`, `count` FROM ( SELECT CASE WHEN `f` < 0 THEN 0 WHEN (`f` >= 0) AND (`f` < 10) THEN 1 WHEN (`f` >= 10) AND (`f` < 25) THEN 2 WHEN (`f` >= 25) AND (`f` <= 50) THEN 3 ELSE NULL END AS `tier`, count(*) AS `count` FROM alltypes GROUP BY 1 ) t0""" result = to_sql(expr) assert result == expected self.assertRaises(ValueError, size.tier.label, ['a', 'b', 'c']) self.assertRaises(ValueError, size.tier.label, ['a', 'b', 'c', 'd', 'e'])
# From table `t`, builds per-uuid row counts and, from those, a per-uuid
# max count (via a lambda); left-joins the max counts to the counts on
# `uuid` and projects the counts.  The expected SQL puts the counts in a
# CTE (t0) and the max-count aggregate in an inline subquery (t1).
# NOTE(review): the expected SQL literal appears flattened onto one line in
# this view; confirm its real line breaks before reformatting this test.
def test_nested_join_base(): t = ibis.table([('uuid', 'string'), ('ts', 'timestamp')], name='t') counts = t.group_by('uuid').size() max_counts = counts.group_by('uuid').aggregate( max_count=lambda x: x['count'].max()) result = max_counts.left_join(counts, 'uuid').projection([counts]) compiled_result = to_sql(result) expected = """\ WITH t0 AS ( SELECT `uuid`, count(*) AS `count` FROM t GROUP BY 1 ) SELECT t0.* FROM ( SELECT `uuid`, max(`count`) AS `max_count` FROM t0 GROUP BY 1 ) t1 LEFT OUTER JOIN t0 ON t1.`uuid` = t0.`uuid`""" assert compiled_result == expected
# Buckets column `f` at [0, 10, 25, 50] with include_under=True, counts
# rows per bucket (`tier`), then labels the tiers with four strings and
# nulls="error" as `tier2`.  The compiled SQL is compared with an expected
# query using a simple CASE over `tier` on top of a searched CASE over `f`.
# Finally asserts that label() raises ValueError for lists of three and
# five labels (presumably because neither matches the four buckets).
# NOTE(review): the expected SQL literal appears flattened onto one line in
# this view; confirm its real line breaks before reformatting this test.
def test_bucket_assign_labels(self): buckets = [0, 10, 25, 50] bucket = self.table.f.bucket(buckets, include_under=True) size = self.table.group_by(bucket.name("tier")).size() labelled = size.tier.label(["Under 0", "0 to 10", "10 to 25", "25 to 50"], nulls="error").name("tier2") expr = size[labelled, size["count"]] expected = """\ SELECT CASE `tier` WHEN 0 THEN 'Under 0' WHEN 1 THEN '0 to 10' WHEN 2 THEN '10 to 25' WHEN 3 THEN '25 to 50' ELSE 'error' END AS `tier2`, `count` FROM ( SELECT CASE WHEN `f` < 0 THEN 0 WHEN (`f` >= 0) AND (`f` < 10) THEN 1 WHEN (`f` >= 10) AND (`f` < 25) THEN 2 WHEN (`f` >= 25) AND (`f` <= 50) THEN 3 ELSE NULL END AS `tier`, count(*) AS `count` FROM alltypes GROUP BY 1 ) t0""" result = to_sql(expr) assert result == expected self.assertRaises(ValueError, size.tier.label, ["a", "b", "c"]) self.assertRaises(ValueError, size.tier.label, ["a", "b", "c", "d", "e"])
# From table `t`, builds per-uuid row counts and, from those, a per-uuid
# max count (via a lambda); left-joins the max counts to the counts on
# `uuid` and projects the counts.  The expected SQL puts the counts in a
# CTE (t0) and the max-count aggregate in an inline subquery (t1).
# NOTE(review): the expected SQL literal appears flattened onto one line in
# this view; confirm its real line breaks before reformatting this test.
def test_nested_join_base(): t = ibis.table([('uuid', 'string'), ('ts', 'timestamp')], name='t') counts = t.group_by('uuid').size() max_counts = counts.group_by('uuid').aggregate( max_count=lambda x: x['count'].max() ) result = max_counts.left_join(counts, 'uuid').projection([counts]) compiled_result = to_sql(result) expected = """\ WITH t0 AS ( SELECT `uuid`, count(*) AS `count` FROM t GROUP BY 1 ) SELECT t0.* FROM ( SELECT `uuid`, max(`count`) AS `max_count` FROM t0 GROUP BY 1 ) t1 LEFT OUTER JOIN t0 ON t1.`uuid` = t0.`uuid`""" assert compiled_result == expected
def assert_cases_equality(self, cases):
    """Execute every expression in ``cases`` and compare with its expectation.

    ``cases`` is an iterable of ``(expr, expected)`` pairs.  Each ``expr``
    is run through ``self.con.execute``; on a mismatch the assertion message
    is the SQL that ``to_sql`` produces for the expression.
    """
    for case in cases:
        expr, expected = case
        actual = self.con.execute(expr)
        # The message is only computed when the comparison fails.
        assert actual == expected, to_sql(expr)
def assert_sql_equal(expr, expected):
    """Assert that ``to_sql(expr)`` equals ``expected``."""
    assert to_sql(expr) == expected
def _check_sql(self, expr, expected):
    """Assert that ``to_sql(expr)`` equals ``expected``."""
    compiled = to_sql(expr)
    assert compiled == expected
# Builds a chain of derived tables from `test_table` (a mutate adding
# d = a + 20, a projection, two group-by aggregations, a self-join of the
# aggregate with its per-`d` totals, and a filter), joins a separately
# aggregated `.view()` of the mutated table to that chain on `d`, and
# compares the compiled SQL with an expected query that uses CTEs t0-t2 and
# subquery aliases t3, t4, t5 and t8.
# NOTE(review): the expected SQL literal appears flattened onto one line in
# this view; confirm its real line breaks before reformatting this test.
def test_join_aliasing(): test = ibis.table( [('a', 'int64'), ('b', 'int64'), ('c', 'int64')], name='test_table' ) test = test.mutate(d=test.a + 20) test2 = test[test.d, test.c] idx = (test2.d / 15).cast('int64').name('idx') test3 = test2.groupby([test2.d, idx, test2.c]).aggregate( row_count=test2.count() ) test3_totals = test3.groupby(test3.d).aggregate( total=test3.row_count.sum() ) test4 = test3.join(test3_totals, test3.d == test3_totals.d)[ test3, test3_totals.total ] test5 = test4[test4.row_count < test4.total / 2] agg = ( test.groupby([test.d, test.b]) .aggregate(count=test.count(), unique=test.c.nunique()) .view() ) joined = agg.join(test5, agg.d == test5.d)[agg, test5.total] result = joined result = to_sql(result) expected = """\ WITH t0 AS ( SELECT *, `a` + 20 AS `d` FROM test_table ), t1 AS ( SELECT `d`, `c` FROM t0 ), t2 AS ( SELECT `d`, CAST(`d` / 15 AS bigint) AS `idx`, `c`, count(*) AS `row_count` FROM t1 GROUP BY 1, 2, 3 ) SELECT t3.*, t4.`total` FROM ( SELECT `d`, `b`, count(*) AS `count`, count(DISTINCT `c`) AS `unique` FROM t0 GROUP BY 1, 2 ) t3 INNER JOIN ( SELECT t5.* FROM ( SELECT t2.*, t8.`total` FROM t2 INNER JOIN ( SELECT `d`, sum(`row_count`) AS `total` FROM t2 GROUP BY 1 ) t8 ON t2.`d` = t8.`d` ) t5 WHERE t5.`row_count` < (t5.`total` / 2) ) t4 ON t3.`d` = t4.`d`""" assert result == expected
def test_decimal_builtins(con, expr, expected):
    """Execute ``expr`` on ``con`` and compare the result with ``expected``.

    On a mismatch the assertion message is the SQL that ``to_sql`` produces
    for the expression.
    """
    actual = con.execute(expr)
    # The message is only computed when the comparison fails.
    assert actual == expected, to_sql(expr)
def test_timestamp_builtins(con, expr, expected):
    """Execute ``expr`` on ``con`` and compare the result with ``expected``.

    On a mismatch the assertion message is the SQL that ``to_sql`` produces
    for the expression.
    """
    actual = con.execute(expr)
    # The message is only computed when the comparison fails.
    assert actual == expected, to_sql(expr)
def test_identical_to_special_case(self):
    """Check that comparing two int64-cast NA values compiles to TRUE.

    ``identical_to`` between two separately built ``ibis.NA.cast('int64')``
    expressions must compile to a bare ``SELECT TRUE`` aliased as ``tmp``.
    """
    left = ibis.NA.cast('int64')
    right = ibis.NA.cast('int64')
    compiled = to_sql(left.identical_to(right))
    assert compiled == 'SELECT TRUE AS `tmp`'
def _compare_sql(self, e1, e2):
    """Assert that ``e1`` and ``e2`` compile to the same SQL text."""
    first_sql = to_sql(e1)
    second_sql = to_sql(e2)
    assert first_sql == second_sql