def test_aggregate_table_count_metric(self): expr = self.con.table('star1').count() result = to_sql(expr) expected = """SELECT count(*) AS `tmp` FROM star1""" assert result == expected # count on more complicated table region = self.con.table('tpch_region') nation = self.con.table('tpch_nation') join_expr = region.r_regionkey == nation.n_regionkey joined = region.inner_join(nation, join_expr) table_ref = joined[nation, region.r_name.name('region')] expr = table_ref.count() result = to_sql(expr) expected = """SELECT count(*) AS `tmp` FROM ( SELECT t2.*, t1.`r_name` AS `region` FROM tpch_region t1 INNER JOIN tpch_nation t2 ON t1.`r_regionkey` = t2.`n_regionkey` ) t0""" assert result == expected
def test_where_with_join(self): t1 = self.con.table('star1') t2 = self.con.table('star2') # This also tests some cases of predicate pushdown what = (t1.inner_join(t2, [t1.foo_id == t2.foo_id]) .projection([t1, t2.value1, t2.value3]) .filter([t1.f > 0, t2.value3 < 1000])) what2 = (t1.inner_join(t2, [t1.foo_id == t2.foo_id]) .filter([t1.f > 0, t2.value3 < 1000]) .projection([t1, t2.value1, t2.value3])) expected_sql = """SELECT t0.*, t1.`value1`, t1.`value3` FROM star1 t0 INNER JOIN star2 t1 ON t0.`foo_id` = t1.`foo_id` WHERE t0.`f` > 0 AND t1.`value3` < 1000""" result_sql = to_sql(what) assert result_sql == expected_sql result2_sql = to_sql(what2) assert result2_sql == expected_sql
def test_filter_subquery_derived_reduction(self): t1 = self.con.table('star1') # Reduction can be nested inside some scalar expression pred3 = t1.f > t1[t1.foo_id == 'foo'].f.mean().log() pred4 = t1.f > (t1[t1.foo_id == 'foo'].f.mean().log() + 1) expr3 = t1[pred3] result = to_sql(expr3) expected = """SELECT * FROM star1 WHERE `f` > ( SELECT ln(avg(`f`)) AS `tmp` FROM star1 WHERE `foo_id` = 'foo' )""" assert result == expected expr4 = t1[pred4] result = to_sql(expr4) expected = """SELECT * FROM star1 WHERE `f` > ( SELECT ln(avg(`f`)) + 1 AS `tmp` FROM star1 WHERE `foo_id` = 'foo' )""" assert result == expected
def test_exists_semi_join_case(self): t1, t2 = self.t1, self.t2 cond = (t1.key1 == t2.key1).any() expr = t1[cond] result = to_sql(expr) expected = """SELECT t0.* FROM foo t0 WHERE EXISTS ( SELECT 1 FROM bar t1 WHERE t0.`key1` = t1.`key1` )""" assert result == expected cond2 = ((t1.key1 == t2.key1) & (t2.key2 == 'foo')).any() expr2 = t1[cond2] result = to_sql(expr2) expected = """SELECT t0.* FROM foo t0 WHERE EXISTS ( SELECT 1 FROM bar t1 WHERE t0.`key1` = t1.`key1` AND t1.`key2` = 'foo' )""" assert result == expected
def test_subquery_in_filter_predicate(self): # E.g. comparing against some scalar aggregate value. See Ibis #43 t1 = self.con.table('star1') pred = t1.f > t1.f.mean() expr = t1[pred] # This brought out another expression rewriting bug, since the filtered # table isn't found elsewhere in the expression. pred2 = t1.f > t1[t1.foo_id == 'foo'].f.mean() expr2 = t1[pred2] result = to_sql(expr) expected = """SELECT * FROM star1 WHERE `f` > ( SELECT avg(`f`) AS `tmp` FROM star1 )""" assert result == expected result = to_sql(expr2) expected = """SELECT * FROM star1 WHERE `f` > ( SELECT avg(`f`) AS `tmp` FROM star1 WHERE `foo_id` = 'foo' )""" assert result == expected
def test_tpch_self_join_failure(self): # duplicating the integration test here region = self.con.table('tpch_region') nation = self.con.table('tpch_nation') customer = self.con.table('tpch_customer') orders = self.con.table('tpch_orders') fields_of_interest = [ region.r_name.name('region'), nation.n_name.name('nation'), orders.o_totalprice.name('amount'), orders.o_orderdate.cast('timestamp').name('odate')] joined_all = ( region.join(nation, region.r_regionkey == nation.n_regionkey) .join(customer, customer.c_nationkey == nation.n_nationkey) .join(orders, orders.o_custkey == customer.c_custkey) [fields_of_interest]) year = joined_all.odate.year().name('year') total = joined_all.amount.sum().cast('double').name('total') annual_amounts = (joined_all .group_by(['region', year]) .aggregate(total)) current = annual_amounts prior = annual_amounts.view() yoy_change = (current.total - prior.total).name('yoy_change') yoy = (current.join(prior, current.year == (prior.year - 1)) [current.region, current.year, yoy_change]) to_sql(yoy)
def test_topk_to_aggregate(self): t = ibis.table([('dest', 'string'), ('origin', 'string'), ('arrdelay', 'int32')], 'airlines') top = t.dest.topk(10, by=t.arrdelay.mean()) result = to_sql(top) expected = to_sql(top.to_aggregation()) assert result == expected
def test_fuse_projections(self): table = api.table([ ('foo', 'int32'), ('bar', 'int64'), ('value', 'double') ], name='tbl') # Cases where we project in both cases using the base table reference f1 = (table['foo'] + table['bar']).name('baz') pred = table['value'] > 0 table2 = table[table, f1] table2_filtered = table2[pred] f2 = (table2['foo'] * 2).name('qux') f3 = (table['foo'] * 2).name('qux') table3 = table2.projection([table2, f2]) # fusion works even if there's a filter table3_filtered = table2_filtered.projection([table2, f2]) expected = table[table, f1, f3] expected2 = table[pred][table, f1, f3] assert table3.equals(expected) assert table3_filtered.equals(expected2) ex_sql = """SELECT *, `foo` + `bar` AS `baz`, `foo` * 2 AS `qux` FROM tbl""" ex_sql2 = """SELECT *, `foo` + `bar` AS `baz`, `foo` * 2 AS `qux` FROM tbl WHERE `value` > 0""" table3_sql = to_sql(table3) table3_filt_sql = to_sql(table3_filtered) assert table3_sql == ex_sql assert table3_filt_sql == ex_sql2 # Use the intermediate table refs table3 = table2.projection([table2, f2]) # fusion works even if there's a filter table3_filtered = table2_filtered.projection([table2, f2]) expected = table[table, f1, f3] expected2 = table[pred][table, f1, f3] assert table3.equals(expected) assert table3_filtered.equals(expected2)
def test_unsupported_aggregate_functions(self): t = self.con.table('alltypes') w = ibis.window(order_by=t.d) exprs = [ t.f.approx_nunique(), t.f.approx_median(), t.g.group_concat(), ] for expr in exprs: with self.assertRaises(com.TranslationError): proj = t.projection([expr.over(w).name('foo')]) to_sql(proj)
def test_isin_notin_in_select(self): filtered = self.table[self.table.g.isin(["foo", "bar"])] result = to_sql(filtered) expected = """SELECT * FROM alltypes WHERE g IN ('foo', 'bar')""" assert result == expected filtered = self.table[self.table.g.notin(["foo", "bar"])] result = to_sql(filtered) expected = """SELECT * FROM alltypes WHERE g NOT IN ('foo', 'bar')""" assert result == expected
def test_fuse_projections(self): table = api.table([('foo', 'int32'), ('bar', 'int64'), ('value', 'double')], name='tbl') # Cases where we project in both cases using the base table reference f1 = (table['foo'] + table['bar']).name('baz') pred = table['value'] > 0 table2 = table[table, f1] table2_filtered = table2[pred] f2 = (table2['foo'] * 2).name('qux') f3 = (table['foo'] * 2).name('qux') table3 = table2.projection([table2, f2]) # fusion works even if there's a filter table3_filtered = table2_filtered.projection([table2, f2]) expected = table[table, f1, f3] expected2 = table[pred][table, f1, f3] assert table3.equals(expected) assert table3_filtered.equals(expected2) ex_sql = """SELECT *, `foo` + `bar` AS `baz`, `foo` * 2 AS `qux` FROM tbl""" ex_sql2 = """SELECT *, `foo` + `bar` AS `baz`, `foo` * 2 AS `qux` FROM tbl WHERE `value` > 0""" table3_sql = to_sql(table3) table3_filt_sql = to_sql(table3_filtered) assert table3_sql == ex_sql assert table3_filt_sql == ex_sql2 # Use the intermediate table refs table3 = table2.projection([table2, f2]) # fusion works even if there's a filter table3_filtered = table2_filtered.projection([table2, f2]) expected = table[table, f1, f3] expected2 = table[pred][table, f1, f3] assert table3.equals(expected) assert table3_filtered.equals(expected2)
def test_simple_join_formatting(self): t1 = self.con.table('star1') t2 = self.con.table('star2') pred = t1['foo_id'] == t2['foo_id'] pred2 = t1['bar_id'] == t2['foo_id'] cases = [ (t1.inner_join(t2, [pred])[[t1]], """SELECT t0.* FROM star1 t0 INNER JOIN star2 t1 ON t0.`foo_id` = t1.`foo_id`"""), (t1.left_join(t2, [pred])[[t1]], """SELECT t0.* FROM star1 t0 LEFT OUTER JOIN star2 t1 ON t0.`foo_id` = t1.`foo_id`"""), (t1.outer_join(t2, [pred])[[t1]], """SELECT t0.* FROM star1 t0 FULL OUTER JOIN star2 t1 ON t0.`foo_id` = t1.`foo_id`"""), # multiple predicates (t1.inner_join(t2, [pred, pred2])[[t1]], """SELECT t0.* FROM star1 t0 INNER JOIN star2 t1 ON t0.`foo_id` = t1.`foo_id` AND t0.`bar_id` = t1.`foo_id`"""), ] for expr, expected_sql in cases: result_sql = to_sql(expr) assert result_sql == expected_sql
def test_where_no_pushdown_possible(self): t1 = self.con.table('star1') t2 = self.con.table('star2') joined = (t1.inner_join(t2, [t1.foo_id == t2.foo_id]) [t1, (t1.f - t2.value1).name('diff')]) filtered = joined[joined.diff > 1] # TODO: I'm not sure if this is exactly what we want expected_sql = """SELECT * FROM ( SELECT t0.*, t0.`f` - t1.`value1` AS `diff` FROM star1 t0 INNER JOIN star2 t1 ON t0.`foo_id` = t1.`foo_id` WHERE t0.`f` > 0 AND t1.`value3` < 1000 ) WHERE `diff` > 1""" raise unittest.SkipTest result_sql = to_sql(filtered) assert result_sql == expected_sql
def test_case_in_projection(self): t = self.con.table('alltypes') expr = (t.g.case().when('foo', 'bar').when('baz', 'qux').else_('default').end()) expr2 = (api.case().when(t.g == 'foo', 'bar').when(t.g == 'baz', t.g).end()) proj = t[expr.name('col1'), expr2.name('col2'), t] result = to_sql(proj) expected = """SELECT CASE `g` WHEN 'foo' THEN 'bar' WHEN 'baz' THEN 'qux' ELSE 'default' END AS `col1`, CASE WHEN `g` = 'foo' THEN 'bar' WHEN `g` = 'baz' THEN `g` ELSE NULL END AS `col2`, * FROM alltypes""" assert result == expected
def test_subquery_used_for_self_join(self): # There could be cases that should look in SQL like # WITH t0 as (some subquery) # select ... # from t0 t1 # join t0 t2 # on t1.kind = t2.subkind # ... # However, the Ibis code will simply have an expression (projection or # aggregation, say) built on top of the subquery expression, so we need # to extract the subquery unit (we see that it appears multiple times # in the tree). t = self.con.table('alltypes') agged = t.aggregate([t.f.sum().name('total')], by=['g', 'a', 'b']) view = agged.view() metrics = [(agged.total - view.total).max().name('metric')] reagged = (agged.inner_join(view, [agged.a == view.b]) .aggregate(metrics, by=[agged.g])) result = to_sql(reagged) expected = """WITH t0 AS ( SELECT `g`, `a`, `b`, sum(`f`) AS `total` FROM alltypes GROUP BY 1, 2, 3 ) SELECT t0.`g`, max(t0.`total` - t1.`total`) AS `metric` FROM t0 INNER JOIN t0 t1 ON t0.`a` = t1.`b` GROUP BY 1""" assert result == expected
def test_topk_analysis_bug(self): # GH #398 airlines = ibis.table([('dest', 'string'), ('origin', 'string'), ('arrdelay', 'int32')], 'airlines') dests = ['ORD', 'JFK', 'SFO'] t = airlines[airlines.dest.isin(dests)] delay_filter = t.dest.topk(10, by=t.arrdelay.mean()) expr = t[delay_filter].group_by('origin').size() result = to_sql(expr) expected = """\ SELECT t0.`origin`, count(*) AS `count` FROM airlines t0 LEFT SEMI JOIN ( SELECT `dest`, avg(`arrdelay`) AS `mean` FROM airlines WHERE `dest` IN ('ORD', 'JFK', 'SFO') GROUP BY 1 ORDER BY `mean` DESC LIMIT 10 ) t1 ON t0.`dest` = t1.`dest` WHERE t0.`dest` IN ('ORD', 'JFK', 'SFO') GROUP BY 1""" assert result == expected
def test_where_no_pushdown_possible(self): t1 = self.con.table('star1') t2 = self.con.table('star2') joined = (t1.inner_join( t2, [t1.foo_id == t2.foo_id])[t1, (t1.f - t2.value1).name('diff')]) filtered = joined[joined.diff > 1] # TODO: I'm not sure if this is exactly what we want expected_sql = """SELECT * FROM ( SELECT t0.*, t0.`f` - t1.`value1` AS `diff` FROM star1 t0 INNER JOIN star2 t1 ON t0.`foo_id` = t1.`foo_id` WHERE t0.`f` > 0 AND t1.`value3` < 1000 ) WHERE `diff` > 1""" raise unittest.SkipTest result_sql = to_sql(filtered) assert result_sql == expected_sql
def test_self_join_subquery_distinct_equal(self): region = self.con.table('tpch_region') nation = self.con.table('tpch_nation') j1 = (region.join(nation, region.r_regionkey == nation.n_regionkey)[region, nation]) j2 = (region.join( nation, region.r_regionkey == nation.n_regionkey)[region, nation].view()) expr = (j1.join(j2, j1.r_regionkey == j2.r_regionkey)[j1.r_name, j2.n_name]) result = to_sql(expr) expected = """\ WITH t0 AS ( SELECT t2.*, t3.* FROM tpch_region t2 INNER JOIN tpch_nation t3 ON t2.`r_regionkey` = t3.`n_regionkey` ) SELECT t0.`r_name`, t1.`n_name` FROM t0 INNER JOIN t0 t1 ON t0.`r_regionkey` = t1.`r_regionkey`""" assert result == expected
def test_self_join_subquery_distinct_equal(self): region = self.con.table('tpch_region') nation = self.con.table('tpch_nation') j1 = (region.join(nation, region.r_regionkey == nation.n_regionkey) [region, nation]) j2 = (region.join(nation, region.r_regionkey == nation.n_regionkey) [region, nation].view()) expr = (j1.join(j2, j1.r_regionkey == j2.r_regionkey) [j1.r_name, j2.n_name]) result = to_sql(expr) expected = """\ WITH t0 AS ( SELECT t2.*, t3.* FROM tpch_region t2 INNER JOIN tpch_nation t3 ON t2.`r_regionkey` = t3.`n_regionkey` ) SELECT t0.`r_name`, t1.`n_name` FROM t0 INNER JOIN t0 t1 ON t0.`r_regionkey` = t1.`r_regionkey`""" assert result == expected
def test_double_nested_subquery_no_aliases(self): # We don't require any table aliasing anywhere t = api.table([ ('key1', 'string'), ('key2', 'string'), ('key3', 'string'), ('value', 'double') ], 'foo_table') agg1 = t.aggregate([t.value.sum().name('total')], by=['key1', 'key2', 'key3']) agg2 = agg1.aggregate([agg1.total.sum().name('total')], by=['key1', 'key2']) agg3 = agg2.aggregate([agg2.total.sum().name('total')], by=['key1']) result = to_sql(agg3) expected = """SELECT `key1`, sum(`total`) AS `total` FROM ( SELECT `key1`, `key2`, sum(`total`) AS `total` FROM ( SELECT `key1`, `key2`, `key3`, sum(`value`) AS `total` FROM foo_table GROUP BY 1, 2, 3 ) t1 GROUP BY 1, 2 ) t0 GROUP BY 1""" assert result == expected
def test_subquery_used_for_self_join(self): # There could be cases that should look in SQL like # WITH t0 as (some subquery) # select ... # from t0 t1 # join t0 t2 # on t1.kind = t2.subkind # ... # However, the Ibis code will simply have an expression (projection or # aggregation, say) built on top of the subquery expression, so we need # to extract the subquery unit (we see that it appears multiple times # in the tree). t = self.con.table('alltypes') agged = t.aggregate([t.f.sum().name('total')], by=['g', 'a', 'b']) view = agged.view() metrics = [(agged.total - view.total).max().name('metric')] reagged = (agged.inner_join(view, [agged.a == view.b]).aggregate( metrics, by=[agged.g])) result = to_sql(reagged) expected = """WITH t0 AS ( SELECT `g`, `a`, `b`, sum(`f`) AS `total` FROM alltypes GROUP BY 1, 2, 3 ) SELECT t0.`g`, max(t0.`total` - t1.`total`) AS `metric` FROM t0 INNER JOIN t0 t1 ON t0.`a` = t1.`b` GROUP BY 1""" assert result == expected
def test_case_in_projection(self): t = self.con.table('alltypes') expr = (t.g.case() .when('foo', 'bar') .when('baz', 'qux') .else_('default').end()) expr2 = (api.case() .when(t.g == 'foo', 'bar') .when(t.g == 'baz', t.g) .end()) proj = t[expr.name('col1'), expr2.name('col2'), t] result = to_sql(proj) expected = """SELECT CASE `g` WHEN 'foo' THEN 'bar' WHEN 'baz' THEN 'qux' ELSE 'default' END AS `col1`, CASE WHEN `g` = 'foo' THEN 'bar' WHEN `g` = 'baz' THEN `g` ELSE NULL END AS `col2`, * FROM alltypes""" assert result == expected
def test_table_names_overlap_default_aliases(self): # see discussion in #104; this actually is not needed for query # correctness, and only makes the generated SQL nicer raise unittest.SkipTest t0 = api.table([ ('key', 'string'), ('v1', 'double') ], 't1') t1 = api.table([ ('key', 'string'), ('v2', 'double') ], 't0') expr = t0.join(t1, t0.key == t1.key)[t0.key, t0.v1, t1.v2] result = to_sql(expr) expected = """\ SELECT t2.`key`, t2.`v1`, t3.`v2` FROM t0 t2 INNER JOIN t1 t3 ON t2.`key` = t3.`key`""" assert result == expected
def test_topk_predicate_pushdown_bug(self): # Observed on TPCH data cplusgeo = (customer.inner_join( nation, [customer.c_nationkey == nation.n_nationkey]).inner_join( region, [nation.n_regionkey == region.r_regionkey])[customer, nation.n_name, region.r_name]) pred = cplusgeo.n_name.topk(10, by=cplusgeo.c_acctbal.sum()) expr = cplusgeo.filter([pred]) result = to_sql(expr) expected = """\ SELECT t0.*, t1.`n_name`, t2.`r_name` FROM customer t0 INNER JOIN nation t1 ON t0.`c_nationkey` = t1.`n_nationkey` INNER JOIN region t2 ON t1.`n_regionkey` = t2.`r_regionkey` LEFT SEMI JOIN ( SELECT t1.`n_name`, sum(t0.`c_acctbal`) AS `sum` FROM customer t0 INNER JOIN nation t1 ON t0.`c_nationkey` = t1.`n_nationkey` INNER JOIN region t2 ON t1.`n_regionkey` = t2.`r_regionkey` GROUP BY 1 ORDER BY `sum` DESC LIMIT 10 ) t3 ON t1.`n_name` = t3.`n_name`""" assert result == expected
def test_topk_predicate_pushdown_bug(self): # Observed on TPCH data cplusgeo = ( customer.inner_join(nation, [customer.c_nationkey == nation.n_nationkey]) .inner_join(region, [nation.n_regionkey == region.r_regionkey]) [customer, nation.n_name, region.r_name]) pred = cplusgeo.n_name.topk(10, by=cplusgeo.c_acctbal.sum()) expr = cplusgeo.filter([pred]) result = to_sql(expr) expected = """\ SELECT t0.*, t1.`n_name`, t2.`r_name` FROM customer t0 INNER JOIN nation t1 ON t0.`c_nationkey` = t1.`n_nationkey` INNER JOIN region t2 ON t1.`n_regionkey` = t2.`r_regionkey` LEFT SEMI JOIN ( SELECT t1.`n_name`, sum(t0.`c_acctbal`) AS `sum` FROM customer t0 INNER JOIN nation t1 ON t0.`c_nationkey` = t1.`n_nationkey` INNER JOIN region t2 ON t1.`n_regionkey` = t2.`r_regionkey` GROUP BY 1 ORDER BY `sum` DESC LIMIT 10 ) t3 ON t1.`n_name` = t3.`n_name`""" assert result == expected
def test_expr_list_no_table_refs(self): exlist = ibis.api.expr_list([ibis.literal(1).name('a'), ibis.now().name('b'), ibis.literal(2).log().name('c')]) result = to_sql(exlist) expected = """\ SELECT 1 AS `a`, now() AS `b`, ln(2) AS `c`""" assert result == expected
def test_array_distinct(self): t = self.con.table('functional_alltypes') expr = t.string_col.distinct() result = to_sql(expr) expected = """SELECT DISTINCT `string_col` FROM functional_alltypes""" assert result == expected
def test_nameless_table(self): # Ensure that user gets some kind of sensible error nameless = api.table([('key', 'string')]) self.assertRaises(com.RelationError, to_sql, nameless) with_name = api.table([('key', 'string')], name='baz') result = to_sql(with_name) assert result == 'SELECT *\nFROM baz'
def test_simple_table_distinct(self): t = self.con.table('functional_alltypes') expr = t[t.string_col, t.int_col].distinct() result = to_sql(expr) expected = """SELECT DISTINCT `string_col`, `int_col` FROM functional_alltypes""" assert result == expected
def test_identifier_quoting(self): data = api.table([('date', 'int32'), ('explain', 'string')], 'table') expr = data[data.date.name('else'), data.explain.name('join')] result = to_sql(expr) expected = """SELECT `date` AS `else`, `explain` AS `join` FROM `table`""" assert result == expected
def test_limit(self): table = self.con.table('star1').limit(10) result = to_sql(table) expected = """SELECT * FROM star1 LIMIT 10""" assert result == expected table = self.con.table('star1').limit(10, offset=5) result = to_sql(table) expected = """SELECT * FROM star1 LIMIT 10 OFFSET 5""" assert result == expected # Put the limit in a couple places in the stack table = self.con.table('star1') table = table[table.f > 0].limit(10) result = to_sql(table) expected = """SELECT * FROM star1 WHERE `f` > 0 LIMIT 10""" assert result == expected table = self.con.table('star1') # Semantically, this should produce a subquery table = table.limit(10) table = table[table.f > 0] result2 = to_sql(table) expected2 = """SELECT * FROM ( SELECT * FROM star1 LIMIT 10 ) t0 WHERE `f` > 0""" assert result2 == expected2
def test_aggregate_projection_subquery(self): t = self.con.table('alltypes') proj = t[t.f > 0][t, (t.a + t.b).name('foo')] def agg(x): return x.aggregate([x.foo.sum().name('foo total')], by=['g']) # predicate gets pushed down filtered = proj[proj.g == 'bar'] result = to_sql(filtered) expected = """SELECT *, `a` + `b` AS `foo` FROM alltypes WHERE `f` > 0 AND `g` = 'bar'""" assert result == expected agged = agg(filtered) result = to_sql(agged) expected = """SELECT `g`, sum(`foo`) AS `foo total` FROM ( SELECT *, `a` + `b` AS `foo` FROM alltypes WHERE `f` > 0 AND `g` = 'bar' ) t0 GROUP BY 1""" assert result == expected # Pushdown is not possible (in Impala, Postgres, others) agged2 = agg(proj[proj.foo < 10]) result = to_sql(agged2) expected = """SELECT t0.`g`, sum(t0.`foo`) AS `foo total` FROM ( SELECT *, `a` + `b` AS `foo` FROM alltypes WHERE `f` > 0 ) t0 WHERE t0.`foo` < 10 GROUP BY 1""" assert result == expected
def test_isnull_case_expr_rewrite_failure(self): # #172, case expression that was not being properly converted into an # aggregation reduction = self.table.g.isnull().ifelse(1, 0).sum() result = to_sql(reduction) expected = """\ SELECT sum(CASE WHEN `g` IS NULL THEN 1 ELSE 0 END) AS `tmp` FROM alltypes""" assert result == expected
def test_expr_list_no_table_refs(self): exlist = ibis.api.expr_list([ ibis.literal(1).name('a'), ibis.now().name('b'), ibis.literal(2).log().name('c') ]) result = to_sql(exlist) expected = """\ SELECT 1 AS `a`, now() AS `b`, ln(2) AS `c`""" assert result == expected
def test_topk_operation_to_semi_join(self): # TODO: top K with filter in place table = api.table([ ('foo', 'string'), ('bar', 'string'), ('city', 'string'), ('v1', 'double'), ('v2', 'double'), ], 'tbl') what = table.city.topk(10, by=table.v2.mean()) filtered = table[what] query = to_sql(filtered) expected = """SELECT t0.* FROM tbl t0 LEFT SEMI JOIN ( SELECT `city`, avg(`v2`) AS `mean` FROM tbl GROUP BY 1 ORDER BY `mean` DESC LIMIT 10 ) t1 ON t0.`city` = t1.`city`""" assert query == expected # Test the default metric (count) what = table.city.topk(10) filtered2 = table[what] query = to_sql(filtered2) expected = """SELECT t0.* FROM tbl t0 LEFT SEMI JOIN ( SELECT `city`, count(`city`) AS `count` FROM tbl GROUP BY 1 ORDER BY `count` DESC LIMIT 10 ) t1 ON t0.`city` = t1.`city`""" assert query == expected
def test_where_with_between(self): t = self.con.table('alltypes') what = t.filter([t.a > 0, t.f.between(0, 1)]) result = to_sql(what) expected = """SELECT * FROM alltypes WHERE `a` > 0 AND `f` BETWEEN 0 AND 1""" assert result == expected
def test_join_just_materialized(self): t1 = self.con.table('tpch_nation') t2 = self.con.table('tpch_region') t3 = self.con.table('tpch_customer') # GH #491 joined = (t1.inner_join(t2, t1.n_regionkey == t2.r_regionkey) .inner_join(t3, t1.n_nationkey == t3.c_nationkey)) result = to_sql(joined) expected = """SELECT * FROM tpch_nation t0 INNER JOIN tpch_region t1 ON t0.`n_regionkey` = t1.`r_regionkey` INNER JOIN tpch_customer t2 ON t0.`n_nationkey` = t2.`c_nationkey`""" assert result == expected result = to_sql(joined.materialize()) assert result == expected
def test_where_uncorrelated_subquery(self): expr = self.foo[self.foo.job.isin(self.bar.job)] result = to_sql(expr) expected = """SELECT * FROM foo WHERE `job` IN ( SELECT `job` FROM bar )""" assert result == expected
def test_join_just_materialized(self): t1 = self.con.table('tpch_nation') t2 = self.con.table('tpch_region') t3 = self.con.table('tpch_customer') # GH #491 joined = (t1.inner_join(t2, t1.n_regionkey == t2.r_regionkey).inner_join( t3, t1.n_nationkey == t3.c_nationkey)) result = to_sql(joined) expected = """SELECT * FROM tpch_nation t0 INNER JOIN tpch_region t1 ON t0.`n_regionkey` = t1.`r_regionkey` INNER JOIN tpch_customer t2 ON t0.`n_nationkey` = t2.`c_nationkey`""" assert result == expected result = to_sql(joined.materialize()) assert result == expected
def test_where_simple_comparisons(self): t1 = self.con.table('star1') what = t1.filter([t1.f > 0, t1.c < t1.f * 2]) result = to_sql(what) expected = """SELECT * FROM star1 WHERE `f` > 0 AND `c` < (`f` * 2)""" assert result == expected
def test_join_no_predicates_for_impala(self): # Impala requires that joins without predicates be written explicitly # as CROSS JOIN, since result sets can accidentally get too large if a # query is executed before predicates are written t1 = self.con.table('star1') t2 = self.con.table('star2') joined2 = t1.cross_join(t2)[[t1]] expected = """SELECT t0.* FROM star1 t0 CROSS JOIN star2 t1""" result2 = to_sql(joined2) assert result2 == expected for jtype in ['inner_join', 'left_join', 'outer_join']: joined = getattr(t1, jtype)(t2)[[t1]] result = to_sql(joined) assert result == expected
def test_semi_anti_joins(self): t1 = self.con.table('star1') t2 = self.con.table('star2') joined = t1.semi_join(t2, [t1.foo_id == t2.foo_id])[[t1]] result = to_sql(joined) expected = """SELECT t0.* FROM star1 t0 LEFT SEMI JOIN star2 t1 ON t0.`foo_id` = t1.`foo_id`""" assert result == expected joined = t1.anti_join(t2, [t1.foo_id == t2.foo_id])[[t1]] result = to_sql(joined) expected = """SELECT t0.* FROM star1 t0 LEFT ANTI JOIN star2 t1 ON t0.`foo_id` = t1.`foo_id`""" assert result == expected