Example #1
0
    def test_aggregate_table_count_metric(self):
        expr = self.con.table('star1').count()

        result = to_sql(expr)
        expected = """SELECT count(*) AS `tmp`
FROM star1"""
        assert result == expected

        # count on more complicated table
        region = self.con.table('tpch_region')
        nation = self.con.table('tpch_nation')
        join_expr = region.r_regionkey == nation.n_regionkey
        joined = region.inner_join(nation, join_expr)
        table_ref = joined[nation, region.r_name.name('region')]

        expr = table_ref.count()
        result = to_sql(expr)
        expected = """SELECT count(*) AS `tmp`
FROM (
  SELECT t2.*, t1.`r_name` AS `region`
  FROM tpch_region t1
    INNER JOIN tpch_nation t2
      ON t1.`r_regionkey` = t2.`n_regionkey`
) t0"""
        assert result == expected
Example #2
0
    def test_where_with_join(self):
        t1 = self.con.table('star1')
        t2 = self.con.table('star2')

        # This also tests some cases of predicate pushdown
        what = (t1.inner_join(t2, [t1.foo_id == t2.foo_id])
                .projection([t1, t2.value1, t2.value3])
                .filter([t1.f > 0, t2.value3 < 1000]))

        what2 = (t1.inner_join(t2, [t1.foo_id == t2.foo_id])
                 .filter([t1.f > 0, t2.value3 < 1000])
                 .projection([t1, t2.value1, t2.value3]))

        expected_sql = """SELECT t0.*, t1.`value1`, t1.`value3`
FROM star1 t0
  INNER JOIN star2 t1
    ON t0.`foo_id` = t1.`foo_id`
WHERE t0.`f` > 0 AND
      t1.`value3` < 1000"""

        result_sql = to_sql(what)
        assert result_sql == expected_sql

        result2_sql = to_sql(what2)
        assert result2_sql == expected_sql
Example #3
0
    def test_aggregate_table_count_metric(self):
        expr = self.con.table('star1').count()

        result = to_sql(expr)
        expected = """SELECT count(*) AS `tmp`
FROM star1"""
        assert result == expected

        # count on more complicated table
        region = self.con.table('tpch_region')
        nation = self.con.table('tpch_nation')
        join_expr = region.r_regionkey == nation.n_regionkey
        joined = region.inner_join(nation, join_expr)
        table_ref = joined[nation, region.r_name.name('region')]

        expr = table_ref.count()
        result = to_sql(expr)
        expected = """SELECT count(*) AS `tmp`
FROM (
  SELECT t2.*, t1.`r_name` AS `region`
  FROM tpch_region t1
    INNER JOIN tpch_nation t2
      ON t1.`r_regionkey` = t2.`n_regionkey`
) t0"""
        assert result == expected
Example #4
0
    def test_filter_subquery_derived_reduction(self):
        t1 = self.con.table('star1')

        # Reduction can be nested inside some scalar expression
        pred3 = t1.f > t1[t1.foo_id == 'foo'].f.mean().log()
        pred4 = t1.f > (t1[t1.foo_id == 'foo'].f.mean().log() + 1)

        expr3 = t1[pred3]
        result = to_sql(expr3)
        expected = """SELECT *
FROM star1
WHERE `f` > (
  SELECT ln(avg(`f`)) AS `tmp`
  FROM star1
  WHERE `foo_id` = 'foo'
)"""
        assert result == expected

        expr4 = t1[pred4]

        result = to_sql(expr4)
        expected = """SELECT *
FROM star1
WHERE `f` > (
  SELECT ln(avg(`f`)) + 1 AS `tmp`
  FROM star1
  WHERE `foo_id` = 'foo'
)"""
        assert result == expected
Example #5
0
    def test_exists_semi_join_case(self):
        t1, t2 = self.t1, self.t2

        cond = (t1.key1 == t2.key1).any()
        expr = t1[cond]

        result = to_sql(expr)
        expected = """SELECT t0.*
FROM foo t0
WHERE EXISTS (
  SELECT 1
  FROM bar t1
  WHERE t0.`key1` = t1.`key1`
)"""
        assert result == expected

        cond2 = ((t1.key1 == t2.key1) & (t2.key2 == 'foo')).any()
        expr2 = t1[cond2]

        result = to_sql(expr2)
        expected = """SELECT t0.*
FROM foo t0
WHERE EXISTS (
  SELECT 1
  FROM bar t1
  WHERE t0.`key1` = t1.`key1` AND
        t1.`key2` = 'foo'
)"""
        assert result == expected
Example #6
0
    def test_filter_subquery_derived_reduction(self):
        t1 = self.con.table('star1')

        # Reduction can be nested inside some scalar expression
        pred3 = t1.f > t1[t1.foo_id == 'foo'].f.mean().log()
        pred4 = t1.f > (t1[t1.foo_id == 'foo'].f.mean().log() + 1)

        expr3 = t1[pred3]
        result = to_sql(expr3)
        expected = """SELECT *
FROM star1
WHERE `f` > (
  SELECT ln(avg(`f`)) AS `tmp`
  FROM star1
  WHERE `foo_id` = 'foo'
)"""
        assert result == expected

        expr4 = t1[pred4]

        result = to_sql(expr4)
        expected = """SELECT *
FROM star1
WHERE `f` > (
  SELECT ln(avg(`f`)) + 1 AS `tmp`
  FROM star1
  WHERE `foo_id` = 'foo'
)"""
        assert result == expected
Example #7
0
    def test_subquery_in_filter_predicate(self):
        # E.g. comparing against some scalar aggregate value. See Ibis #43
        t1 = self.con.table('star1')

        pred = t1.f > t1.f.mean()
        expr = t1[pred]

        # This brought out another expression rewriting bug, since the filtered
        # table isn't found elsewhere in the expression.
        pred2 = t1.f > t1[t1.foo_id == 'foo'].f.mean()
        expr2 = t1[pred2]

        result = to_sql(expr)
        expected = """SELECT *
FROM star1
WHERE `f` > (
  SELECT avg(`f`) AS `tmp`
  FROM star1
)"""
        assert result == expected

        result = to_sql(expr2)
        expected = """SELECT *
FROM star1
WHERE `f` > (
  SELECT avg(`f`) AS `tmp`
  FROM star1
  WHERE `foo_id` = 'foo'
)"""
        assert result == expected
Example #8
0
    def test_tpch_self_join_failure(self):
        # duplicating the integration test here

        region = self.con.table('tpch_region')
        nation = self.con.table('tpch_nation')
        customer = self.con.table('tpch_customer')
        orders = self.con.table('tpch_orders')

        fields_of_interest = [
            region.r_name.name('region'),
            nation.n_name.name('nation'),
            orders.o_totalprice.name('amount'),
            orders.o_orderdate.cast('timestamp').name('odate')]

        joined_all = (
            region.join(nation, region.r_regionkey == nation.n_regionkey)
            .join(customer, customer.c_nationkey == nation.n_nationkey)
            .join(orders, orders.o_custkey == customer.c_custkey)
            [fields_of_interest])

        year = joined_all.odate.year().name('year')
        total = joined_all.amount.sum().cast('double').name('total')
        annual_amounts = (joined_all
                          .group_by(['region', year])
                          .aggregate(total))

        current = annual_amounts
        prior = annual_amounts.view()

        yoy_change = (current.total - prior.total).name('yoy_change')
        yoy = (current.join(prior, current.year == (prior.year - 1))
               [current.region, current.year, yoy_change])
        to_sql(yoy)
Example #9
0
    def test_exists_semi_join_case(self):
        t1, t2 = self.t1, self.t2

        cond = (t1.key1 == t2.key1).any()
        expr = t1[cond]

        result = to_sql(expr)
        expected = """SELECT t0.*
FROM foo t0
WHERE EXISTS (
  SELECT 1
  FROM bar t1
  WHERE t0.`key1` = t1.`key1`
)"""
        assert result == expected

        cond2 = ((t1.key1 == t2.key1) & (t2.key2 == 'foo')).any()
        expr2 = t1[cond2]

        result = to_sql(expr2)
        expected = """SELECT t0.*
FROM foo t0
WHERE EXISTS (
  SELECT 1
  FROM bar t1
  WHERE t0.`key1` = t1.`key1` AND
        t1.`key2` = 'foo'
)"""
        assert result == expected
Example #10
0
    def test_subquery_in_filter_predicate(self):
        # E.g. comparing against some scalar aggregate value. See Ibis #43
        t1 = self.con.table('star1')

        pred = t1.f > t1.f.mean()
        expr = t1[pred]

        # This brought out another expression rewriting bug, since the filtered
        # table isn't found elsewhere in the expression.
        pred2 = t1.f > t1[t1.foo_id == 'foo'].f.mean()
        expr2 = t1[pred2]

        result = to_sql(expr)
        expected = """SELECT *
FROM star1
WHERE `f` > (
  SELECT avg(`f`) AS `tmp`
  FROM star1
)"""
        assert result == expected

        result = to_sql(expr2)
        expected = """SELECT *
FROM star1
WHERE `f` > (
  SELECT avg(`f`) AS `tmp`
  FROM star1
  WHERE `foo_id` = 'foo'
)"""
        assert result == expected
Example #11
0
    def test_topk_to_aggregate(self):
        t = ibis.table([('dest', 'string'), ('origin', 'string'),
                        ('arrdelay', 'int32')], 'airlines')

        top = t.dest.topk(10, by=t.arrdelay.mean())

        result = to_sql(top)
        expected = to_sql(top.to_aggregation())
        assert result == expected
Example #12
0
    def test_topk_to_aggregate(self):
        t = ibis.table([('dest', 'string'),
                        ('origin', 'string'),
                        ('arrdelay', 'int32')], 'airlines')

        top = t.dest.topk(10, by=t.arrdelay.mean())

        result = to_sql(top)
        expected = to_sql(top.to_aggregation())
        assert result == expected
Example #13
0
    def test_fuse_projections(self):
        table = api.table([
            ('foo', 'int32'),
            ('bar', 'int64'),
            ('value', 'double')
        ], name='tbl')

        # Cases where we project in both cases using the base table reference
        f1 = (table['foo'] + table['bar']).name('baz')
        pred = table['value'] > 0

        table2 = table[table, f1]
        table2_filtered = table2[pred]

        f2 = (table2['foo'] * 2).name('qux')
        f3 = (table['foo'] * 2).name('qux')

        table3 = table2.projection([table2, f2])

        # fusion works even if there's a filter
        table3_filtered = table2_filtered.projection([table2, f2])

        expected = table[table, f1, f3]
        expected2 = table[pred][table, f1, f3]

        assert table3.equals(expected)
        assert table3_filtered.equals(expected2)

        ex_sql = """SELECT *, `foo` + `bar` AS `baz`, `foo` * 2 AS `qux`
FROM tbl"""

        ex_sql2 = """SELECT *, `foo` + `bar` AS `baz`, `foo` * 2 AS `qux`
FROM tbl
WHERE `value` > 0"""

        table3_sql = to_sql(table3)
        table3_filt_sql = to_sql(table3_filtered)

        assert table3_sql == ex_sql
        assert table3_filt_sql == ex_sql2

        # Use the intermediate table refs
        table3 = table2.projection([table2, f2])

        # fusion works even if there's a filter
        table3_filtered = table2_filtered.projection([table2, f2])

        expected = table[table, f1, f3]
        expected2 = table[pred][table, f1, f3]

        assert table3.equals(expected)
        assert table3_filtered.equals(expected2)
Example #14
0
    def test_unsupported_aggregate_functions(self):
        t = self.con.table('alltypes')
        w = ibis.window(order_by=t.d)

        exprs = [
            t.f.approx_nunique(),
            t.f.approx_median(),
            t.g.group_concat(),
        ]

        for expr in exprs:
            with self.assertRaises(com.TranslationError):
                proj = t.projection([expr.over(w).name('foo')])
                to_sql(proj)
Example #15
0
    def test_isin_notin_in_select(self):
        filtered = self.table[self.table.g.isin(["foo", "bar"])]
        result = to_sql(filtered)
        expected = """SELECT *
FROM alltypes
WHERE g IN ('foo', 'bar')"""
        assert result == expected

        filtered = self.table[self.table.g.notin(["foo", "bar"])]
        result = to_sql(filtered)
        expected = """SELECT *
FROM alltypes
WHERE g NOT IN ('foo', 'bar')"""
        assert result == expected
Example #16
0
    def test_fuse_projections(self):
        table = api.table([('foo', 'int32'), ('bar', 'int64'),
                           ('value', 'double')],
                          name='tbl')

        # Cases where we project in both cases using the base table reference
        f1 = (table['foo'] + table['bar']).name('baz')
        pred = table['value'] > 0

        table2 = table[table, f1]
        table2_filtered = table2[pred]

        f2 = (table2['foo'] * 2).name('qux')
        f3 = (table['foo'] * 2).name('qux')

        table3 = table2.projection([table2, f2])

        # fusion works even if there's a filter
        table3_filtered = table2_filtered.projection([table2, f2])

        expected = table[table, f1, f3]
        expected2 = table[pred][table, f1, f3]

        assert table3.equals(expected)
        assert table3_filtered.equals(expected2)

        ex_sql = """SELECT *, `foo` + `bar` AS `baz`, `foo` * 2 AS `qux`
FROM tbl"""

        ex_sql2 = """SELECT *, `foo` + `bar` AS `baz`, `foo` * 2 AS `qux`
FROM tbl
WHERE `value` > 0"""

        table3_sql = to_sql(table3)
        table3_filt_sql = to_sql(table3_filtered)

        assert table3_sql == ex_sql
        assert table3_filt_sql == ex_sql2

        # Use the intermediate table refs
        table3 = table2.projection([table2, f2])

        # fusion works even if there's a filter
        table3_filtered = table2_filtered.projection([table2, f2])

        expected = table[table, f1, f3]
        expected2 = table[pred][table, f1, f3]

        assert table3.equals(expected)
        assert table3_filtered.equals(expected2)
Example #17
0
    def test_unsupported_aggregate_functions(self):
        t = self.con.table('alltypes')
        w = ibis.window(order_by=t.d)

        exprs = [
            t.f.approx_nunique(),
            t.f.approx_median(),
            t.g.group_concat(),
        ]

        for expr in exprs:
            with self.assertRaises(com.TranslationError):
                proj = t.projection([expr.over(w).name('foo')])
                to_sql(proj)
Example #18
0
    def test_isin_notin_in_select(self):
        filtered = self.table[self.table.g.isin(["foo", "bar"])]
        result = to_sql(filtered)
        expected = """SELECT *
FROM alltypes
WHERE g IN ('foo', 'bar')"""
        assert result == expected

        filtered = self.table[self.table.g.notin(["foo", "bar"])]
        result = to_sql(filtered)
        expected = """SELECT *
FROM alltypes
WHERE g NOT IN ('foo', 'bar')"""
        assert result == expected
Example #19
0
    def test_simple_join_formatting(self):
        t1 = self.con.table('star1')
        t2 = self.con.table('star2')

        pred = t1['foo_id'] == t2['foo_id']
        pred2 = t1['bar_id'] == t2['foo_id']
        cases = [
            (t1.inner_join(t2, [pred])[[t1]],
             """SELECT t0.*
FROM star1 t0
  INNER JOIN star2 t1
    ON t0.`foo_id` = t1.`foo_id`"""),
            (t1.left_join(t2, [pred])[[t1]],
             """SELECT t0.*
FROM star1 t0
  LEFT OUTER JOIN star2 t1
    ON t0.`foo_id` = t1.`foo_id`"""),
            (t1.outer_join(t2, [pred])[[t1]],
             """SELECT t0.*
FROM star1 t0
  FULL OUTER JOIN star2 t1
    ON t0.`foo_id` = t1.`foo_id`"""),
            # multiple predicates
            (t1.inner_join(t2, [pred, pred2])[[t1]],
             """SELECT t0.*
FROM star1 t0
  INNER JOIN star2 t1
    ON t0.`foo_id` = t1.`foo_id` AND
       t0.`bar_id` = t1.`foo_id`"""),
        ]

        for expr, expected_sql in cases:
            result_sql = to_sql(expr)
            assert result_sql == expected_sql
Example #20
0
    def test_where_no_pushdown_possible(self):
        t1 = self.con.table('star1')
        t2 = self.con.table('star2')

        joined = (t1.inner_join(t2, [t1.foo_id == t2.foo_id])
                  [t1, (t1.f - t2.value1).name('diff')])

        filtered = joined[joined.diff > 1]

        # TODO: I'm not sure if this is exactly what we want
        expected_sql = """SELECT *
FROM (
  SELECT t0.*, t0.`f` - t1.`value1` AS `diff`
  FROM star1 t0
    INNER JOIN star2 t1
      ON t0.`foo_id` = t1.`foo_id`
  WHERE t0.`f` > 0 AND
        t1.`value3` < 1000
)
WHERE `diff` > 1"""

        raise unittest.SkipTest

        result_sql = to_sql(filtered)
        assert result_sql == expected_sql
Example #21
0
    def test_case_in_projection(self):
        t = self.con.table('alltypes')

        expr = (t.g.case().when('foo',
                                'bar').when('baz',
                                            'qux').else_('default').end())

        expr2 = (api.case().when(t.g == 'foo', 'bar').when(t.g == 'baz',
                                                           t.g).end())

        proj = t[expr.name('col1'), expr2.name('col2'), t]

        result = to_sql(proj)
        expected = """SELECT
  CASE `g`
    WHEN 'foo' THEN 'bar'
    WHEN 'baz' THEN 'qux'
    ELSE 'default'
  END AS `col1`,
  CASE
    WHEN `g` = 'foo' THEN 'bar'
    WHEN `g` = 'baz' THEN `g`
    ELSE NULL
  END AS `col2`, *
FROM alltypes"""
        assert result == expected
Example #22
0
    def test_subquery_used_for_self_join(self):
        # There could be cases that should look in SQL like
        # WITH t0 as (some subquery)
        # select ...
        # from t0 t1
        #   join t0 t2
        #     on t1.kind = t2.subkind
        # ...
        # However, the Ibis code will simply have an expression (projection or
        # aggregation, say) built on top of the subquery expression, so we need
        # to extract the subquery unit (we see that it appears multiple times
        # in the tree).
        t = self.con.table('alltypes')

        agged = t.aggregate([t.f.sum().name('total')], by=['g', 'a', 'b'])
        view = agged.view()
        metrics = [(agged.total - view.total).max().name('metric')]
        reagged = (agged.inner_join(view, [agged.a == view.b])
                   .aggregate(metrics, by=[agged.g]))

        result = to_sql(reagged)
        expected = """WITH t0 AS (
  SELECT `g`, `a`, `b`, sum(`f`) AS `total`
  FROM alltypes
  GROUP BY 1, 2, 3
)
SELECT t0.`g`, max(t0.`total` - t1.`total`) AS `metric`
FROM t0
  INNER JOIN t0 t1
    ON t0.`a` = t1.`b`
GROUP BY 1"""
        assert result == expected
Example #23
0
    def test_topk_analysis_bug(self):
        # GH #398
        airlines = ibis.table([('dest', 'string'), ('origin', 'string'),
                               ('arrdelay', 'int32')], 'airlines')

        dests = ['ORD', 'JFK', 'SFO']
        t = airlines[airlines.dest.isin(dests)]
        delay_filter = t.dest.topk(10, by=t.arrdelay.mean())
        expr = t[delay_filter].group_by('origin').size()

        result = to_sql(expr)
        expected = """\
SELECT t0.`origin`, count(*) AS `count`
FROM airlines t0
  LEFT SEMI JOIN (
    SELECT `dest`, avg(`arrdelay`) AS `mean`
    FROM airlines
    WHERE `dest` IN ('ORD', 'JFK', 'SFO')
    GROUP BY 1
    ORDER BY `mean` DESC
    LIMIT 10
  ) t1
    ON t0.`dest` = t1.`dest`
WHERE t0.`dest` IN ('ORD', 'JFK', 'SFO')
GROUP BY 1"""

        assert result == expected
Example #24
0
    def test_topk_analysis_bug(self):
        # GH #398
        airlines = ibis.table([('dest', 'string'),
                               ('origin', 'string'),
                               ('arrdelay', 'int32')], 'airlines')

        dests = ['ORD', 'JFK', 'SFO']
        t = airlines[airlines.dest.isin(dests)]
        delay_filter = t.dest.topk(10, by=t.arrdelay.mean())
        expr = t[delay_filter].group_by('origin').size()

        result = to_sql(expr)
        expected = """\
SELECT t0.`origin`, count(*) AS `count`
FROM airlines t0
  LEFT SEMI JOIN (
    SELECT `dest`, avg(`arrdelay`) AS `mean`
    FROM airlines
    WHERE `dest` IN ('ORD', 'JFK', 'SFO')
    GROUP BY 1
    ORDER BY `mean` DESC
    LIMIT 10
  ) t1
    ON t0.`dest` = t1.`dest`
WHERE t0.`dest` IN ('ORD', 'JFK', 'SFO')
GROUP BY 1"""

        assert result == expected
Example #25
0
    def test_where_no_pushdown_possible(self):
        t1 = self.con.table('star1')
        t2 = self.con.table('star2')

        joined = (t1.inner_join(
            t2, [t1.foo_id == t2.foo_id])[t1, (t1.f - t2.value1).name('diff')])

        filtered = joined[joined.diff > 1]

        # TODO: I'm not sure if this is exactly what we want
        expected_sql = """SELECT *
FROM (
  SELECT t0.*, t0.`f` - t1.`value1` AS `diff`
  FROM star1 t0
    INNER JOIN star2 t1
      ON t0.`foo_id` = t1.`foo_id`
  WHERE t0.`f` > 0 AND
        t1.`value3` < 1000
)
WHERE `diff` > 1"""

        raise unittest.SkipTest

        result_sql = to_sql(filtered)
        assert result_sql == expected_sql
Example #26
0
    def test_self_join_subquery_distinct_equal(self):
        region = self.con.table('tpch_region')
        nation = self.con.table('tpch_nation')

        j1 = (region.join(nation,
                          region.r_regionkey == nation.n_regionkey)[region,
                                                                    nation])

        j2 = (region.join(
            nation, region.r_regionkey == nation.n_regionkey)[region,
                                                              nation].view())

        expr = (j1.join(j2, j1.r_regionkey == j2.r_regionkey)[j1.r_name,
                                                              j2.n_name])

        result = to_sql(expr)
        expected = """\
WITH t0 AS (
  SELECT t2.*, t3.*
  FROM tpch_region t2
    INNER JOIN tpch_nation t3
      ON t2.`r_regionkey` = t3.`n_regionkey`
)
SELECT t0.`r_name`, t1.`n_name`
FROM t0
  INNER JOIN t0 t1
    ON t0.`r_regionkey` = t1.`r_regionkey`"""

        assert result == expected
Example #27
0
    def test_self_join_subquery_distinct_equal(self):
        region = self.con.table('tpch_region')
        nation = self.con.table('tpch_nation')

        j1 = (region.join(nation, region.r_regionkey == nation.n_regionkey)
              [region, nation])

        j2 = (region.join(nation, region.r_regionkey == nation.n_regionkey)
              [region, nation].view())

        expr = (j1.join(j2, j1.r_regionkey == j2.r_regionkey)
                [j1.r_name, j2.n_name])

        result = to_sql(expr)
        expected = """\
WITH t0 AS (
  SELECT t2.*, t3.*
  FROM tpch_region t2
    INNER JOIN tpch_nation t3
      ON t2.`r_regionkey` = t3.`n_regionkey`
)
SELECT t0.`r_name`, t1.`n_name`
FROM t0
  INNER JOIN t0 t1
    ON t0.`r_regionkey` = t1.`r_regionkey`"""

        assert result == expected
Example #28
0
    def test_double_nested_subquery_no_aliases(self):
        # We don't require any table aliasing anywhere
        t = api.table([
            ('key1', 'string'),
            ('key2', 'string'),
            ('key3', 'string'),
            ('value', 'double')
        ], 'foo_table')

        agg1 = t.aggregate([t.value.sum().name('total')],
                           by=['key1', 'key2', 'key3'])
        agg2 = agg1.aggregate([agg1.total.sum().name('total')],
                              by=['key1', 'key2'])
        agg3 = agg2.aggregate([agg2.total.sum().name('total')],
                              by=['key1'])

        result = to_sql(agg3)
        expected = """SELECT `key1`, sum(`total`) AS `total`
FROM (
  SELECT `key1`, `key2`, sum(`total`) AS `total`
  FROM (
    SELECT `key1`, `key2`, `key3`, sum(`value`) AS `total`
    FROM foo_table
    GROUP BY 1, 2, 3
  ) t1
  GROUP BY 1, 2
) t0
GROUP BY 1"""
        assert result == expected
Example #29
0
    def test_subquery_used_for_self_join(self):
        # There could be cases that should look in SQL like
        # WITH t0 as (some subquery)
        # select ...
        # from t0 t1
        #   join t0 t2
        #     on t1.kind = t2.subkind
        # ...
        # However, the Ibis code will simply have an expression (projection or
        # aggregation, say) built on top of the subquery expression, so we need
        # to extract the subquery unit (we see that it appears multiple times
        # in the tree).
        t = self.con.table('alltypes')

        agged = t.aggregate([t.f.sum().name('total')], by=['g', 'a', 'b'])
        view = agged.view()
        metrics = [(agged.total - view.total).max().name('metric')]
        reagged = (agged.inner_join(view, [agged.a == view.b]).aggregate(
            metrics, by=[agged.g]))

        result = to_sql(reagged)
        expected = """WITH t0 AS (
  SELECT `g`, `a`, `b`, sum(`f`) AS `total`
  FROM alltypes
  GROUP BY 1, 2, 3
)
SELECT t0.`g`, max(t0.`total` - t1.`total`) AS `metric`
FROM t0
  INNER JOIN t0 t1
    ON t0.`a` = t1.`b`
GROUP BY 1"""
        assert result == expected
Example #30
0
    def test_simple_join_formatting(self):
        t1 = self.con.table('star1')
        t2 = self.con.table('star2')

        pred = t1['foo_id'] == t2['foo_id']
        pred2 = t1['bar_id'] == t2['foo_id']
        cases = [
            (t1.inner_join(t2, [pred])[[t1]], """SELECT t0.*
FROM star1 t0
  INNER JOIN star2 t1
    ON t0.`foo_id` = t1.`foo_id`"""),
            (t1.left_join(t2, [pred])[[t1]], """SELECT t0.*
FROM star1 t0
  LEFT OUTER JOIN star2 t1
    ON t0.`foo_id` = t1.`foo_id`"""),
            (t1.outer_join(t2, [pred])[[t1]], """SELECT t0.*
FROM star1 t0
  FULL OUTER JOIN star2 t1
    ON t0.`foo_id` = t1.`foo_id`"""),
            # multiple predicates
            (t1.inner_join(t2, [pred, pred2])[[t1]], """SELECT t0.*
FROM star1 t0
  INNER JOIN star2 t1
    ON t0.`foo_id` = t1.`foo_id` AND
       t0.`bar_id` = t1.`foo_id`"""),
        ]

        for expr, expected_sql in cases:
            result_sql = to_sql(expr)
            assert result_sql == expected_sql
Example #31
0
    def test_case_in_projection(self):
        t = self.con.table('alltypes')

        expr = (t.g.case()
                .when('foo', 'bar')
                .when('baz', 'qux')
                .else_('default').end())

        expr2 = (api.case()
                 .when(t.g == 'foo', 'bar')
                 .when(t.g == 'baz', t.g)
                 .end())

        proj = t[expr.name('col1'), expr2.name('col2'), t]

        result = to_sql(proj)
        expected = """SELECT
  CASE `g`
    WHEN 'foo' THEN 'bar'
    WHEN 'baz' THEN 'qux'
    ELSE 'default'
  END AS `col1`,
  CASE
    WHEN `g` = 'foo' THEN 'bar'
    WHEN `g` = 'baz' THEN `g`
    ELSE NULL
  END AS `col2`, *
FROM alltypes"""
        assert result == expected
Example #32
0
    def test_table_names_overlap_default_aliases(self):
        # see discussion in #104; this actually is not needed for query
        # correctness, and only makes the generated SQL nicer
        raise unittest.SkipTest

        t0 = api.table([
            ('key', 'string'),
            ('v1', 'double')
        ], 't1')

        t1 = api.table([
            ('key', 'string'),
            ('v2', 'double')
        ], 't0')

        expr = t0.join(t1, t0.key == t1.key)[t0.key, t0.v1, t1.v2]

        result = to_sql(expr)
        expected = """\
SELECT t2.`key`, t2.`v1`, t3.`v2`
FROM t0 t2
  INNER JOIN t1 t3
    ON t2.`key` = t3.`key`"""

        assert result == expected
Example #33
0
    def test_topk_predicate_pushdown_bug(self):
        # Observed on TPCH data
        cplusgeo = (customer.inner_join(
            nation, [customer.c_nationkey == nation.n_nationkey]).inner_join(
                region,
                [nation.n_regionkey == region.r_regionkey])[customer,
                                                            nation.n_name,
                                                            region.r_name])

        pred = cplusgeo.n_name.topk(10, by=cplusgeo.c_acctbal.sum())
        expr = cplusgeo.filter([pred])

        result = to_sql(expr)
        expected = """\
SELECT t0.*, t1.`n_name`, t2.`r_name`
FROM customer t0
  INNER JOIN nation t1
    ON t0.`c_nationkey` = t1.`n_nationkey`
  INNER JOIN region t2
    ON t1.`n_regionkey` = t2.`r_regionkey`
  LEFT SEMI JOIN (
    SELECT t1.`n_name`, sum(t0.`c_acctbal`) AS `sum`
    FROM customer t0
      INNER JOIN nation t1
        ON t0.`c_nationkey` = t1.`n_nationkey`
      INNER JOIN region t2
        ON t1.`n_regionkey` = t2.`r_regionkey`
    GROUP BY 1
    ORDER BY `sum` DESC
    LIMIT 10
  ) t3
    ON t1.`n_name` = t3.`n_name`"""
        assert result == expected
Example #34
0
    def test_topk_predicate_pushdown_bug(self):
        # Observed on TPCH data
        cplusgeo = (
            customer.inner_join(nation, [customer.c_nationkey ==
                                         nation.n_nationkey])
                    .inner_join(region, [nation.n_regionkey ==
                                         region.r_regionkey])
            [customer, nation.n_name, region.r_name])

        pred = cplusgeo.n_name.topk(10, by=cplusgeo.c_acctbal.sum())
        expr = cplusgeo.filter([pred])

        result = to_sql(expr)
        expected = """\
SELECT t0.*, t1.`n_name`, t2.`r_name`
FROM customer t0
  INNER JOIN nation t1
    ON t0.`c_nationkey` = t1.`n_nationkey`
  INNER JOIN region t2
    ON t1.`n_regionkey` = t2.`r_regionkey`
  LEFT SEMI JOIN (
    SELECT t1.`n_name`, sum(t0.`c_acctbal`) AS `sum`
    FROM customer t0
      INNER JOIN nation t1
        ON t0.`c_nationkey` = t1.`n_nationkey`
      INNER JOIN region t2
        ON t1.`n_regionkey` = t2.`r_regionkey`
    GROUP BY 1
    ORDER BY `sum` DESC
    LIMIT 10
  ) t3
    ON t1.`n_name` = t3.`n_name`"""
        assert result == expected
Example #35
0
    def test_expr_list_no_table_refs(self):
        exlist = ibis.api.expr_list([ibis.literal(1).name('a'),
                                     ibis.now().name('b'),
                                     ibis.literal(2).log().name('c')])
        result = to_sql(exlist)
        expected = """\
SELECT 1 AS `a`, now() AS `b`, ln(2) AS `c`"""
        assert result == expected
Example #36
0
    def test_array_distinct(self):
        t = self.con.table('functional_alltypes')
        expr = t.string_col.distinct()

        result = to_sql(expr)
        expected = """SELECT DISTINCT `string_col`
FROM functional_alltypes"""
        assert result == expected
Example #37
0
    def test_array_distinct(self):
        t = self.con.table('functional_alltypes')
        expr = t.string_col.distinct()

        result = to_sql(expr)
        expected = """SELECT DISTINCT `string_col`
FROM functional_alltypes"""
        assert result == expected
Example #38
0
    def test_nameless_table(self):
        # Ensure that user gets some kind of sensible error
        nameless = api.table([('key', 'string')])
        self.assertRaises(com.RelationError, to_sql, nameless)

        with_name = api.table([('key', 'string')], name='baz')
        result = to_sql(with_name)
        assert result == 'SELECT *\nFROM baz'
Example #39
0
    def test_nameless_table(self):
        # Ensure that user gets some kind of sensible error
        nameless = api.table([('key', 'string')])
        self.assertRaises(com.RelationError, to_sql, nameless)

        with_name = api.table([('key', 'string')], name='baz')
        result = to_sql(with_name)
        assert result == 'SELECT *\nFROM baz'
Example #40
0
    def test_simple_table_distinct(self):
        t = self.con.table('functional_alltypes')

        expr = t[t.string_col, t.int_col].distinct()

        result = to_sql(expr)
        expected = """SELECT DISTINCT `string_col`, `int_col`
FROM functional_alltypes"""
        assert result == expected
Example #41
0
    def test_simple_table_distinct(self):
        t = self.con.table('functional_alltypes')

        expr = t[t.string_col, t.int_col].distinct()

        result = to_sql(expr)
        expected = """SELECT DISTINCT `string_col`, `int_col`
FROM functional_alltypes"""
        assert result == expected
Example #42
0
    def test_identifier_quoting(self):
        data = api.table([('date', 'int32'), ('explain', 'string')], 'table')

        expr = data[data.date.name('else'), data.explain.name('join')]

        result = to_sql(expr)
        expected = """SELECT `date` AS `else`, `explain` AS `join`
FROM `table`"""
        assert result == expected
Example #43
0
    def test_limit(self):
        table = self.con.table('star1').limit(10)
        result = to_sql(table)
        expected = """SELECT *
FROM star1
LIMIT 10"""
        assert result == expected

        table = self.con.table('star1').limit(10, offset=5)
        result = to_sql(table)
        expected = """SELECT *
FROM star1
LIMIT 10 OFFSET 5"""
        assert result == expected

        # Put the limit in a couple places in the stack
        table = self.con.table('star1')
        table = table[table.f > 0].limit(10)
        result = to_sql(table)

        expected = """SELECT *
FROM star1
WHERE `f` > 0
LIMIT 10"""

        assert result == expected

        table = self.con.table('star1')

        # Semantically, this should produce a subquery
        table = table.limit(10)
        table = table[table.f > 0]

        result2 = to_sql(table)

        expected2 = """SELECT *
FROM (
  SELECT *
  FROM star1
  LIMIT 10
) t0
WHERE `f` > 0"""

        assert result2 == expected2
Example #44
0
    def test_limit(self):
        table = self.con.table('star1').limit(10)
        result = to_sql(table)
        expected = """SELECT *
FROM star1
LIMIT 10"""
        assert result == expected

        table = self.con.table('star1').limit(10, offset=5)
        result = to_sql(table)
        expected = """SELECT *
FROM star1
LIMIT 10 OFFSET 5"""
        assert result == expected

        # Put the limit in a couple places in the stack
        table = self.con.table('star1')
        table = table[table.f > 0].limit(10)
        result = to_sql(table)

        expected = """SELECT *
FROM star1
WHERE `f` > 0
LIMIT 10"""

        assert result == expected

        table = self.con.table('star1')

        # Semantically, this should produce a subquery
        table = table.limit(10)
        table = table[table.f > 0]

        result2 = to_sql(table)

        expected2 = """SELECT *
FROM (
  SELECT *
  FROM star1
  LIMIT 10
) t0
WHERE `f` > 0"""

        assert result2 == expected2
Example #45
0
    def test_aggregate_projection_subquery(self):
        t = self.con.table('alltypes')

        proj = t[t.f > 0][t, (t.a + t.b).name('foo')]

        def agg(x):
            return x.aggregate([x.foo.sum().name('foo total')], by=['g'])

        # predicate gets pushed down
        filtered = proj[proj.g == 'bar']

        result = to_sql(filtered)
        expected = """SELECT *, `a` + `b` AS `foo`
FROM alltypes
WHERE `f` > 0 AND
      `g` = 'bar'"""
        assert result == expected

        agged = agg(filtered)
        result = to_sql(agged)
        expected = """SELECT `g`, sum(`foo`) AS `foo total`
FROM (
  SELECT *, `a` + `b` AS `foo`
  FROM alltypes
  WHERE `f` > 0 AND
        `g` = 'bar'
) t0
GROUP BY 1"""
        assert result == expected

        # Pushdown is not possible (in Impala, Postgres, others)
        agged2 = agg(proj[proj.foo < 10])

        result = to_sql(agged2)
        expected = """SELECT t0.`g`, sum(t0.`foo`) AS `foo total`
FROM (
  SELECT *, `a` + `b` AS `foo`
  FROM alltypes
  WHERE `f` > 0
) t0
WHERE t0.`foo` < 10
GROUP BY 1"""
        assert result == expected
Example #46
0
    def test_isnull_case_expr_rewrite_failure(self):
        # #172, case expression that was not being properly converted into an
        # aggregation
        reduction = self.table.g.isnull().ifelse(1, 0).sum()

        result = to_sql(reduction)
        expected = """\
SELECT sum(CASE WHEN `g` IS NULL THEN 1 ELSE 0 END) AS `tmp`
FROM alltypes"""
        assert result == expected
Example #47
0
    def test_expr_list_no_table_refs(self):
        exlist = ibis.api.expr_list([
            ibis.literal(1).name('a'),
            ibis.now().name('b'),
            ibis.literal(2).log().name('c')
        ])
        result = to_sql(exlist)
        expected = """\
SELECT 1 AS `a`, now() AS `b`, ln(2) AS `c`"""
        assert result == expected
Example #48
0
    def test_topk_operation_to_semi_join(self):
        # TODO: top K with filter in place

        table = api.table([
            ('foo', 'string'),
            ('bar', 'string'),
            ('city', 'string'),
            ('v1', 'double'),
            ('v2', 'double'),
        ], 'tbl')

        what = table.city.topk(10, by=table.v2.mean())
        filtered = table[what]

        query = to_sql(filtered)
        expected = """SELECT t0.*
FROM tbl t0
  LEFT SEMI JOIN (
    SELECT `city`, avg(`v2`) AS `mean`
    FROM tbl
    GROUP BY 1
    ORDER BY `mean` DESC
    LIMIT 10
  ) t1
    ON t0.`city` = t1.`city`"""
        assert query == expected

        # Test the default metric (count)

        what = table.city.topk(10)
        filtered2 = table[what]
        query = to_sql(filtered2)
        expected = """SELECT t0.*
FROM tbl t0
  LEFT SEMI JOIN (
    SELECT `city`, count(`city`) AS `count`
    FROM tbl
    GROUP BY 1
    ORDER BY `count` DESC
    LIMIT 10
  ) t1
    ON t0.`city` = t1.`city`"""
        assert query == expected
Example #49
0
    def test_isnull_case_expr_rewrite_failure(self):
        # #172, case expression that was not being properly converted into an
        # aggregation
        reduction = self.table.g.isnull().ifelse(1, 0).sum()

        result = to_sql(reduction)
        expected = """\
SELECT sum(CASE WHEN `g` IS NULL THEN 1 ELSE 0 END) AS `tmp`
FROM alltypes"""
        assert result == expected
Example #50
0
    def test_topk_operation_to_semi_join(self):
        # TODO: top K with filter in place

        table = api.table([
            ('foo', 'string'),
            ('bar', 'string'),
            ('city', 'string'),
            ('v1', 'double'),
            ('v2', 'double'),
        ], 'tbl')

        what = table.city.topk(10, by=table.v2.mean())
        filtered = table[what]

        query = to_sql(filtered)
        expected = """SELECT t0.*
FROM tbl t0
  LEFT SEMI JOIN (
    SELECT `city`, avg(`v2`) AS `mean`
    FROM tbl
    GROUP BY 1
    ORDER BY `mean` DESC
    LIMIT 10
  ) t1
    ON t0.`city` = t1.`city`"""
        assert query == expected

        # Test the default metric (count)

        what = table.city.topk(10)
        filtered2 = table[what]
        query = to_sql(filtered2)
        expected = """SELECT t0.*
FROM tbl t0
  LEFT SEMI JOIN (
    SELECT `city`, count(`city`) AS `count`
    FROM tbl
    GROUP BY 1
    ORDER BY `count` DESC
    LIMIT 10
  ) t1
    ON t0.`city` = t1.`city`"""
        assert query == expected
Example #51
0
    def test_where_with_between(self):
        t = self.con.table('alltypes')

        what = t.filter([t.a > 0, t.f.between(0, 1)])
        result = to_sql(what)
        expected = """SELECT *
FROM alltypes
WHERE `a` > 0 AND
      `f` BETWEEN 0 AND 1"""
        assert result == expected
Example #52
0
    def test_aggregate_projection_subquery(self):
        t = self.con.table('alltypes')

        proj = t[t.f > 0][t, (t.a + t.b).name('foo')]

        def agg(x):
            return x.aggregate([x.foo.sum().name('foo total')], by=['g'])

        # predicate gets pushed down
        filtered = proj[proj.g == 'bar']

        result = to_sql(filtered)
        expected = """SELECT *, `a` + `b` AS `foo`
FROM alltypes
WHERE `f` > 0 AND
      `g` = 'bar'"""
        assert result == expected

        agged = agg(filtered)
        result = to_sql(agged)
        expected = """SELECT `g`, sum(`foo`) AS `foo total`
FROM (
  SELECT *, `a` + `b` AS `foo`
  FROM alltypes
  WHERE `f` > 0 AND
        `g` = 'bar'
) t0
GROUP BY 1"""
        assert result == expected

        # Pushdown is not possible (in Impala, Postgres, others)
        agged2 = agg(proj[proj.foo < 10])

        result = to_sql(agged2)
        expected = """SELECT t0.`g`, sum(t0.`foo`) AS `foo total`
FROM (
  SELECT *, `a` + `b` AS `foo`
  FROM alltypes
  WHERE `f` > 0
) t0
WHERE t0.`foo` < 10
GROUP BY 1"""
        assert result == expected
Example #53
0
    def test_join_just_materialized(self):
        t1 = self.con.table('tpch_nation')
        t2 = self.con.table('tpch_region')
        t3 = self.con.table('tpch_customer')

        # GH #491
        joined = (t1.inner_join(t2, t1.n_regionkey == t2.r_regionkey)
                  .inner_join(t3, t1.n_nationkey == t3.c_nationkey))
        result = to_sql(joined)
        expected = """SELECT *
FROM tpch_nation t0
  INNER JOIN tpch_region t1
    ON t0.`n_regionkey` = t1.`r_regionkey`
  INNER JOIN tpch_customer t2
    ON t0.`n_nationkey` = t2.`c_nationkey`"""
        assert result == expected

        result = to_sql(joined.materialize())
        assert result == expected
Example #54
0
    def test_where_with_between(self):
        t = self.con.table('alltypes')

        what = t.filter([t.a > 0, t.f.between(0, 1)])
        result = to_sql(what)
        expected = """SELECT *
FROM alltypes
WHERE `a` > 0 AND
      `f` BETWEEN 0 AND 1"""
        assert result == expected
Example #55
0
    def test_where_uncorrelated_subquery(self):
        expr = self.foo[self.foo.job.isin(self.bar.job)]

        result = to_sql(expr)
        expected = """SELECT *
FROM foo
WHERE `job` IN (
  SELECT `job`
  FROM bar
)"""
        assert result == expected
Example #56
0
    def test_join_just_materialized(self):
        t1 = self.con.table('tpch_nation')
        t2 = self.con.table('tpch_region')
        t3 = self.con.table('tpch_customer')

        # GH #491
        joined = (t1.inner_join(t2,
                                t1.n_regionkey == t2.r_regionkey).inner_join(
                                    t3, t1.n_nationkey == t3.c_nationkey))
        result = to_sql(joined)
        expected = """SELECT *
FROM tpch_nation t0
  INNER JOIN tpch_region t1
    ON t0.`n_regionkey` = t1.`r_regionkey`
  INNER JOIN tpch_customer t2
    ON t0.`n_nationkey` = t2.`c_nationkey`"""
        assert result == expected

        result = to_sql(joined.materialize())
        assert result == expected
Example #57
0
    def test_where_simple_comparisons(self):
        t1 = self.con.table('star1')

        what = t1.filter([t1.f > 0, t1.c < t1.f * 2])

        result = to_sql(what)
        expected = """SELECT *
FROM star1
WHERE `f` > 0 AND
      `c` < (`f` * 2)"""
        assert result == expected
Example #58
0
    def test_join_no_predicates_for_impala(self):
        # Impala requires that joins without predicates be written explicitly
        # as CROSS JOIN, since result sets can accidentally get too large if a
        # query is executed before predicates are written
        t1 = self.con.table('star1')
        t2 = self.con.table('star2')

        joined2 = t1.cross_join(t2)[[t1]]

        expected = """SELECT t0.*
FROM star1 t0
  CROSS JOIN star2 t1"""
        result2 = to_sql(joined2)
        assert result2 == expected

        for jtype in ['inner_join', 'left_join', 'outer_join']:
            joined = getattr(t1, jtype)(t2)[[t1]]

            result = to_sql(joined)
            assert result == expected
Example #59
0
    def test_semi_anti_joins(self):
        t1 = self.con.table('star1')
        t2 = self.con.table('star2')

        joined = t1.semi_join(t2, [t1.foo_id == t2.foo_id])[[t1]]

        result = to_sql(joined)
        expected = """SELECT t0.*
FROM star1 t0
  LEFT SEMI JOIN star2 t1
    ON t0.`foo_id` = t1.`foo_id`"""
        assert result == expected

        joined = t1.anti_join(t2, [t1.foo_id == t2.foo_id])[[t1]]
        result = to_sql(joined)
        expected = """SELECT t0.*
FROM star1 t0
  LEFT ANTI JOIN star2 t1
    ON t0.`foo_id` = t1.`foo_id`"""
        assert result == expected
Example #60
0
    def test_where_simple_comparisons(self):
        t1 = self.con.table('star1')

        what = t1.filter([t1.f > 0, t1.c < t1.f * 2])

        result = to_sql(what)
        expected = """SELECT *
FROM star1
WHERE `f` > 0 AND
      `c` < (`f` * 2)"""
        assert result == expected