Exemple #1
0
def test_unsupported_aggregate_functions(alltypes, column, op):
    """Windowing the given aggregate must raise ``TranslationError``.

    ``column`` names the column of ``alltypes`` to aggregate and ``op`` names
    the zero-argument method called on that column to build the aggregate.
    """
    window = ibis.window(order_by=alltypes.d)
    aggregate = getattr(alltypes[column], op)()
    windowed = aggregate.over(window).name('foo')
    projection = alltypes.projection([windowed])
    with pytest.raises(com.TranslationError):
        to_sql(projection)
Exemple #2
0
def test_window_rows_with_max_lookback(con):
    """Compiling a trailing window built with max lookback must raise.

    The window's preceding bound comes from ``rows_with_max_lookback`` and
    ``to_sql`` is expected to raise ``NotImplementedError`` for it.
    """
    alltypes = con.table('alltypes')
    preceding = rows_with_max_lookback(3, ibis.interval(days=3))
    window = ibis.trailing_window(preceding, order_by=alltypes.i)
    windowed_sum = alltypes.a.sum().over(window)
    with pytest.raises(NotImplementedError):
        to_sql(windowed_sum)
Exemple #3
0
def test_unsupported_aggregate_functions(con, column, op):
    """Windowing the given aggregate must raise ``TranslationError``.

    The aggregate is built by calling the method named ``op`` on the column
    named ``column`` of the ``alltypes`` table obtained from ``con``.
    """
    alltypes = con.table('alltypes')
    ordered = ibis.window(order_by=alltypes.d)
    reduction = getattr(alltypes[column], op)()
    named = reduction.over(ordered).name('foo')
    projected = alltypes.projection([named])
    with pytest.raises(com.TranslationError):
        to_sql(projected)
Exemple #4
0
def test_cumulative_functions(alltypes, cumulative, static):
    """A cumulative expression and its static counterpart compile identically.

    ``cumulative`` and ``static`` are callables taking ``(table, window)``;
    the static result is additionally evaluated over
    ``ibis.cumulative_window()`` before the two SQL strings are compared.
    """
    window = ibis.window(order_by=alltypes.d)

    cumulative_expr = cumulative(alltypes, window).name('foo')
    static_expr = static(alltypes, window)
    static_expr = static_expr.over(ibis.cumulative_window()).name('foo')

    cumulative_proj = alltypes.projection(cumulative_expr)
    static_proj = alltypes.projection(static_expr)

    assert to_sql(cumulative_proj) == to_sql(static_proj)
Exemple #5
0
def test_cumulative_functions(alltypes, cumulative, static):
    """Compare SQL for a cumulative op against the windowed static op.

    Both callables receive the table and an ``order_by=d`` window; the static
    expression is then placed over ``ibis.cumulative_window()``.
    """
    ordered = ibis.window(order_by=alltypes.d)

    lhs = cumulative(alltypes, ordered).name('foo')
    rhs = static(alltypes, ordered).over(ibis.cumulative_window()).name('foo')

    lhs_projection = alltypes.projection(lhs)
    rhs_projection = alltypes.projection(rhs)

    lhs_sql = to_sql(lhs_projection)
    rhs_sql = to_sql(rhs_projection)
    assert lhs_sql == rhs_sql
Exemple #6
0
    def test_unsupported_aggregate_functions(self):
        """Each listed aggregate raises ``TranslationError`` when windowed."""
        alltypes = self.con.table('alltypes')
        window = ibis.window(order_by=alltypes.d)

        aggregates = (
            alltypes.f.approx_nunique(),
            alltypes.f.approx_median(),
            alltypes.g.group_concat(),
        )

        for aggregate in aggregates:
            # The projection is built inside the context manager as well, so
            # an error raised at either step satisfies the assertion.
            with self.assertRaises(com.TranslationError):
                windowed = aggregate.over(window).name('foo')
                to_sql(alltypes.projection([windowed]))
Exemple #7
0
    def test_isin_notin_in_select(self):
        filtered = self.table[self.table.g.isin(["foo", "bar"])]
        result = to_sql(filtered)
        expected = """SELECT *
FROM alltypes
WHERE `g` IN ('foo', 'bar')"""
        assert result == expected

        filtered = self.table[self.table.g.notin(["foo", "bar"])]
        result = to_sql(filtered)
        expected = """SELECT *
FROM alltypes
WHERE `g` NOT IN ('foo', 'bar')"""
        assert result == expected
Exemple #8
0
    def test_isin_notin_in_select(self):
        filtered = self.table[self.table.g.isin(["foo", "bar"])]
        result = to_sql(filtered)
        expected = """SELECT *
FROM alltypes
WHERE `g` IN ('foo', 'bar')"""
        assert result == expected

        filtered = self.table[self.table.g.notin(["foo", "bar"])]
        result = to_sql(filtered)
        expected = """SELECT *
FROM alltypes
WHERE `g` NOT IN ('foo', 'bar')"""
        assert result == expected
Exemple #9
0
    def test_unsupported_aggregate_functions(self):
        """Windowed approx/group_concat aggregates fail to translate."""
        table = self.con.table('alltypes')
        ordered = ibis.window(order_by=table.d)

        unsupported = [
            table.f.approx_nunique(),
            table.f.approx_median(),
            table.g.group_concat(),
        ]

        for reduction in unsupported:
            # Projection construction is deliberately kept inside the
            # ``assertRaises`` block, matching where errors may surface.
            with self.assertRaises(com.TranslationError):
                named = reduction.over(ordered).name('foo')
                projected = table.projection([named])
                to_sql(projected)
Exemple #10
0
def test_nested_join_multiple_ctes():
    """Pin the CTE layout compiled for a filter that references itself.

    The filtered join is used both as the outer query and, filtered again,
    inside the ``IN`` subquery; the expected SQL fixes how each is emitted.
    """
    ratings = ibis.table(
        [
            ('userid', 'int64'),
            ('movieid', 'int64'),
            ('rating', 'int8'),
            ('timestamp', 'string'),
        ],
        name='ratings',
    )
    movies = ibis.table(
        [('movieid', 'int64'), ('title', 'string')], name='movies'
    )

    as_datetime = ratings.timestamp.cast('timestamp').name('datetime')
    typed_ratings = ratings['userid', 'movieid', 'rating', as_datetime]
    with_titles = typed_ratings.join(movies, ['movieid'])[
        typed_ratings, movies['title']
    ]
    after_2001 = with_titles.filter(
        [with_titles.userid == 118205, with_titles.datetime.year() > 2001]
    )
    old_movie_ids = after_2001.filter(
        [after_2001.userid == 118205, after_2001.datetime.year() < 2009]
    )[['movieid']]
    # projection from a filter was hiding an insidious bug, so we're disabling
    # that for now see issue #1295
    in_old_movies = after_2001.movieid.isin(old_movie_ids.movieid)
    expr = after_2001[in_old_movies]

    expected = """\
WITH t0 AS (
  SELECT `userid`, `movieid`, `rating`,
         CAST(`timestamp` AS timestamp) AS `datetime`
  FROM ratings
),
t1 AS (
  SELECT t0.*, t5.`title`
  FROM t0
    INNER JOIN movies t5
      ON t0.`movieid` = t5.`movieid`
)
SELECT t2.*
FROM (
  SELECT t1.*
  FROM t1
  WHERE (t1.`userid` = 118205) AND
        (extract(t1.`datetime`, 'year') > 2001)
) t2
WHERE t2.`movieid` IN (
  SELECT `movieid`
  FROM (
    SELECT t1.*
    FROM t1
    WHERE (t1.`userid` = 118205) AND
          (extract(t1.`datetime`, 'year') > 2001) AND
          (t1.`userid` = 118205) AND
          (extract(t1.`datetime`, 'year') < 2009)
  ) t4
)"""
    assert to_sql(expr) == expected
Exemple #11
0
def test_join_aliasing():
    """Pin the table aliases emitted when joining two derived aggregates."""
    source = ibis.table(
        [('a', 'int64'), ('b', 'int64'), ('c', 'int64')],
        name='test_table',
    )
    mutated = source.mutate(d=source.a + 20)
    narrowed = mutated[mutated.d, mutated.c]
    idx = (narrowed.d / 15).cast('int64').name('idx')
    counted = narrowed.groupby([narrowed.d, idx, narrowed.c]).aggregate(
        row_count=narrowed.count()
    )
    totals = counted.groupby(counted.d).aggregate(
        total=counted.row_count.sum()
    )
    with_total = counted.join(totals, counted.d == totals.d)[
        counted, totals.total
    ]
    below_half = with_total[with_total.row_count < with_total.total / 2]
    agg = (
        mutated.groupby([mutated.d, mutated.b])
        .aggregate(count=mutated.count(), unique=mutated.c.nunique())
        .view()
    )
    joined = agg.join(below_half, agg.d == below_half.d)[
        agg, below_half.total
    ]
    compiled = to_sql(joined)
    expected = """\
WITH t0 AS (
  SELECT *, `a` + 20 AS `d`
  FROM test_table
),
t1 AS (
  SELECT `d`, `c`
  FROM t0
),
t2 AS (
  SELECT `d`, CAST(`d` / 15 AS bigint) AS `idx`, `c`, count(*) AS `row_count`
  FROM t1
  GROUP BY 1, 2, 3
)
SELECT t3.*, t4.`total`
FROM (
  SELECT `d`, `b`, count(*) AS `count`, count(DISTINCT `c`) AS `unique`
  FROM t0
  GROUP BY 1, 2
) t3
  INNER JOIN (
    SELECT t5.*
    FROM (
      SELECT t2.*, t8.`total`
      FROM t2
        INNER JOIN (
          SELECT `d`, sum(`row_count`) AS `total`
          FROM t2
          GROUP BY 1
        ) t8
          ON t2.`d` = t8.`d`
    ) t5
    WHERE t5.`row_count` < (t5.`total` / 2)
  ) t4
    ON t3.`d` = t4.`d`"""
    assert compiled == expected
Exemple #12
0
    def test_isin_notin_in_select(self):
        values = ['foo', 'bar']
        values_formatted = tuple(set(values))

        filtered = self.table[self.table.g.isin(values)]
        result = to_sql(filtered)
        expected = """SELECT *
FROM alltypes
WHERE `g` IN {}"""
        assert result == expected.format(values_formatted)

        filtered = self.table[self.table.g.notin(values)]
        result = to_sql(filtered)
        expected = """SELECT *
FROM alltypes
WHERE `g` NOT IN {}"""
        assert result == expected.format(values_formatted)
Exemple #13
0
    def test_identical_to(self):
        t = self.con.table('functional_alltypes')
        expr = t.tinyint_col.identical_to(t.double_col)
        result = to_sql(expr)
        expected = """\
SELECT `tinyint_col` IS NOT DISTINCT FROM `double_col` AS `tmp`
FROM functional_alltypes"""
        assert result == expected
Exemple #14
0
def test_is_parens_identical_to():
    """Compared ``identical_to(None)`` predicates are each parenthesized."""
    table = ibis.table([('a', 'string'), ('b', 'string')], 'table')
    a_is_null = table.a.identical_to(None)
    b_is_null = table.b.identical_to(None)
    filtered = table[a_is_null == b_is_null]

    expected = """\
SELECT *
FROM `table`
WHERE (`a` IS NOT DISTINCT FROM NULL) = (`b` IS NOT DISTINCT FROM NULL)"""
    assert to_sql(filtered) == expected
Exemple #15
0
def test_cumulative_functions(con):
    """Cumulative ops compile like static ops over a cumulative window.

    Covers sum, min, max and mean on column ``f`` of ``alltypes``.
    """
    alltypes = con.table('alltypes')

    window = ibis.window(order_by=alltypes.d)
    pairs = [
        (alltypes.f.cumsum().over(window), alltypes.f.sum().over(window)),
        (alltypes.f.cummin().over(window), alltypes.f.min().over(window)),
        (alltypes.f.cummax().over(window), alltypes.f.max().over(window)),
        (alltypes.f.cummean().over(window), alltypes.f.mean().over(window)),
    ]

    for cumulative_expr, static_expr in pairs:
        lhs = cumulative_expr.name('foo')
        rhs = static_expr.over(ibis.cumulative_window()).name('foo')

        lhs_projection = alltypes.projection(lhs)
        rhs_projection = alltypes.projection(rhs)

        assert to_sql(lhs_projection) == to_sql(rhs_projection)
Exemple #16
0
def test_cumulative_functions(con):
    """Each ``cum*`` method must produce the same SQL as its plain aggregate
    evaluated over ``ibis.cumulative_window()``.
    """
    table = con.table('alltypes')

    ordered = ibis.window(order_by=table.d)
    cases = (
        (table.f.cumsum().over(ordered), table.f.sum().over(ordered)),
        (table.f.cummin().over(ordered), table.f.min().over(ordered)),
        (table.f.cummax().over(ordered), table.f.max().over(ordered)),
        (table.f.cummean().over(ordered), table.f.mean().over(ordered)),
    )

    for case in cases:
        cumulative, static = case
        actual = cumulative.name('foo')
        expected = static.over(ibis.cumulative_window()).name('foo')

        actual_projection = table.projection(actual)
        expected_projection = table.projection(expected)

        actual_sql = to_sql(actual_projection)
        expected_sql = to_sql(expected_projection)
        assert actual_sql == expected_sql
Exemple #17
0
def test_is_parens_identical_to():
    """Equality between two ``identical_to(None)`` tests keeps parentheses."""
    t = ibis.table([('a', 'string'), ('b', 'string')], 'table')
    predicate = t.a.identical_to(None) == t.b.identical_to(None)

    compiled = to_sql(t[predicate])
    assert compiled == """\
SELECT *
FROM `table`
WHERE (`a` IS NOT DISTINCT FROM NULL) = (`b` IS NOT DISTINCT FROM NULL)"""
Exemple #18
0
def compile(expr, params=None):
    """Compile ``expr`` to SQL with the Impala compiler.

    Parameters
    ----------
    expr
        Expression to compile.
    params
        Forwarded to ``dialect.make_context`` as ``params``.

    Returns
    -------
    str

    """
    # Imported lazily, at call time rather than at module import.
    from ibis.impala.compiler import to_sql

    context = dialect.make_context(params=params)
    return to_sql(expr, context)
    def test_join_no_predicates_for_impala(self):
        # Impala requires that joins without predicates be written explicitly
        # as CROSS JOIN, since result sets can accidentally get too large if a
        # query is executed before predicates are written
        t1 = self.con.table('star1')
        t2 = self.con.table('star2')

        joined2 = t1.cross_join(t2)[[t1]]

        expected = """SELECT t0.*
FROM star1 t0
  CROSS JOIN star2 t1"""
        result2 = to_sql(joined2)
        assert result2 == expected

        for jtype in ['inner_join', 'left_join', 'outer_join']:
            joined = getattr(t1, jtype)(t2)[[t1]]

            result = to_sql(joined)
            assert result == expected
Exemple #20
0
def compile(expr, params=None):
    """
    Compile an expression using the Impala ``to_sql`` compiler, regardless of
    how the expression was built. ``expr.compile()`` is an alternative.

    Parameters
    ----------
    expr : expression to compile
    params : passed through to ``dialect.make_context``

    Returns
    -------
    compiled : string
    """
    from ibis.impala.compiler import to_sql

    impala_context = dialect.make_context(params=params)
    compiled = to_sql(expr, impala_context)
    return compiled
Exemple #21
0
    def test_relabel_projection(self):
        # GH #551
        types = ['int32', 'string', 'double']
        table = ibis.table(zip(['foo', 'bar', 'baz'], types), 'table')
        relabeled = table.relabel({'foo': 'one', 'baz': 'three'})

        result = to_sql(relabeled)
        expected = """\
SELECT `foo` AS `one`, `bar`, `baz` AS `three`
FROM `table`"""
        assert result == expected
Exemple #22
0
    def test_relabel_projection(self):
        # GH #551
        types = ['int32', 'string', 'double']
        table = ibis.table(zip(['foo', 'bar', 'baz'], types), name='table')
        relabeled = table.relabel({'foo': 'one', 'baz': 'three'})

        result = to_sql(relabeled)
        expected = """\
SELECT `foo` AS `one`, `bar`, `baz` AS `three`
FROM `table`"""
        assert result == expected
Exemple #23
0
def test_is_parens(method, sql):
    """Compared null-test predicates are each wrapped in parentheses.

    ``method`` is the zero-argument column method to call on ``a`` and ``b``;
    ``sql`` is the operator text expected between each column and ``NULL``.
    """
    table = ibis.table([('a', 'string'), ('b', 'string')], 'table')
    predicate = operator.methodcaller(method)
    filtered = table[predicate(table.a) == predicate(table.b)]

    compiled = to_sql(filtered)
    template = """\
SELECT *
FROM `table`
WHERE (`a` {sql} NULL) = (`b` {sql} NULL)"""
    assert compiled == template.format(sql=sql)
Exemple #24
0
def test_is_parens(method, sql):
    """Check parenthesization when two ``method`` results are compared.

    The expected statement is built by substituting ``sql`` into both sides
    of the ``WHERE`` comparison.
    """
    t = ibis.table([('a', 'string'), ('b', 'string')], 'table')
    call_method = operator.methodcaller(method)
    left = call_method(t.a)
    right = call_method(t.b)

    actual = to_sql(t[left == right])
    wanted = """\
SELECT *
FROM `table`
WHERE (`a` {sql} NULL) = (`b` {sql} NULL)""".format(sql=sql)
    assert actual == wanted
Exemple #25
0
def test_join_with_nested_xor_condition():
    """An XOR join predicate compiles to ``(x OR y) AND NOT (x AND y)``."""
    left = ibis.table([('a', 'string'), ('b', 'string')], 't')
    right = left.view()

    same_a = left.a == right.a
    exactly_one_differs = (left.a != right.b) ^ (left.b != right.a)
    expr = left.join(right, [same_a, exactly_one_differs])[left]

    expected = """\
SELECT t0.*
FROM t t0
  INNER JOIN t t1
    ON (t0.`a` = t1.`a`) AND
       (((t0.`a` != t1.`b`) OR (t0.`b` != t1.`a`)) AND NOT ((t0.`a` != t1.`b`) AND (t0.`b` != t1.`a`)))"""  # noqa: E501
    assert to_sql(expr) == expected
Exemple #26
0
def test_join_with_nested_xor_condition():
    """Self-join with an ``^`` predicate: XOR is expanded into OR/AND NOT."""
    base = ibis.table([('a', 'string'), ('b', 'string')], 't')
    alias = base.view()

    predicates = [
        base.a == alias.a,
        (base.a != alias.b) ^ (base.b != alias.a),
    ]
    joined = base.join(alias, predicates)
    projected = joined[base]

    compiled = to_sql(projected)
    assert compiled == """\
SELECT t0.*
FROM t t0
  INNER JOIN t t1
    ON (t0.`a` = t1.`a`) AND
       (((t0.`a` != t1.`b`) OR (t0.`b` != t1.`a`)) AND NOT ((t0.`a` != t1.`b`) AND (t0.`b` != t1.`a`)))"""  # noqa: E501
Exemple #27
0
def test_logically_negate_complex_boolean_expr():
    """Negating an ``&`` of two predicates wraps the conjunction in NOT."""
    t = ibis.table(
        [('a', 'string'), ('b', 'double'), ('c', 'int64'), ('d', 'string')],
        name='t',
    )

    conjunction = t.a.isin(['foo']) & t.c.notnull()
    negated_sql = to_sql(~conjunction)

    expected = """\
SELECT NOT (`a` IN ('foo') AND (`c` IS NOT NULL)) AS `tmp`
FROM t"""
    assert negated_sql == expected
Exemple #28
0
def test_logically_negate_complex_boolean_expr():
    """``~(isin & notnull)`` compiles to a single ``NOT (...)`` column."""
    schema = [
        ('a', 'string'),
        ('b', 'double'),
        ('c', 'int64'),
        ('d', 'string'),
    ]
    table = ibis.table(schema, name='t')

    def build_predicate(tbl):
        # Conjunction of a membership test and a null check.
        return tbl.a.isin(['foo']) & tbl.c.notnull()

    predicate = build_predicate(table)
    compiled = to_sql(~predicate)
    assert compiled == """\
SELECT NOT (`a` IN ('foo') AND (`c` IS NOT NULL)) AS `tmp`
FROM t"""
Exemple #29
0
    def _check_impala_output_types_match(self, table):
        """Fail the test if the executed query's types differ from ``table``'s.

        ``table`` is compiled with ``to_sql``, the SQL is passed to
        ``self.con.sql``, and the schema of that result is compared column by
        column with ``table.schema()``. ``Category`` types on either side are
        converted with ``to_integer_type()`` before comparison.

        Calls ``pytest.fail`` on the first column whose types differ.
        """
        query = to_sql(table)
        t = self.con.sql(query)

        def _clean_type(x):
            # Categories are compared by their integer representation.
            if isinstance(x, Category):
                x = x.to_integer_type()
            return x

        left, right = t.schema(), table.schema()
        # zip stops at the shorter schema, so only the common leading columns
        # are compared.
        for name, left_type, right_type in zip(left.names, left.types, right.types):
            left_type = _clean_type(left_type)
            right_type = _clean_type(right_type)

            if left_type != right_type:
                pytest.fail(
                    "Value for {0} had left type {1} and right type {2}".format(
                        name, left_type, right_type
                    )
                )
Exemple #30
0
    def test_timestamp_extract_field(self):
        fields = ["year", "month", "day", "hour", "minute", "second", "millisecond"]

        cases = [(getattr(self.table.i, field)(), "extract(`i`, '{0}')".format(field)) for field in fields]
        self._check_expr_cases(cases)

        # integration with SQL translation
        expr = self.table[
            self.table.i.year().name("year"), self.table.i.month().name("month"), self.table.i.day().name("day")
        ]

        result = to_sql(expr)
        expected = """SELECT extract(`i`, 'year') AS `year`, extract(`i`, 'month') AS `month`,
       extract(`i`, 'day') AS `day`
FROM alltypes"""
        assert result == expected
Exemple #31
0
def test_nested_joins_single_cte():
    """Pin the SQL for two chained left joins that share one aggregate.

    The per-uuid count is referenced by both the max-count aggregate and the
    first join; the expected SQL emits it once as CTE ``t0``.
    """
    events = ibis.table([('uuid', 'string'), ('ts', 'timestamp')], name='t')

    visit_counts = events.group_by('uuid').size()

    latest = events.group_by('uuid').aggregate(last_visit=events.ts.max())

    peak_counts = visit_counts.group_by('uuid').aggregate(
        max_count=visit_counts['count'].max()
    )

    join_predicates = ['uuid', peak_counts.max_count == visit_counts['count']]
    at_peak = peak_counts.left_join(visit_counts, join_predicates).projection(
        [visit_counts]
    )

    expr = at_peak.left_join(latest, 'uuid').projection(
        [at_peak, latest.last_visit]
    )

    expected = """\
WITH t0 AS (
  SELECT `uuid`, count(*) AS `count`
  FROM t
  GROUP BY 1
)
SELECT t1.*, t2.`last_visit`
FROM (
  SELECT t0.*
  FROM (
    SELECT `uuid`, max(`count`) AS `max_count`
    FROM t0
    GROUP BY 1
  ) t3
    LEFT OUTER JOIN t0
      ON t3.`uuid` = t0.`uuid` AND
         t3.`max_count` = t0.`count`
) t1
  LEFT OUTER JOIN (
    SELECT `uuid`, max(`ts`) AS `last_visit`
    FROM t
    GROUP BY 1
  ) t2
    ON t1.`uuid` = t2.`uuid`"""
    assert to_sql(expr) == expected
Exemple #32
0
def test_nested_joins_single_cte():
    """Nested left joins over a shared count aggregate use a single CTE."""
    table = ibis.table([('uuid', 'string'), ('ts', 'timestamp')], name='t')

    per_uuid = table.group_by('uuid')
    counts = per_uuid.size()

    last_seen = table.group_by('uuid').aggregate(last_visit=table.ts.max())

    count_max = counts['count'].max()
    max_counts = counts.group_by('uuid').aggregate(max_count=count_max)

    first_join = max_counts.left_join(
        counts, ['uuid', max_counts.max_count == counts['count']]
    )
    main_kw = first_join.projection([counts])

    second_join = main_kw.left_join(last_seen, 'uuid')
    final = second_join.projection([main_kw, last_seen.last_visit])

    wanted = """\
WITH t0 AS (
  SELECT `uuid`, count(*) AS `count`
  FROM t
  GROUP BY 1
)
SELECT t1.*, t2.`last_visit`
FROM (
  SELECT t0.*
  FROM (
    SELECT `uuid`, max(`count`) AS `max_count`
    FROM t0
    GROUP BY 1
  ) t3
    LEFT OUTER JOIN t0
      ON t3.`uuid` = t0.`uuid` AND
         t3.`max_count` = t0.`count`
) t1
  LEFT OUTER JOIN (
    SELECT `uuid`, max(`ts`) AS `last_visit`
    FROM t
    GROUP BY 1
  ) t2
    ON t1.`uuid` = t2.`uuid`"""
    compiled = to_sql(final)
    assert compiled == wanted
Exemple #33
0
    def _check_impala_output_types_match(self, table):
        """Compare the executed query's schema types with ``table``'s.

        The SQL produced by ``to_sql(table)`` is passed to ``self.con.sql``
        and the types of the resulting schema are checked, column by column,
        against ``table.schema()``. ``Category`` types are first converted
        with ``to_integer_type()`` on both sides.

        Calls ``pytest.fail`` on the first column whose types differ.
        """
        query = to_sql(table)
        t = self.con.sql(query)

        def _clean_type(x):
            # Categories are compared by their integer representation.
            if isinstance(x, Category):
                x = x.to_integer_type()
            return x

        left, right = t.schema(), table.schema()
        # zip stops at the shorter schema, so only the common leading columns
        # are compared.
        columns = zip(left.names, left.types, right.types)
        for name, left_type, right_type in columns:
            left_type = _clean_type(left_type)
            right_type = _clean_type(right_type)

            if left_type != right_type:
                pytest.fail('Value for {0} had left type {1}'
                            ' and right type {2}'.format(name, left_type,
                                                         right_type))
Exemple #34
0
def test_multiple_filters():
    """A filter on a filtered table compares against a scalar subquery.

    The second filter uses ``max`` of the already-filtered column, and the
    expected SQL repeats the first predicate inside the subquery.
    """
    base = ibis.table([('a', 'int64'), ('b', 'string')], name='t0')
    below_100 = base[base.a < 100]
    at_max = below_100[below_100.a == below_100.a.max()]
    expected = """\
SELECT *
FROM (
  SELECT *
  FROM t0
  WHERE `a` < 100
) t0
WHERE `a` = (
  SELECT max(`a`) AS `max`
  FROM t0
  WHERE `a` < 100
)"""
    assert to_sql(at_max) == expected
Exemple #35
0
def test_multiple_filters():
    """Stacked filters: ``a < 100`` first, then ``a == max(a)`` on top."""
    table = ibis.table([('a', 'int64'), ('b', 'string')], name='t0')
    first = table[table.a < 100]
    largest = first.a.max()
    second = first[first.a == largest]
    compiled = to_sql(second)
    assert compiled == """\
SELECT *
FROM (
  SELECT *
  FROM t0
  WHERE `a` < 100
) t0
WHERE `a` = (
  SELECT max(`a`) AS `max`
  FROM t0
  WHERE `a` < 100
)"""
Exemple #36
0
    def test_timestamp_extract_field(self):
        fields = ['year', 'month', 'day', 'hour', 'minute',
                  'second', 'millisecond']

        cases = [(getattr(self.table.i, field)(),
                  "extract(`i`, '{0}')".format(field))
                 for field in fields]
        self._check_expr_cases(cases)

        # integration with SQL translation
        expr = self.table[self.table.i.year().name('year'),
                          self.table.i.month().name('month'),
                          self.table.i.day().name('day')]

        result = to_sql(expr)
        expected = \
            """SELECT extract(`i`, 'year') AS `year`, extract(`i`, 'month') AS `month`,
       extract(`i`, 'day') AS `day`
FROM alltypes"""
        assert result == expected
Exemple #37
0
    def test_bucket_assign_labels(self):
        buckets = [0, 10, 25, 50]
        bucket = self.table.f.bucket(buckets, include_under=True)

        size = self.table.group_by(bucket.name('tier')).size()
        labelled = size.tier.label(['Under 0', '0 to 10',
                                    '10 to 25', '25 to 50'],
                                   nulls='error').name('tier2')
        expr = size[labelled, size['count']]

        expected = """\
SELECT
  CASE `tier`
    WHEN 0 THEN 'Under 0'
    WHEN 1 THEN '0 to 10'
    WHEN 2 THEN '10 to 25'
    WHEN 3 THEN '25 to 50'
    ELSE 'error'
  END AS `tier2`, `count`
FROM (
  SELECT
    CASE
      WHEN `f` < 0 THEN 0
      WHEN (`f` >= 0) AND (`f` < 10) THEN 1
      WHEN (`f` >= 10) AND (`f` < 25) THEN 2
      WHEN (`f` >= 25) AND (`f` <= 50) THEN 3
      ELSE NULL
    END AS `tier`, count(*) AS `count`
  FROM alltypes
  GROUP BY 1
) t0"""

        result = to_sql(expr)

        assert result == expected

        self.assertRaises(ValueError, size.tier.label, ['a', 'b', 'c'])
        self.assertRaises(ValueError, size.tier.label,
                          ['a', 'b', 'c', 'd', 'e'])
Exemple #38
0
    def test_bucket_assign_labels(self):
        buckets = [0, 10, 25, 50]
        bucket = self.table.f.bucket(buckets, include_under=True)

        size = self.table.group_by(bucket.name('tier')).size()
        labelled = size.tier.label(['Under 0', '0 to 10',
                                    '10 to 25', '25 to 50'],
                                   nulls='error').name('tier2')
        expr = size[labelled, size['count']]

        expected = """\
SELECT
  CASE `tier`
    WHEN 0 THEN 'Under 0'
    WHEN 1 THEN '0 to 10'
    WHEN 2 THEN '10 to 25'
    WHEN 3 THEN '25 to 50'
    ELSE 'error'
  END AS `tier2`, `count`
FROM (
  SELECT
    CASE
      WHEN `f` < 0 THEN 0
      WHEN (`f` >= 0) AND (`f` < 10) THEN 1
      WHEN (`f` >= 10) AND (`f` < 25) THEN 2
      WHEN (`f` >= 25) AND (`f` <= 50) THEN 3
      ELSE NULL
    END AS `tier`, count(*) AS `count`
  FROM alltypes
  GROUP BY 1
) t0"""

        result = to_sql(expr)

        assert result == expected

        self.assertRaises(ValueError, size.tier.label, ['a', 'b', 'c'])
        self.assertRaises(ValueError, size.tier.label,
                          ['a', 'b', 'c', 'd', 'e'])
Exemple #39
0
def test_nested_join_base():
    """Left-joining an aggregate back to its source emits the source as a CTE."""
    events = ibis.table([('uuid', 'string'), ('ts', 'timestamp')], name='t')
    counts = events.group_by('uuid').size()

    def max_of_count(tbl):
        # Deferred aggregate: evaluated against the grouped table.
        return tbl['count'].max()

    max_counts = counts.group_by('uuid').aggregate(max_count=max_of_count)
    joined = max_counts.left_join(counts, 'uuid')
    compiled = to_sql(joined.projection([counts]))

    expected = """\
WITH t0 AS (
  SELECT `uuid`, count(*) AS `count`
  FROM t
  GROUP BY 1
)
SELECT t0.*
FROM (
  SELECT `uuid`, max(`count`) AS `max_count`
  FROM t0
  GROUP BY 1
) t1
  LEFT OUTER JOIN t0
    ON t1.`uuid` = t0.`uuid`"""
    assert compiled == expected
Exemple #40
0
    def test_bucket_assign_labels(self):
        buckets = [0, 10, 25, 50]
        bucket = self.table.f.bucket(buckets, include_under=True)

        size = self.table.group_by(bucket.name("tier")).size()
        labelled = size.tier.label(["Under 0", "0 to 10", "10 to 25", "25 to 50"], nulls="error").name("tier2")
        expr = size[labelled, size["count"]]

        expected = """\
SELECT
  CASE `tier`
    WHEN 0 THEN 'Under 0'
    WHEN 1 THEN '0 to 10'
    WHEN 2 THEN '10 to 25'
    WHEN 3 THEN '25 to 50'
    ELSE 'error'
  END AS `tier2`, `count`
FROM (
  SELECT
    CASE
      WHEN `f` < 0 THEN 0
      WHEN (`f` >= 0) AND (`f` < 10) THEN 1
      WHEN (`f` >= 10) AND (`f` < 25) THEN 2
      WHEN (`f` >= 25) AND (`f` <= 50) THEN 3
      ELSE NULL
    END AS `tier`, count(*) AS `count`
  FROM alltypes
  GROUP BY 1
) t0"""

        result = to_sql(expr)

        assert result == expected

        self.assertRaises(ValueError, size.tier.label, ["a", "b", "c"])
        self.assertRaises(ValueError, size.tier.label, ["a", "b", "c", "d", "e"])
Exemple #41
0
def test_nested_join_base():
    """Pin the SQL for a max-of-count aggregate left-joined to the counts."""
    table = ibis.table([('uuid', 'string'), ('ts', 'timestamp')], name='t')
    per_uuid_counts = table.group_by('uuid').size()
    per_uuid_max = per_uuid_counts.group_by('uuid').aggregate(
        max_count=lambda grouped: grouped['count'].max()
    )
    expr = per_uuid_max.left_join(per_uuid_counts, 'uuid').projection(
        [per_uuid_counts]
    )
    actual = to_sql(expr)

    assert actual == """\
WITH t0 AS (
  SELECT `uuid`, count(*) AS `count`
  FROM t
  GROUP BY 1
)
SELECT t0.*
FROM (
  SELECT `uuid`, max(`count`) AS `max_count`
  FROM t0
  GROUP BY 1
) t1
  LEFT OUTER JOIN t0
    ON t1.`uuid` = t0.`uuid`"""
Exemple #42
0
 def assert_cases_equality(self, cases):
     """Execute each expression on ``self.con`` and compare with its expected value.

     ``cases`` is an iterable of ``(expr, expected)`` pairs. On a mismatch
     the assertion message is the SQL compiled from the expression.
     """
     for case in cases:
         expr, expected = case
         actual = self.con.execute(expr)
         assert actual == expected, to_sql(expr)
Exemple #43
0
def assert_sql_equal(expr, expected):
    """Assert that ``to_sql(expr)`` equals ``expected`` exactly."""
    assert to_sql(expr) == expected
Exemple #44
0
 def _check_sql(self, expr, expected):
     """Assert that ``expr`` compiles via ``to_sql`` to ``expected``."""
     compiled = to_sql(expr)
     assert compiled == expected
Exemple #45
0
def test_join_aliasing():
    """Check alias numbering in a join of two aggregates over one mutation.

    Both sides of the final join derive from the same mutated table, and the
    expected SQL pins which pieces become CTEs and which stay inline.
    """
    raw = ibis.table(
        [('a', 'int64'), ('b', 'int64'), ('c', 'int64')], name='test_table'
    )
    base = raw.mutate(d=raw.a + 20)
    d_and_c = base[base.d, base.c]
    idx = (d_and_c.d / 15).cast('int64').name('idx')
    row_counts = d_and_c.groupby([d_and_c.d, idx, d_and_c.c]).aggregate(
        row_count=d_and_c.count()
    )
    totals_by_d = row_counts.groupby(row_counts.d).aggregate(
        total=row_counts.row_count.sum()
    )
    counts_with_total = row_counts.join(
        totals_by_d, row_counts.d == totals_by_d.d
    )[row_counts, totals_by_d.total]
    minority = counts_with_total[
        counts_with_total.row_count < counts_with_total.total / 2
    ]
    by_d_and_b = base.groupby([base.d, base.b]).aggregate(
        count=base.count(), unique=base.c.nunique()
    )
    agg = by_d_and_b.view()
    joined = agg.join(minority, agg.d == minority.d)[agg, minority.total]
    compiled = to_sql(joined)
    expected = """\
WITH t0 AS (
  SELECT *, `a` + 20 AS `d`
  FROM test_table
),
t1 AS (
  SELECT `d`, `c`
  FROM t0
),
t2 AS (
  SELECT `d`, CAST(`d` / 15 AS bigint) AS `idx`, `c`, count(*) AS `row_count`
  FROM t1
  GROUP BY 1, 2, 3
)
SELECT t3.*, t4.`total`
FROM (
  SELECT `d`, `b`, count(*) AS `count`, count(DISTINCT `c`) AS `unique`
  FROM t0
  GROUP BY 1, 2
) t3
  INNER JOIN (
    SELECT t5.*
    FROM (
      SELECT t2.*, t8.`total`
      FROM t2
        INNER JOIN (
          SELECT `d`, sum(`row_count`) AS `total`
          FROM t2
          GROUP BY 1
        ) t8
          ON t2.`d` = t8.`d`
    ) t5
    WHERE t5.`row_count` < (t5.`total` / 2)
  ) t4
    ON t3.`d` = t4.`d`"""
    assert compiled == expected
Exemple #46
0
def assert_sql_equal(expr, expected):
    """Compile ``expr`` with ``to_sql`` and assert it matches ``expected``."""
    compiled = to_sql(expr)
    assert compiled == expected
Exemple #47
0
def test_decimal_builtins(con, expr, expected):
    """Executing ``expr`` on ``con`` must return ``expected``.

    On failure the assertion message is the SQL compiled from ``expr``.
    """
    actual = con.execute(expr)
    assert actual == expected, to_sql(expr)
Exemple #48
0
 def assert_cases_equality(self, cases):
     """Check every ``(expr, expected)`` pair by executing ``expr``.

     Each expression runs through ``self.con.execute``; a mismatch reports
     the expression's compiled SQL as the assertion message.
     """
     for expression, wanted in cases:
         got = self.con.execute(expression)
         assert got == wanted, to_sql(expression)
Exemple #49
0
def test_timestamp_builtins(con, expr, expected):
    """The value returned by ``con.execute(expr)`` must equal ``expected``.

    The compiled SQL for ``expr`` is used as the failure message.
    """
    executed = con.execute(expr)
    assert executed == expected, to_sql(expr)
Exemple #50
0
 def test_identical_to_special_case(self):
     """Two NULLs cast to int64 and compared with ``identical_to`` give TRUE."""
     left = ibis.NA.cast('int64')
     right = ibis.NA.cast('int64')
     compiled = to_sql(left.identical_to(right))
     assert compiled == 'SELECT TRUE AS `tmp`'
Exemple #51
0
 def _compare_sql(self, e1, e2):
     """Assert that ``e1`` and ``e2`` compile to the same SQL string."""
     first_sql = to_sql(e1)
     second_sql = to_sql(e2)
     assert first_sql == second_sql
Exemple #52
0
 def _check_sql(self, expr, expected):
     """Assert that the SQL compiled from ``expr`` is exactly ``expected``."""
     assert to_sql(expr) == expected