Exemple #1
0
def test_window_rows_with_max_lookback(con):
    """A trailing window built from ``rows_with_max_lookback`` must not compile.

    ``ImpalaCompiler.to_sql`` is expected to raise ``NotImplementedError``
    for a sum taken over such a window.
    """
    alltypes = con.table('alltypes')
    lookback = rows_with_max_lookback(3, ibis.interval(days=3))
    window = ibis.trailing_window(lookback, order_by=alltypes.i)
    windowed_sum = alltypes.a.sum().over(window)

    with pytest.raises(NotImplementedError):
        ImpalaCompiler.to_sql(windowed_sum)
Exemple #2
0
def test_unsupported_aggregate_functions(alltypes, column, op):
    """Windowing the ``op`` aggregate of ``column`` must raise TranslationError."""
    window = ibis.window(order_by=alltypes.d)
    # ``op`` is the name of a method on the column expression; call it to
    # build the aggregate that is then evaluated over the window.
    aggregate = getattr(alltypes[column], op)()
    windowed = aggregate.over(window).name('foo')
    projection = alltypes.projection([windowed])

    with pytest.raises(com.TranslationError):
        ImpalaCompiler.to_sql(projection)
Exemple #3
0
def test_cumulative_functions(alltypes, cumulative, static):
    """A cumulative function and its static form over a cumulative window
    must compile to the same SQL.

    ``cumulative`` and ``static`` are callables taking ``(table, window)``
    and returning an expression.
    """
    window = ibis.window(order_by=alltypes.d)

    from_cumulative = cumulative(alltypes, window).name('foo')
    over_cumulative = static(alltypes, window).over(ibis.cumulative_window())
    from_static = over_cumulative.name('foo')

    cumulative_proj = alltypes.projection(from_cumulative)
    static_proj = alltypes.projection(from_static)

    cumulative_sql = ImpalaCompiler.to_sql(cumulative_proj)
    static_sql = ImpalaCompiler.to_sql(static_proj)
    assert cumulative_sql == static_sql
Exemple #4
0
    def test_ctas_ddl(self):
        con = MockConnection()

        select = ImpalaCompiler.to_ast(con.table('test1')).queries[0]
        statement = ksupport.CTASKudu(
            'another_table',
            'kudu_name',
            ['dom.d.com:7051'],
            select,
            ['string_col'],
            external=True,
            can_exist=False,
            database='foo',
        )
        result = statement.compile()

        expected = """\
CREATE EXTERNAL TABLE foo.`another_table`
TBLPROPERTIES (
  'kudu.key_columns'='string_col',
  'kudu.master_addresses'='dom.d.com:7051',
  'kudu.table_name'='kudu_name',
  'storage_handler'='com.cloudera.kudu.hive.KuduStorageHandler'
) AS
SELECT *
FROM test1"""
        assert result == expected
Exemple #5
0
def test_isin_notin_in_select(table):
    """``isin`` / ``notin`` filters compile to ``IN`` / ``NOT IN`` predicates."""
    members = {'foo', 'bar'}
    # The repr of the tuple doubles as the parenthesised SQL value list.
    values_formatted = tuple(members)

    isin_sql = ImpalaCompiler.to_sql(table[table.g.isin(members)])
    expected_isin = f"""SELECT *
FROM alltypes
WHERE `g` IN {values_formatted}"""
    assert isin_sql == expected_isin

    notin_sql = ImpalaCompiler.to_sql(table[table.g.notin(members)])
    expected_notin = f"""SELECT *
FROM alltypes
WHERE `g` NOT IN {values_formatted}"""
    assert notin_sql == expected_notin
Exemple #6
0
def test_identical_to(con):
    """``identical_to`` between two columns compiles to IS NOT DISTINCT FROM."""
    alltypes = con.table('functional_alltypes')
    comparison = alltypes.tinyint_col.identical_to(alltypes.double_col)

    expected_sql = """\
SELECT `tinyint_col` IS NOT DISTINCT FROM `double_col` AS `tmp`
FROM ibis_testing.`functional_alltypes`"""
    assert ImpalaCompiler.to_sql(comparison) == expected_sql
Exemple #7
0
def test_is_parens_identical_to():
    """Each ``identical_to(None)`` operand of ``==`` is parenthesised."""
    table = ibis.table([('a', 'string'), ('b', 'string')], 'table')
    a_is_null = table.a.identical_to(None)
    b_is_null = table.b.identical_to(None)
    filtered = table[a_is_null == b_is_null]

    expected_sql = """\
SELECT *
FROM `table`
WHERE (`a` IS NOT DISTINCT FROM NULL) = (`b` IS NOT DISTINCT FROM NULL)"""
    assert ImpalaCompiler.to_sql(filtered) == expected_sql
Exemple #8
0
def test_join_aliasing():
    """Check table aliases in SQL compiled from a chain of joins and aggregates.

    Builds several derived tables from one base table (``test_table``),
    joins them together, and compares the compiled SQL with a fixed
    expected string, including the CTE names and subquery aliases.
    """
    test = ibis.table([('a', 'int64'), ('b', 'int64'), ('c', 'int64')],
                      name='test_table')
    # Add a derived column and project down to the two columns used below.
    test = test.mutate(d=test.a + 20)
    test2 = test[test.d, test.c]
    idx = (test2.d / 15).cast('int64').name('idx')
    # Row counts per (d, idx, c) and the per-d totals of those counts.
    test3 = test2.groupby([test2.d, idx,
                           test2.c]).aggregate(row_count=test2.count())
    test3_totals = test3.groupby(
        test3.d).aggregate(total=test3.row_count.sum())
    # Self-join the counts with their totals, then keep rows below half.
    test4 = test3.join(test3_totals,
                       test3.d == test3_totals.d)[test3, test3_totals.total]
    test5 = test4[test4.row_count < test4.total / 2]
    # Separate aggregate over the mutated base table, taken as a view.
    agg = (test.groupby([test.d,
                         test.b]).aggregate(count=test.count(),
                                            unique=test.c.nunique()).view())
    joined = agg.join(test5, agg.d == test5.d)[agg, test5.total]
    result = joined
    result = ImpalaCompiler.to_sql(result)
    # NOTE(review): in the expected text below, CTE ``t0`` selects from ``t2``
    # although ``t2`` is listed last in the WITH clause -- confirm that this
    # ordering is the intended compiler output.
    expected = """\
WITH t0 AS (
  SELECT `d`, `c`
  FROM t2
),
t1 AS (
  SELECT `d`, CAST(`d` / 15 AS bigint) AS `idx`, `c`, count(*) AS `row_count`
  FROM t0
  GROUP BY 1, 2, 3
),
t2 AS (
  SELECT *, `a` + 20 AS `d`
  FROM test_table
)
SELECT t3.*, t4.`total`
FROM (
  SELECT `d`, `b`, count(*) AS `count`, count(DISTINCT `c`) AS `unique`
  FROM t2
  GROUP BY 1, 2
) t3
  INNER JOIN (
    SELECT t5.*
    FROM (
      SELECT t1.*, t8.`total`
      FROM t1
        INNER JOIN (
          SELECT `d`, sum(`row_count`) AS `total`
          FROM t1
          GROUP BY 1
        ) t8
          ON t1.`d` = t8.`d`
    ) t5
    WHERE t5.`row_count` < (t5.`total` / 2)
  ) t4
    ON t3.`d` = t4.`d`"""
    assert result == expected
Exemple #9
0
def test_relabel_projection():
    """Relabelled columns compile to ``AS`` aliases; others pass through."""
    # GH #551
    column_names = ['foo', 'bar', 'baz']
    column_types = ['int32', 'string', 'double']
    source = ibis.table(zip(column_names, column_types), name='table')
    renamed = source.relabel({'foo': 'one', 'baz': 'three'})

    expected_sql = """\
SELECT `foo` AS `one`, `bar`, `baz` AS `three`
FROM `table`"""
    assert ImpalaCompiler.to_sql(renamed) == expected_sql
Exemple #10
0
def test_is_parens(method, sql):
    """Operands produced by ``method`` are parenthesised when compared.

    ``method`` is the name of a no-argument column method and ``sql`` is the
    SQL keyword text expected between the column and ``NULL``.
    """
    table = ibis.table([('a', 'string'), ('b', 'string')], 'table')
    apply_method = operator.methodcaller(method)
    lhs = apply_method(table.a)
    rhs = apply_method(table.b)
    filtered = table[lhs == rhs]

    compiled = ImpalaCompiler.to_sql(filtered)
    template = """\
SELECT *
FROM `table`
WHERE (`a` {sql} NULL) = (`b` {sql} NULL)"""
    assert compiled == template.format(sql=sql)
Exemple #11
0
def test_join_no_predicates_for_impala(con):
    """Joins without predicates must all compile to an explicit CROSS JOIN."""
    # Impala requires that joins without predicates be written explicitly
    # as CROSS JOIN, since result sets can accidentally get too large if a
    # query is executed before predicates are written
    star1 = con.table('star1')
    star2 = con.table('star2')

    cross = star1.cross_join(star2)[[star1]]

    expected_sql = """\
SELECT t0.*
FROM star1 t0
  CROSS JOIN star2 t1"""
    assert ImpalaCompiler.to_sql(cross) == expected_sql

    # Every predicate-less join flavour should produce the same SQL.
    for join_method in ['inner_join', 'left_join', 'outer_join']:
        join = getattr(star1, join_method)
        projected = join(star2)[[star1]]
        assert ImpalaCompiler.to_sql(projected) == expected_sql
Exemple #12
0
def test_sql_extract(table):
    """Year/month/day accessors compile to ``extract`` calls."""
    # integration with SQL translation
    year = table.i.year().name('year')
    month = table.i.month().name('month')
    day = table.i.day().name('day')
    projected = table[year, month, day]

    expected_sql = """\
SELECT extract(`i`, 'year') AS `year`, extract(`i`, 'month') AS `month`,
       extract(`i`, 'day') AS `day`
FROM alltypes"""
    assert ImpalaCompiler.to_sql(projected) == expected_sql
Exemple #13
0
def test_join_with_nested_xor_condition():
    """An XOR join predicate is expanded into OR / AND NOT in the ON clause."""
    left = ibis.table([('a', 'string'), ('b', 'string')], 't')
    right = left.view()

    same_key = left.a == right.a
    exactly_one_differs = (left.a != right.b) ^ (left.b != right.a)
    projected = left.join(right, [same_key, exactly_one_differs])[left]

    expected_sql = """\
SELECT t0.*
FROM t t0
  INNER JOIN t t1
    ON (t0.`a` = t1.`a`) AND
       (((t0.`a` != t1.`b`) OR (t0.`b` != t1.`a`)) AND NOT ((t0.`a` != t1.`b`) AND (t0.`b` != t1.`a`)))"""  # noqa: E501
    assert ImpalaCompiler.to_sql(projected) == expected_sql
Exemple #14
0
def test_column_ref_table_aliases():
    """Column references are prefixed with the alias set on the context."""
    ctx = ImpalaCompiler.make_context()

    left = ibis.table([('key1', 'string'), ('value1', 'double')])
    right = ibis.table([('key2', 'string'), ('value and2', 'double')])

    # Register explicit aliases for both tables before translating.
    ctx.set_ref(left, 't0')
    ctx.set_ref(right, 't1')

    difference = left['value1'] - right['value and2']

    translated = translate(difference, context=ctx)
    assert translated == 't0.`value1` - t1.`value and2`'
Exemple #15
0
def test_logically_negate_complex_boolean_expr():
    """Negating a compound boolean wraps the whole expression in ``NOT (...)``."""
    schema = [('a', 'string'), ('b', 'double'), ('c', 'int64'), ('d', 'string')]
    t = ibis.table(schema, name='t')

    predicate = t.a.isin(['foo']) & t.c.notnull()
    negated_sql = ImpalaCompiler.to_sql(~predicate)

    expected_sql = """\
SELECT NOT (`a` IN ('foo') AND (`c` IS NOT NULL)) AS `tmp`
FROM t"""
    assert negated_sql == expected_sql
Exemple #16
0
def _create_table(table_name,
                  expr,
                  database=None,
                  can_exist=False,
                  format='parquet'):
    """Build a ``CTAS`` statement from the first query compiled for ``expr``.

    The remaining arguments are forwarded to ``CTAS`` unchanged; the
    statement object is returned without being compiled or executed.
    """
    first_query = ImpalaCompiler.to_ast(expr).queries[0]
    return CTAS(
        table_name,
        first_query,
        database=database,
        format=format,
        can_exist=can_exist,
    )
Exemple #17
0
def test_correlated_predicate_subquery(table):
    """A subcontext reuses the parent's alias for a table the parent aliased."""
    outer = table
    inner = outer.view()

    predicate = outer.g == inner.g

    parent_ctx = ImpalaCompiler.make_context()
    parent_ctx.make_alias(outer)

    # Grab alias from parent context
    child_ctx = parent_ctx.subcontext()
    child_ctx.make_alias(inner)
    child_ctx.make_alias(outer)

    translated = translate(predicate, context=child_ctx)
    assert translated == "t0.`g` = t1.`g`"
Exemple #18
0
def _check_impala_output_types_match(con, table):
    """Fail the test if a compiled query's column types differ from ``table``'s.

    ``table`` is compiled to SQL, the SQL is handed to ``con.sql``, and the
    schema of the result is compared column by column with ``table.schema()``.
    ``Category`` types are converted with ``to_integer_type()`` on both sides
    before comparing. The first mismatch triggers ``pytest.fail``.
    """
    query = ImpalaCompiler.to_sql(table)
    round_tripped = con.sql(query)

    def _clean_type(x):
        # Compare categories by their integer representation.
        if isinstance(x, Category):
            x = x.to_integer_type()
        return x

    # Distinct names for the schemas and the per-column types: the original
    # reused ``left``/``right`` for both, shadowing the schemas inside the
    # loop, and wrapped the zip in an ``enumerate`` whose index was unused.
    actual_schema, expected_schema = round_tripped.schema(), table.schema()
    columns = zip(actual_schema.names, actual_schema.types,
                  expected_schema.types)
    for name, actual_type, expected_type in columns:
        actual_type = _clean_type(actual_type)
        expected_type = _clean_type(expected_type)

        if actual_type != expected_type:
            pytest.fail('Value for {} had left type {}'
                        ' and right type {}'.format(name, actual_type,
                                                    expected_type))
Exemple #19
0
def test_multiple_filters():
    """Filtering a filtered table on its own max compiles to a scalar subquery."""
    base = ibis.table([('a', 'int64'), ('b', 'string')], name='t0')
    below_100 = base[base.a < 100]
    at_max = below_100[below_100.a == below_100.a.max()]

    expected_sql = """\
SELECT *
FROM (
  SELECT *
  FROM t0
  WHERE `a` < 100
) t0
WHERE `a` = (
  SELECT max(`a`) AS `max`
  FROM t0
  WHERE `a` < 100
)"""
    assert ImpalaCompiler.to_sql(at_max) == expected_sql
Exemple #20
0
def test_nested_joins_single_cte():
    """Nested left joins sharing one aggregate compile to a single CTE."""
    visits = ibis.table([('uuid', 'string'), ('ts', 'timestamp')], name='t')

    per_uuid_counts = visits.group_by('uuid').size()

    latest = visits.group_by('uuid').aggregate(last_visit=visits.ts.max())

    top_counts = per_uuid_counts.group_by('uuid').aggregate(
        max_count=per_uuid_counts['count'].max())

    # Join the per-uuid maxima back onto the counts, keeping the count columns.
    count_predicates = [
        'uuid',
        top_counts.max_count == per_uuid_counts['count'],
    ]
    counts_at_max = top_counts.left_join(
        per_uuid_counts, count_predicates).projection([per_uuid_counts])

    with_last_visit = counts_at_max.left_join(latest, 'uuid').projection(
        [counts_at_max, latest.last_visit])

    expected_sql = """\
WITH t0 AS (
  SELECT `uuid`, count(*) AS `count`
  FROM t
  GROUP BY 1
)
SELECT t1.*, t2.`last_visit`
FROM (
  SELECT t0.*
  FROM (
    SELECT `uuid`, max(`count`) AS `max_count`
    FROM t0
    GROUP BY 1
  ) t3
    LEFT OUTER JOIN t0
      ON (t3.`uuid` = t0.`uuid`) AND
         (t3.`max_count` = t0.`count`)
) t1
  LEFT OUTER JOIN (
    SELECT `uuid`, max(`ts`) AS `last_visit`
    FROM t
    GROUP BY 1
  ) t2
    ON t1.`uuid` = t2.`uuid`"""
    assert ImpalaCompiler.to_sql(with_last_visit) == expected_sql
def test_bucket_assign_labels(table):
    """Labelled buckets compile to a CASE over a bucketing subquery.

    Also checks that ``label`` raises ``ValueError`` when given three or
    five labels for this bucket expression.
    """
    edges = [0, 10, 25, 50]
    tier = table.f.bucket(edges, include_under=True)

    tier_sizes = table.group_by(tier.name('tier')).size()
    tier_labels = ['Under 0', '0 to 10', '10 to 25', '25 to 50']
    tier2 = tier_sizes.tier.label(tier_labels, nulls='error').name('tier2')
    labelled_counts = tier_sizes[tier2, tier_sizes['count']]

    expected_sql = """\
SELECT
  CASE `tier`
    WHEN 0 THEN 'Under 0'
    WHEN 1 THEN '0 to 10'
    WHEN 2 THEN '10 to 25'
    WHEN 3 THEN '25 to 50'
    ELSE 'error'
  END AS `tier2`, `count`
FROM (
  SELECT
    CASE
      WHEN `f` < 0 THEN 0
      WHEN (0 <= `f`) AND (`f` < 10) THEN 1
      WHEN (10 <= `f`) AND (`f` < 25) THEN 2
      WHEN (25 <= `f`) AND (`f` <= 50) THEN 3
      ELSE CAST(NULL AS tinyint)
    END AS `tier`, count(*) AS `count`
  FROM alltypes
  GROUP BY 1
) t0"""

    assert ImpalaCompiler.to_sql(labelled_counts) == expected_sql

    # A label list of the wrong length is rejected.
    with pytest.raises(ValueError):
        tier_sizes.tier.label(list("abc"))

    with pytest.raises(ValueError):
        tier_sizes.tier.label(list("abcde"))
Exemple #22
0
def test_nested_join_base():
    """An aggregate joined back onto its own source reuses that source as a CTE."""
    visits = ibis.table([('uuid', 'string'), ('ts', 'timestamp')], name='t')
    per_uuid_counts = visits.group_by('uuid').size()
    top_counts = per_uuid_counts.group_by('uuid').aggregate(
        max_count=lambda x: x['count'].max())
    joined = top_counts.left_join(per_uuid_counts, 'uuid')
    projected = joined.projection([per_uuid_counts])
    compiled_sql = ImpalaCompiler.to_sql(projected)

    expected_sql = """\
WITH t0 AS (
  SELECT `uuid`, count(*) AS `count`
  FROM t
  GROUP BY 1
)
SELECT t0.*
FROM (
  SELECT `uuid`, max(`count`) AS `max_count`
  FROM t0
  GROUP BY 1
) t1
  LEFT OUTER JOIN t0
    ON t1.`uuid` = t0.`uuid`"""
    assert compiled_sql == expected_sql
Exemple #23
0
def assert_sql_equal(expr, expected):
    """Assert that ``expr`` compiles to exactly the SQL text ``expected``."""
    compiled = ImpalaCompiler.to_sql(expr)
    assert compiled == expected
Exemple #24
0
def _get_select(expr, context=None):
    """Return ``(first query, context)`` from the AST compiled for ``expr``.

    The returned context is the one carried by the AST, not necessarily the
    ``context`` argument that was passed in.
    """
    compiled_ast = ImpalaCompiler.to_ast(expr, context)
    return compiled_ast.queries[0], compiled_ast.context
Exemple #25
0
def translate(expr, context=None, named=False):
    """Translate ``expr`` with ``ImpalaExprTranslator`` and return the result.

    A fresh context from ``ImpalaCompiler.make_context()`` is used when
    ``context`` is ``None``; ``named`` is forwarded to the translator.
    """
    ctx = ImpalaCompiler.make_context() if context is None else context
    return ImpalaExprTranslator(expr, context=ctx, named=named).get_result()
Exemple #26
0
def test_decimal_builtins(con, expr, expected):
    """Executing ``expr`` on ``con`` must return ``expected``."""
    actual = con.execute(expr)
    # The compiled SQL is used as the assertion message to aid debugging.
    assert actual == expected, ImpalaCompiler.to_sql(expr)
Exemple #27
0
def test_nested_join_multiple_ctes():
    """Check the SQL for a filter whose ``isin`` operand is derived from itself.

    A projection of ``ratings`` is joined with ``movies`` and filtered, then
    filtered again by membership of ``movieid`` in a further-filtered
    projection of the same expression. The compiled SQL is compared with a
    fixed expected string containing two CTEs and a nested ``IN`` subquery.
    """
    ratings = ibis.table(
        [
            ('userid', 'int64'),
            ('movieid', 'int64'),
            ('rating', 'int8'),
            ('timestamp', 'string'),
        ],
        name='ratings',
    )
    movies = ibis.table([('movieid', 'int64'), ('title', 'string')],
                        name='movies')

    # Replace the string timestamp with a cast column named ``datetime``.
    expr = ratings.timestamp.cast('timestamp')
    ratings2 = ratings['userid', 'movieid', 'rating', expr.name('datetime')]
    joined2 = ratings2.join(movies, ['movieid'])[ratings2, movies['title']]
    joined3 = joined2.filter(
        [joined2.userid == 118205,
         joined2.datetime.year() > 2001])
    # Second filter on top of joined3, projected down to ``movieid`` only.
    top_user_old_movie_ids = joined3.filter(
        [joined3.userid == 118205,
         joined3.datetime.year() < 2009])[['movieid']]
    # projection from a filter was hiding an insidious bug, so we're disabling
    # that for now see issue #1295
    cond = joined3.movieid.isin(top_user_old_movie_ids.movieid)
    result = joined3[cond]

    expected = """\
WITH t0 AS (
  SELECT `userid`, `movieid`, `rating`,
         CAST(`timestamp` AS timestamp) AS `datetime`
  FROM ratings
),
t1 AS (
  SELECT t0.*, t5.`title`
  FROM t0
    INNER JOIN movies t5
      ON t0.`movieid` = t5.`movieid`
)
SELECT t2.*
FROM (
  SELECT t1.*
  FROM t1
  WHERE (t1.`userid` = 118205) AND
        (extract(t1.`datetime`, 'year') > 2001)
) t2
WHERE t2.`movieid` IN (
  SELECT `movieid`
  FROM (
    SELECT `movieid`
    FROM (
      SELECT t1.*
      FROM t1
      WHERE (t1.`userid` = 118205) AND
            (extract(t1.`datetime`, 'year') > 2001) AND
            (t1.`userid` = 118205) AND
            (extract(t1.`datetime`, 'year') < 2009)
    ) t5
  ) t4
)"""
    # ``result`` is rebound from the expression to its compiled SQL text.
    result = ImpalaCompiler.to_sql(result)
    assert result == expected
Exemple #28
0
def test_identical_to_special_case():
    """Two NULLs cast to int64 compared with ``identical_to`` compile to TRUE."""
    null_int = ibis.NA.cast('int64')
    comparison = null_int.identical_to(ibis.NA.cast('int64'))
    assert ImpalaCompiler.to_sql(comparison) == 'SELECT TRUE AS `tmp`'