def test_conditions_expr():
    dataset = get_dataset("groups")
    state.set_config('use_escape_alias', 1)
    conditions = [['events.a', '=', 1]]
    assert conditions_expr(dataset, conditions, Query({}),
                           ParsingContext()) == '(events.a AS `events.a`) = 1'

    conditions = [[['events.a', '=', 1], ['groups.b', '=', 2]],
                  [['events.c', '=', 3], ['groups.d', '=', 4]]]
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) \
        == ('((events.a AS `events.a`) = 1 OR (groups.b AS `groups.b`) = 2)'
        ' AND ((events.c AS `events.c`) = 3 OR (groups.d AS `groups.d`) = 4)'
        )

    # Test column expansion
    conditions = [[['events.tags[foo]', '=', 1], ['groups.b', '=', 2]]]
    expanded = column_expr(dataset, 'events.tags[foo]', Query({}),
                           ParsingContext())
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) \
        == '({} = 1 OR (groups.b AS `groups.b`) = 2)'.format(expanded)

    # Test using alias if column has already been expanded in SELECT clause
    reuse_query = Query({})
    parsing_context = ParsingContext()
    conditions = [[['events.tags[foo]', '=', 1], ['groups.b', '=', 2]]]
    column_expr(dataset, 'events.tags[foo]', reuse_query,
                parsing_context)  # Expand it once so the next time is aliased
    assert conditions_expr(dataset, conditions, reuse_query, parsing_context) \
        == '(`events.tags[foo]` = 1 OR (groups.b AS `groups.b`) = 2)'

    # Test special output format of LIKE
    conditions = [['events.primary_hash', 'LIKE', '%foo%']]
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) \
        == '(events.primary_hash AS `events.primary_hash`) LIKE \'%foo%\''

    conditions = tuplify(
        [[['notEmpty', ['arrayElement', ['events.exception_stacks.type', 1]]],
          '=', 1]])
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) \
        == 'notEmpty(arrayElement((events.exception_stacks.type AS `events.exception_stacks.type`), 1)) = 1'

    conditions = tuplify([[['notEmpty', ['events.tags[sentry:user]']], '=',
                           1]])
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) \
        == 'notEmpty(`events.tags[sentry:user]`) = 1'

    conditions = tuplify([[['notEmpty', ['events.tags_key']], '=', 1]])
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) \
        == 'notEmpty((arrayJoin(events.tags.key) AS `events.tags_key`)) = 1'

    # Test scalar condition on array column is expanded as an iterator.
    conditions = [['events.exception_frames.filename', 'LIKE', '%foo%']]
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) \
        == 'arrayExists(x -> assumeNotNull(x LIKE \'%foo%\'), (events.exception_frames.filename AS `events.exception_frames.filename`))'
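
The nested-list condition format exercised above reads as an AND of ORs: each top-level entry is ANDed, and an entry that is itself a list of [column, op, literal] triples becomes an OR group. A minimal, self-contained sketch of just that nesting rule follows; it ignores the aliasing, tag expansion and escaping that the real conditions_expr adds, and the helper name is made up for illustration.

# Illustrative only: renders the [[col, op, literal], ...] condition DSL that
# the assertions above exercise, without aliasing, expansion or escaping.
def render_conditions(conditions):
    def render_triple(cond):
        col, op, lit = cond
        value = "'{}'".format(lit) if isinstance(lit, str) else str(lit)
        return "{} {} {}".format(col, op, value)

    parts = []
    for entry in conditions:
        if entry and isinstance(entry[0], (list, tuple)):
            # A nested list of triples is rendered as an OR group.
            parts.append("({})".format(
                " OR ".join(render_triple(c) for c in entry)))
        else:
            parts.append(render_triple(entry))
    return " AND ".join(parts)

assert render_conditions([['a', '=', 1], ['b', '=', 2]]) == 'a = 1 AND b = 2'
assert render_conditions([[['a', '=', 1], ['b', '=', 2]]]) == '(a = 1 OR b = 2)'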
Example #2
def build_selected_expressions(
    raw_expressions: Sequence[Any], ) -> List[SelectedExpression]:
    output = []
    for raw_expression in raw_expressions:
        exp = parse_expression(tuplify(raw_expression),
                               entity.get_data_model(), set())
        output.append(
            SelectedExpression(
                # An expression in the query can be a string or a
                # complex list with an alias. In the second case
                # we trust the parser to find the alias.
                name=raw_expression
                if isinstance(raw_expression, str) else exp.alias,
                expression=exp,
            ))
    return output
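
tuplify appears in nearly every example here; it converts the JSON-style nested lists into immutable tuples before parsing. A plausible sketch, assuming it simply mirrors the nesting (the real helper lives in snuba.util and may differ in details):

# Assumed behaviour of tuplify: recursively turn nested lists into tuples so
# the parsed structures are hashable and cannot be mutated downstream.
def tuplify(value):
    if isinstance(value, (list, tuple)):
        return tuple(tuplify(v) for v in value)
    return value

assert tuplify(["notEmpty", ["foo"]]) == ("notEmpty", ("foo",))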
Example #3
    def __init__(
        self,
        dataset: Dataset,
        query: Query,
        settings: RequestSettings,
    ) -> None:
        parsing_context = ParsingContext()

        aggregate_exprs = [
            column_expr(dataset, col, query, parsing_context, alias, agg)
            for (agg, col, alias) in query.get_aggregations()
        ]
        groupby = util.to_list(query.get_groupby())
        group_exprs = [
            column_expr(dataset, gb, query, parsing_context) for gb in groupby
        ]
        column_names = query.get_selected_columns() or []
        selected_cols = [
            column_expr(dataset, util.tuplify(colname), query, parsing_context)
            for colname in column_names
        ]
        select_clause = u"SELECT {}".format(
            ", ".join(group_exprs + aggregate_exprs + selected_cols))

        from_clause = u"FROM {}".format(query.get_data_source().format_from())

        if query.get_final():
            from_clause = u"{} FINAL".format(from_clause)

        if not query.get_data_source().supports_sample():
            sample_rate = None
        else:
            if query.get_sample():
                sample_rate = query.get_sample()
            elif settings.get_turbo():
                sample_rate = snuba_settings.TURBO_SAMPLE_RATE
            else:
                sample_rate = None

        if sample_rate:
            from_clause = u"{} SAMPLE {}".format(from_clause, sample_rate)

        join_clause = ""
        if query.get_arrayjoin():
            join_clause = u"ARRAY JOIN {}".format(query.get_arrayjoin())

        where_clause = ""
        if query.get_conditions():
            where_clause = u"WHERE {}".format(
                conditions_expr(dataset, query.get_conditions(), query,
                                parsing_context))

        prewhere_clause = ""
        if query.get_prewhere():
            prewhere_clause = u"PREWHERE {}".format(
                conditions_expr(dataset, query.get_prewhere(), query,
                                parsing_context))

        group_clause = ""
        if groupby:
            group_clause = "GROUP BY ({})".format(", ".join(
                column_expr(dataset, gb, query, parsing_context)
                for gb in groupby))
            if query.has_totals():
                group_clause = "{} WITH TOTALS".format(group_clause)

        having_clause = ""
        having_conditions = query.get_having()
        if having_conditions:
            assert groupby, "found HAVING clause with no GROUP BY"
            having_clause = u"HAVING {}".format(
                conditions_expr(dataset, having_conditions, query,
                                parsing_context))

        order_clause = ""
        if query.get_orderby():
            orderby = [
                column_expr(dataset, util.tuplify(ob), query, parsing_context)
                for ob in util.to_list(query.get_orderby())
            ]
            orderby = [
                u"{} {}".format(ob.lstrip("-"),
                                "DESC" if ob.startswith("-") else "ASC")
                for ob in orderby
            ]
            order_clause = u"ORDER BY {}".format(", ".join(orderby))

        limitby_clause = ""
        if query.get_limitby() is not None:
            limitby_clause = "LIMIT {} BY {}".format(*query.get_limitby())

        limit_clause = ""
        if query.get_limit() is not None:
            limit_clause = "LIMIT {}, {}".format(query.get_offset(),
                                                 query.get_limit())

        self.__formatted_query = " ".join([
            c for c in [
                select_clause,
                from_clause,
                join_clause,
                prewhere_clause,
                where_clause,
                group_clause,
                having_clause,
                order_clause,
                limitby_clause,
                limit_clause,
            ] if c
        ])
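
This constructor, like the nearly identical one in the next example, builds the SQL string clause by clause and filters out empty clauses in the final join, so optional pieces (ARRAY JOIN, PREWHERE, GROUP BY, LIMIT BY, ...) simply disappear when unused. A standalone illustration of that last step, with made-up clause values:

# Empty strings are dropped before joining, so clauses that were never
# populated leave no stray whitespace in the generated SQL.
clauses = [
    "SELECT count() AS count",
    "FROM sentry_local",        # hypothetical data source name
    "",                         # no ARRAY JOIN
    "PREWHERE project_id = 1",
    "WHERE timestamp > toDateTime('2020-01-01T00:00:00')",
    "",                         # no GROUP BY
    "LIMIT 0, 1000",
]
formatted_query = " ".join(c for c in clauses if c)
# -> SELECT count() AS count FROM sentry_local PREWHERE project_id = 1
#    WHERE timestamp > toDateTime('2020-01-01T00:00:00') LIMIT 0, 1000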
Example #4
    def __init__(
        self,
        dataset: Dataset,
        query: Query,
        settings: RequestSettings,
        prewhere_conditions: Sequence[str],
    ) -> None:
        parsing_context = ParsingContext()

        aggregate_exprs = [
            column_expr(dataset, col, query, parsing_context, alias, agg)
            for (agg, col, alias) in query.get_aggregations()
        ]
        groupby = util.to_list(query.get_groupby())
        group_exprs = [
            column_expr(dataset, gb, query, parsing_context) for gb in groupby
        ]
        column_names = query.get_selected_columns() or []
        selected_cols = [
            column_expr(dataset, util.tuplify(colname), query, parsing_context)
            for colname in column_names
        ]
        select_clause = u'SELECT {}'.format(
            ', '.join(group_exprs + aggregate_exprs + selected_cols))

        from_clause = u'FROM {}'.format(query.get_data_source().format_from())

        if query.get_final():
            from_clause = u'{} FINAL'.format(from_clause)

        if query.get_sample():
            sample_rate = query.get_sample()
        elif settings.get_turbo():
            sample_rate = snuba_settings.TURBO_SAMPLE_RATE
        else:
            sample_rate = None

        if sample_rate:
            from_clause = u'{} SAMPLE {}'.format(from_clause, sample_rate)

        join_clause = ''
        if query.get_arrayjoin():
            join_clause = u'ARRAY JOIN {}'.format(query.get_arrayjoin())

        where_clause = ''
        if query.get_conditions():
            where_clause = u'WHERE {}'.format(
                conditions_expr(dataset, query.get_conditions(), query,
                                parsing_context))

        prewhere_clause = ''
        if prewhere_conditions:
            prewhere_clause = u'PREWHERE {}'.format(
                conditions_expr(dataset, prewhere_conditions, query,
                                parsing_context))

        group_clause = ''
        if groupby:
            group_clause = 'GROUP BY ({})'.format(', '.join(
                column_expr(dataset, gb, query, parsing_context)
                for gb in groupby))
            if query.has_totals():
                group_clause = '{} WITH TOTALS'.format(group_clause)

        having_clause = ''
        having_conditions = query.get_having()
        if having_conditions:
            assert groupby, 'found HAVING clause with no GROUP BY'
            having_clause = u'HAVING {}'.format(
                conditions_expr(dataset, having_conditions, query,
                                parsing_context))

        order_clause = ''
        if query.get_orderby():
            orderby = [
                column_expr(dataset, util.tuplify(ob), query, parsing_context)
                for ob in util.to_list(query.get_orderby())
            ]
            orderby = [
                u'{} {}'.format(ob.lstrip('-'),
                                'DESC' if ob.startswith('-') else 'ASC')
                for ob in orderby
            ]
            order_clause = u'ORDER BY {}'.format(', '.join(orderby))

        limitby_clause = ''
        if query.get_limitby() is not None:
            limitby_clause = 'LIMIT {} BY {}'.format(*query.get_limitby())

        limit_clause = ''
        if query.get_limit() is not None:
            limit_clause = 'LIMIT {}, {}'.format(query.get_offset(),
                                                 query.get_limit())

        self.__formatted_query = ' '.join([
            c for c in [
                select_clause, from_clause, join_clause, prewhere_clause,
                where_clause, group_clause, having_clause, order_clause,
                limitby_clause, limit_clause
            ] if c
        ])
Example #5
                 ),
             ),
             Literal(None, 1),
         ),
     ),
 ),  # Test array columns in boolean functions are expanded as an iterator.
 (
     [["tags.key", "=", "key"]],
     FunctionCall(
         None,
         ConditionFunctions.EQ,
         (Column(None, None, "tags.key"), Literal(None, "key")),
     ),
 ),  # Array columns not expanded because in arrayjoin
 (
     tuplify([["platform", "IN", ["a", "b", "c"]],
              ["platform", "IN", ["c", "b", "a"]]]),
     FunctionCall(
         None,
         ConditionFunctions.IN,
         (
             Column(None, None, "platform"),
             FunctionCall(
                 None,
                 "tuple",
                 (Literal(None, "a"), Literal(None, "b"), Literal(
                     None, "c")),
             ),
         ),
     ),
 ),  # Test that a duplicate IN condition is de-duplicated even if the lists are in different orders.
 (
Example #6
    def test_complex_conditions_expr(self, dataset):
        source = (dataset.get_all_storages()
                  [0].get_schemas().get_read_schema().get_data_source())
        query = Query({}, source)

        assert (complex_column_expr(dataset, tuplify(["count", []]),
                                    deepcopy(query),
                                    ParsingContext()) == "count()")
        assert (complex_column_expr(
            dataset,
            tuplify(["notEmpty", ["foo"]]),
            deepcopy(query),
            ParsingContext(),
        ) == "notEmpty(foo)")
        assert (complex_column_expr(
            dataset,
            tuplify(["notEmpty", ["arrayElement", ["foo", 1]]]),
            deepcopy(query),
            ParsingContext(),
        ) == "notEmpty(arrayElement(foo, 1))")
        assert (complex_column_expr(
            dataset,
            tuplify(["foo", ["bar", ["qux"], "baz"]]),
            deepcopy(query),
            ParsingContext(),
        ) == "foo(bar(qux), baz)")
        assert (complex_column_expr(dataset, tuplify(["foo", [], "a"]),
                                    deepcopy(query),
                                    ParsingContext()) == "(foo() AS a)")
        assert (complex_column_expr(
            dataset,
            tuplify(["foo", ["b", "c"], "d"]),
            deepcopy(query),
            ParsingContext(),
        ) == "(foo(b, c) AS d)")
        assert (complex_column_expr(
            dataset,
            tuplify(["foo", ["b", "c", ["d"]]]),
            deepcopy(query),
            ParsingContext(),
        ) == "foo(b, c(d))")

        assert (complex_column_expr(
            dataset,
            tuplify(["top3", ["project_id"]]),
            deepcopy(query),
            ParsingContext(),
        ) == "topK(3)(project_id)")
        assert (complex_column_expr(
            dataset,
            tuplify(["top10", ["project_id"], "baz"]),
            deepcopy(query),
            ParsingContext(),
        ) == "(topK(10)(project_id) AS baz)")

        assert (complex_column_expr(
            dataset,
            tuplify(["emptyIfNull", ["project_id"]]),
            deepcopy(query),
            ParsingContext(),
        ) == "ifNull(project_id, '')")
        assert (complex_column_expr(
            dataset,
            tuplify(["emptyIfNull", ["project_id"], "foo"]),
            deepcopy(query),
            ParsingContext(),
        ) == "(ifNull(project_id, '') AS foo)")

        assert (complex_column_expr(dataset, tuplify(["or", ["a", "b"]]),
                                    deepcopy(query),
                                    ParsingContext()) == "or(a, b)")
        assert (complex_column_expr(dataset, tuplify(["and", ["a", "b"]]),
                                    deepcopy(query),
                                    ParsingContext()) == "and(a, b)")
        assert (complex_column_expr(
            dataset,
            tuplify(["or", [["or", ["a", "b"]], "c"]]),
            deepcopy(query),
            ParsingContext(),
        ) == "or(or(a, b), c)")
        assert (complex_column_expr(
            dataset,
            tuplify(["and", [["and", ["a", "b"]], "c"]]),
            deepcopy(query),
            ParsingContext(),
        ) == "and(and(a, b), c)")
        # (A OR B) AND C
        assert (complex_column_expr(
            dataset,
            tuplify(["and", [["or", ["a", "b"]], "c"]]),
            deepcopy(query),
            ParsingContext(),
        ) == "and(or(a, b), c)")
        # (A AND B) OR C
        assert (complex_column_expr(
            dataset,
            tuplify(["or", [["and", ["a", "b"]], "c"]]),
            deepcopy(query),
            ParsingContext(),
        ) == "or(and(a, b), c)")
        # A OR B OR C OR D
        assert (complex_column_expr(
            dataset,
            tuplify(["or", [["or", [["or", ["c", "d"]], "b"]], "a"]]),
            deepcopy(query),
            ParsingContext(),
        ) == "or(or(or(c, d), b), a)")

        assert (complex_column_expr(
            dataset,
            tuplify([
                "if",
                [
                    ["in", ["release", "tuple", ["'foo'"]]],
                    "release",
                    "'other'",
                ],
                "release",
            ]),
            deepcopy(query),
            ParsingContext(),
        ) == "(if(in(release, tuple('foo')), release, 'other') AS release)")
        assert (complex_column_expr(
            dataset,
            tuplify([
                "if",
                ["in", ["release", "tuple", ["'foo'"]], "release", "'other'"],
                "release",
            ]),
            deepcopy(query),
            ParsingContext(),
        ) == "(if(in(release, tuple('foo')), release, 'other') AS release)")

        # TODO once search_message is filled in everywhere, this can be just 'message' again.
        message_expr = "(coalesce(search_message, message) AS message)"
        assert complex_column_expr(
            dataset,
            tuplify([
                "positionCaseInsensitive",
                ["message", "'lol 'single' quotes'"]
            ]),
            deepcopy(query),
            ParsingContext(),
        ) == "positionCaseInsensitive({message_expr}, 'lol \\'single\\' quotes')".format(
            **locals())

        # dangerous characters are allowed but escaped in literals and column names
        assert (complex_column_expr(
            dataset,
            tuplify(["safe", ["fo`o", "'ba'r'"]]),
            deepcopy(query),
            ParsingContext(),
        ) == r"safe(`fo\`o`, 'ba\'r')")

        # Dangerous characters not allowed in functions
        with pytest.raises(AssertionError):
            assert complex_column_expr(
                dataset,
                tuplify([r"dang'erous", ["message", "`"]]),
                deepcopy(query),
                ParsingContext(),
            )

        # Or nested functions
        with pytest.raises(AssertionError):
            assert complex_column_expr(
                dataset,
                tuplify([r"safe", ["dang`erous", ["message"]]]),
                deepcopy(query),
                ParsingContext(),
            )
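
The final assertions above pin down how dangerous characters are treated: they are allowed but escaped in literals and column names, and rejected outright in function names. A small sketch of the escaping rule those expected strings imply (assumed helpers, not the real Snuba escaping functions):

# Escaping consistent with the expected strings above: identifiers are
# wrapped in backticks with embedded backticks backslash-escaped, literals
# are wrapped in single quotes with embedded quotes backslash-escaped.
def escape_identifier(name: str) -> str:
    return "`{}`".format(name.replace("`", "\\`"))

def escape_literal(value: str) -> str:
    return "'{}'".format(value.replace("'", "\\'"))

assert escape_identifier("fo`o") == r"`fo\`o`"
assert escape_literal("ba'r") == r"'ba\'r'"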
Example #7
def _parse_query_impl(body: MutableMapping[str, Any],
                      dataset: Dataset) -> Query:
    aggregate_exprs = []
    for aggregation in body.get("aggregations", []):
        assert isinstance(aggregation, (list, tuple))
        aggregation_function = aggregation[0]
        column_expr = aggregation[1]
        column_expr = column_expr if column_expr else []
        alias = aggregation[2]
        alias = alias if alias else None

        aggregate_exprs.append(
            parse_aggregation(aggregation_function, column_expr, alias))

    groupby_exprs = [
        parse_expression(tuplify(group_by))
        for group_by in to_list(body.get("groupby", []))
    ]
    select_exprs = [
        parse_expression(tuplify(select))
        for select in body.get("selected_columns", [])
    ]

    selected_cols = groupby_exprs + aggregate_exprs + select_exprs

    arrayjoin = body.get("arrayjoin")
    if arrayjoin:
        array_join_expr: Optional[Expression] = parse_expression(
            body["arrayjoin"])
    else:
        array_join_expr = None

    where_expr = parse_conditions_to_expr(body.get("conditions", []), dataset,
                                          arrayjoin)
    having_expr = parse_conditions_to_expr(body.get("having", []), dataset,
                                           arrayjoin)

    orderby_exprs = []
    for orderby in to_list(body.get("orderby", [])):
        if isinstance(orderby, str):
            match = NEGATE_RE.match(orderby)
            assert match is not None, f"Invalid Order By clause {orderby}"
            direction, col = match.groups()
            orderby = col
        elif is_function(orderby):
            match = NEGATE_RE.match(orderby[0])
            assert match is not None, f"Invalid Order By clause {orderby}"
            direction, col = match.groups()
            orderby = [col] + orderby[1:]
        else:
            raise ValueError(f"Invalid Order By clause {orderby}")
        orderby_parsed = parse_expression(tuplify(orderby))
        orderby_exprs.append(
            OrderBy(
                OrderByDirection.DESC
                if direction == "-" else OrderByDirection.ASC,
                orderby_parsed,
            ))

    source = dataset.get_dataset_schemas().get_read_schema().get_data_source()
    return Query(
        body,
        source,
        selected_columns=selected_cols,
        array_join=array_join_expr,
        condition=where_expr,
        groupby=groupby_exprs,
        having=having_expr,
        order_by=orderby_exprs,
    )
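
The ORDER BY handling above relies on NEGATE_RE to split an optional leading "-" (descending) off the column expression. A plausible definition consistent with that use, stated here as an assumption rather than the parser's actual pattern:

import re

# Assumed shape of NEGATE_RE: group 1 captures the optional leading "-",
# group 2 captures the remaining column expression.
NEGATE_RE = re.compile(r"^(-?)(.*)$")

assert NEGATE_RE.match("-timestamp").groups() == ("-", "timestamp")
assert NEGATE_RE.match("timestamp").groups() == ("", "timestamp")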
Example #8
    def test_complex_conditions_expr(self, dataset):
        query = Query({})

        assert complex_column_expr(dataset, tuplify(['count', []]), deepcopy(query), ParsingContext()) == 'count()'
        assert complex_column_expr(dataset, tuplify(['notEmpty', ['foo']]), deepcopy(query), ParsingContext()) == 'notEmpty(foo)'
        assert complex_column_expr(dataset, tuplify(['notEmpty', ['arrayElement', ['foo', 1]]]), deepcopy(query), ParsingContext()) == 'notEmpty(arrayElement(foo, 1))'
        assert complex_column_expr(dataset, tuplify(['foo', ['bar', ['qux'], 'baz']]), deepcopy(query), ParsingContext()) == 'foo(bar(qux), baz)'
        assert complex_column_expr(dataset, tuplify(['foo', [], 'a']), deepcopy(query), ParsingContext()) == '(foo() AS a)'
        assert complex_column_expr(dataset, tuplify(['foo', ['b', 'c'], 'd']), deepcopy(query), ParsingContext()) == '(foo(b, c) AS d)'
        assert complex_column_expr(dataset, tuplify(['foo', ['b', 'c', ['d']]]), deepcopy(query), ParsingContext()) == 'foo(b, c(d))'

        assert complex_column_expr(dataset, tuplify(['top3', ['project_id']]), deepcopy(query), ParsingContext()) == 'topK(3)(project_id)'
        assert complex_column_expr(dataset, tuplify(['top10', ['project_id'], 'baz']), deepcopy(query), ParsingContext()) == '(topK(10)(project_id) AS baz)'

        assert complex_column_expr(dataset, tuplify(['emptyIfNull', ['project_id']]), deepcopy(query), ParsingContext()) == 'ifNull(project_id, \'\')'
        assert complex_column_expr(dataset, tuplify(['emptyIfNull', ['project_id'], 'foo']), deepcopy(query), ParsingContext()) == '(ifNull(project_id, \'\') AS foo)'

        assert complex_column_expr(dataset, tuplify(['or', ['a', 'b']]), deepcopy(query), ParsingContext()) == 'or(a, b)'
        assert complex_column_expr(dataset, tuplify(['and', ['a', 'b']]), deepcopy(query), ParsingContext()) == 'and(a, b)'
        assert complex_column_expr(dataset, tuplify(['or', [['or', ['a', 'b']], 'c']]), deepcopy(query), ParsingContext()) == 'or(or(a, b), c)'
        assert complex_column_expr(dataset, tuplify(['and', [['and', ['a', 'b']], 'c']]), deepcopy(query), ParsingContext()) == 'and(and(a, b), c)'
        # (A OR B) AND C
        assert complex_column_expr(dataset, tuplify(['and', [['or', ['a', 'b']], 'c']]), deepcopy(query), ParsingContext()) == 'and(or(a, b), c)'
        # (A AND B) OR C
        assert complex_column_expr(dataset, tuplify(['or', [['and', ['a', 'b']], 'c']]), deepcopy(query), ParsingContext()) == 'or(and(a, b), c)'
        # A OR B OR C OR D
        assert complex_column_expr(dataset, tuplify(['or', [['or', [['or', ['c', 'd']], 'b']], 'a']]), deepcopy(query), ParsingContext()) == 'or(or(or(c, d), b), a)'

        assert complex_column_expr(dataset, tuplify(['if', [['in', ['release', 'tuple', ["'foo'"], ], ], 'release', "'other'"], 'release', ]), deepcopy(query), ParsingContext()) == "(if(in(release, tuple('foo')), release, 'other') AS release)"
        assert complex_column_expr(dataset, tuplify(['if', ['in', ['release', 'tuple', ["'foo'"]], 'release', "'other'", ], 'release']), deepcopy(query), ParsingContext()) == "(if(in(release, tuple('foo')), release, 'other') AS release)"

        # TODO once search_message is filled in everywhere, this can be just 'message' again.
        message_expr = '(coalesce(search_message, message) AS message)'
        assert complex_column_expr(dataset, tuplify(['positionCaseInsensitive', ['message', "'lol 'single' quotes'"]]), deepcopy(query), ParsingContext())\
            == "positionCaseInsensitive({message_expr}, 'lol \\'single\\' quotes')".format(**locals())

        # dangerous characters are allowed but escaped in literals and column names
        assert complex_column_expr(dataset, tuplify(['safe', ['fo`o', "'ba'r'"]]), deepcopy(query), ParsingContext()) == r"safe(`fo\`o`, 'ba\'r')"

        # Dangerous characters not allowed in functions
        with pytest.raises(AssertionError):
            assert complex_column_expr(dataset, tuplify([r"dang'erous", ['message', '`']]), deepcopy(query), ParsingContext())

        # Or nested functions
        with pytest.raises(AssertionError):
            assert complex_column_expr(dataset, tuplify([r"safe", ['dang`erous', ['message']]]), deepcopy(query), ParsingContext())
Example #9
    def test_conditions_expr(self):
        conditions = [['a', '=', 1]]
        assert conditions_expr(conditions, {}) == 'a = 1'

        conditions = [[['a', '=', 1]]]
        assert conditions_expr(conditions, {}) == 'a = 1'

        conditions = [['a', '=', 1], ['b', '=', 2]]
        assert conditions_expr(conditions, {}) == 'a = 1 AND b = 2'

        conditions = [[['a', '=', 1], ['b', '=', 2]]]
        assert conditions_expr(conditions, {}) == '(a = 1 OR b = 2)'

        conditions = [[['a', '=', 1], ['b', '=', 2]], ['c', '=', 3]]
        assert conditions_expr(conditions, {}) == '(a = 1 OR b = 2) AND c = 3'

        conditions = [[['a', '=', 1], ['b', '=', 2]],
                      [['c', '=', 3], ['d', '=', 4]]]
        assert conditions_expr(conditions,
                               {}) == '(a = 1 OR b = 2) AND (c = 3 OR d = 4)'

        # Malformed condition input
        conditions = [[['a', '=', 1], []]]
        assert conditions_expr(conditions, {}) == 'a = 1'

        # Test column expansion
        conditions = [[['tags[foo]', '=', 1], ['b', '=', 2]]]
        expanded = column_expr('tags[foo]', {})
        assert conditions_expr(conditions,
                               {}) == '({} = 1 OR b = 2)'.format(expanded)

        # Test using alias if column has already been expanded in SELECT clause
        reuse_body = {}
        conditions = [[['tags[foo]', '=', 1], ['b', '=', 2]]]
        column_expr('tags[foo]',
                    reuse_body)  # Expand it once so the next time is aliased
        assert conditions_expr(conditions,
                               reuse_body) == '(`tags[foo]` = 1 OR b = 2)'

        # Test special output format of LIKE
        conditions = [['primary_hash', 'LIKE', '%foo%']]
        assert conditions_expr(conditions, {}) == 'primary_hash LIKE \'%foo%\''

        conditions = tuplify(
            [[['notEmpty', ['arrayElement', ['exception_stacks.type', 1]]],
              '=', 1]])
        assert conditions_expr(
            conditions,
            {}) == 'notEmpty(arrayElement(exception_stacks.type, 1)) = 1'

        conditions = tuplify([[['notEmpty', ['tags[sentry:user]']], '=', 1]])
        assert conditions_expr(
            conditions,
            {}) == 'notEmpty((`sentry:user` AS `tags[sentry:user]`)) = 1'

        conditions = tuplify([[['notEmpty', ['tags_key']], '=', 1]])
        assert conditions_expr(
            conditions,
            {}) == 'notEmpty((arrayJoin(tags.key) AS tags_key)) = 1'

        conditions = tuplify([
            [[['notEmpty', ['tags[sentry:environment]']], '=', 'dev'],
             [['notEmpty', ['tags[sentry:environment]']], '=', 'prod']],
            [[['notEmpty', ['tags[sentry:user]']], '=', 'joe'],
             [['notEmpty', ['tags[sentry:user]']], '=', 'bob']],
        ])
        assert conditions_expr(conditions, {}) == \
                """(notEmpty((tags.value[indexOf(tags.key, 'sentry:environment')] AS `tags[sentry:environment]`)) = 'dev' OR notEmpty(`tags[sentry:environment]`) = 'prod') AND (notEmpty((`sentry:user` AS `tags[sentry:user]`)) = 'joe' OR notEmpty(`tags[sentry:user]`) = 'bob')"""

        # Test scalar condition on array column is expanded as an iterator.
        conditions = [['exception_frames.filename', 'LIKE', '%foo%']]
        assert conditions_expr(
            conditions, {}
        ) == 'arrayExists(x -> assumeNotNull(x LIKE \'%foo%\'), exception_frames.filename)'

        # Test negative scalar condition on array column is expanded as an all() type iterator.
        conditions = [['exception_frames.filename', 'NOT LIKE', '%foo%']]
        assert conditions_expr(
            conditions, {}
        ) == 'arrayAll(x -> assumeNotNull(x NOT LIKE \'%foo%\'), exception_frames.filename)'
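
The last two assertions show the rewrite rule for scalar conditions on array columns: positive operators only require one matching element (arrayExists), while negated operators must hold for every element (arrayAll). Below is a minimal rendering of just that rule, matching the expected strings above; the helper name and the set of negated operators are assumptions for illustration.

NEGATED_OPS = {"NOT LIKE", "NOT IN", "!="}

def array_condition(column, op, literal):
    # One matching element suffices for positive operators; negated
    # operators must hold across the whole array.
    fn = "arrayAll" if op in NEGATED_OPS else "arrayExists"
    return "{}(x -> assumeNotNull(x {} '{}'), {})".format(fn, op, literal, column)

assert array_condition("exception_frames.filename", "LIKE", "%foo%") == \
    "arrayExists(x -> assumeNotNull(x LIKE '%foo%'), exception_frames.filename)"
assert array_condition("exception_frames.filename", "NOT LIKE", "%foo%") == \
    "arrayAll(x -> assumeNotNull(x NOT LIKE '%foo%'), exception_frames.filename)"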
Example #10
def test_complex_conditions_expr() -> None:
    assert parse_function_to_expr(tuplify(["count", []]),) == FunctionCall(
        None, "count", ()
    )
    assert parse_function_to_expr(tuplify(["notEmpty", ["foo"]]),) == FunctionCall(
        None, "notEmpty", (Column(None, "foo", None),)
    )
    assert parse_function_to_expr(
        tuplify(["notEmpty", ["arrayElement", ["foo", 1]]]),
    ) == FunctionCall(
        None,
        "notEmpty",
        (
            FunctionCall(
                None, "arrayElement", (Column(None, "foo", None), Literal(None, 1))
            ),
        ),
    )
    assert parse_function_to_expr(
        tuplify(["foo", ["bar", ["qux"], "baz"]]),
    ) == FunctionCall(
        None,
        "foo",
        (
            FunctionCall(None, "bar", (Column(None, "qux", None),)),
            Column(None, "baz", None),
        ),
    )
    assert parse_function_to_expr(tuplify(["foo", [], "a"]),) == FunctionCall(
        "a", "foo", ()
    )
    assert parse_function_to_expr(tuplify(["foo", ["b", "c"], "d"]),) == FunctionCall(
        "d", "foo", (Column(None, "b", None), Column(None, "c", None))
    )
    assert parse_function_to_expr(tuplify(["foo", ["b", "c", ["d"]]]),) == FunctionCall(
        None,
        "foo",
        (Column(None, "b", None), FunctionCall(None, "c", (Column(None, "d", None),))),
    )

    assert parse_function_to_expr(
        tuplify(["emptyIfNull", ["project_id"]]),
    ) == FunctionCall(None, "emptyIfNull", (Column(None, "project_id", None),))

    assert parse_function_to_expr(
        tuplify(["or", [["or", ["a", "b"]], "c"]]),
    ) == binary_condition(
        None,
        BooleanFunctions.OR,
        binary_condition(
            None, BooleanFunctions.OR, Column(None, "a", None), Column(None, "b", None)
        ),
        Column(None, "c", None),
    )
    assert parse_function_to_expr(
        tuplify(["and", [["and", ["a", "b"]], "c"]]),
    ) == binary_condition(
        None,
        BooleanFunctions.AND,
        binary_condition(
            None, BooleanFunctions.AND, Column(None, "a", None), Column(None, "b", None)
        ),
        Column(None, "c", None),
    )
    # (A OR B) AND C
    assert parse_function_to_expr(
        tuplify(["and", [["or", ["a", "b"]], "c"]]),
    ) == binary_condition(
        None,
        BooleanFunctions.AND,
        binary_condition(
            None, BooleanFunctions.OR, Column(None, "a", None), Column(None, "b", None)
        ),
        Column(None, "c", None),
    )
    # A OR B OR C OR D
    assert parse_function_to_expr(
        tuplify(["or", [["or", [["or", ["c", "d"]], "b"]], "a"]]),
    ) == binary_condition(
        None,
        BooleanFunctions.OR,
        binary_condition(
            None,
            BooleanFunctions.OR,
            binary_condition(
                None,
                BooleanFunctions.OR,
                Column(None, "c", None),
                Column(None, "d", None),
            ),
            Column(None, "b", None),
        ),
        Column(None, "a", None),
    )

    assert parse_function_to_expr(
        tuplify(
            [
                "if",
                [["in", ["release", "tuple", ["'foo'"]]], "release", "'other'"],
                "release",
            ]
        ),
    ) == FunctionCall(
        "release",
        "if",
        (
            FunctionCall(
                None,
                "in",
                (
                    Column(None, "release", None),
                    FunctionCall(None, "tuple", (Literal(None, "foo"),)),
                ),
            ),
            Column(None, "release", None),
            Literal(None, "other"),
        ),
    )

    # TODO once search_message is filled in everywhere, this can be just 'message' again.
    assert parse_function_to_expr(
        tuplify(["positionCaseInsensitive", ["message", "'lol 'single' quotes'"]]),
    ) == FunctionCall(
        None,
        "positionCaseInsensitive",
        (Column(None, "message", None), Literal(None, "lol 'single' quotes")),
    )
Example #11
    def test_column_expr(self):
        body = {'granularity': 86400}
        # Single tag expression
        assert column_expr(self.dataset, 'tags[foo]', body.copy()) ==\
            "(tags.value[indexOf(tags.key, \'foo\')] AS `tags[foo]`)"

        # Promoted tag expression / no translation
        assert column_expr(self.dataset, 'tags[server_name]', body.copy()) ==\
            "(server_name AS `tags[server_name]`)"

        # Promoted tag expression / with translation
        assert column_expr(self.dataset, 'tags[app.device]', body.copy()) ==\
            "(app_device AS `tags[app.device]`)"

        # All tag keys expression
        assert column_expr(
            self.dataset, 'tags_key',
            body.copy()) == ('(arrayJoin(tags.key) AS tags_key)')

        # If we are going to use both tags_key and tags_value, expand both
        tag_group_body = {'groupby': ['tags_key', 'tags_value']}
        assert column_expr(self.dataset, 'tags_key', tag_group_body) == (
            '(((arrayJoin(arrayMap((x,y) -> [x,y], tags.key, tags.value)) '
            'AS all_tags))[1] AS tags_key)')

        assert column_expr(self.dataset, 'time', body.copy()) ==\
            "(toDate(timestamp) AS time)"

        assert column_expr(self.dataset, 'rtime', body.copy()) ==\
            "(toDate(received) AS rtime)"

        assert column_expr(self.dataset, 'col', body.copy(), aggregate='sum') ==\
            "(sum(col) AS col)"

        assert column_expr(self.dataset, 'col', body.copy(), alias='summation', aggregate='sum') ==\
            "(sum(col) AS summation)"

        # Special cases where count() doesn't need a column
        assert column_expr(self.dataset, '', body.copy(), alias='count', aggregate='count()') ==\
            "(count() AS count)"

        assert column_expr(self.dataset, '', body.copy(), alias='aggregate', aggregate='count()') ==\
            "(count() AS aggregate)"

        # Columns that need escaping
        assert column_expr(self.dataset, 'sentry:release',
                           body.copy()) == '`sentry:release`'

        # Columns that start with a negative sign (used in orderby to signify
        # sort order) retain the '-' sign outside the escaping backticks (if any)
        assert column_expr(self.dataset, '-timestamp',
                           body.copy()) == '-timestamp'
        assert column_expr(self.dataset, '-sentry:release',
                           body.copy()) == '-`sentry:release`'

        # A 'column' that is actually a string literal
        assert column_expr(self.dataset, '\'hello world\'',
                           body.copy()) == '\'hello world\''

        # Complex expressions (function calls) involving both string and column arguments
        assert column_expr(self.dataset,
                           tuplify(['concat', ['a', '\':\'', 'b']]),
                           body.copy()) == 'concat(a, \':\', b)'

        group_id_body = body.copy()
        assert column_expr(self.dataset, 'issue',
                           group_id_body) == '(nullIf(group_id, 0) AS issue)'
        assert column_expr(
            self.dataset, 'group_id',
            group_id_body) == '(nullIf(group_id, 0) AS group_id)'

        # turn uniq() into ifNull(uniq(), 0) so it doesn't return null where a number was expected.
        assert column_expr(self.dataset,
                           'tags[environment]',
                           body.copy(),
                           alias='unique_envs',
                           aggregate='uniq'
                           ) == "(ifNull(uniq(environment), 0) AS unique_envs)"
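
The first assertion in this example shows the fixed pattern a non-promoted tag expands to: a positional lookup in the parallel tags.key/tags.value arrays, aliased back to the original tags[...] name. As a standalone sketch of that substitution (assuming the tag name needs no further escaping):

def tag_expr(tag):
    # Non-promoted tags are looked up by position in the parallel key/value
    # arrays and aliased back to the user-facing tags[...] name.
    return "(tags.value[indexOf(tags.key, '{0}')] AS `tags[{0}]`)".format(tag)

assert tag_expr("foo") == "(tags.value[indexOf(tags.key, 'foo')] AS `tags[foo]`)"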
Example #12
    def format(self) -> str:
        """Generate a SQL string from the parameters."""
        body = self.__request.body
        query = self.__request.query
        source = self.__dataset \
            .get_dataset_schemas() \
            .get_read_schema() \
            .get_data_source()

        aggregate_exprs = [
            util.column_expr(self.__dataset, col, body, alias, agg)
            for (agg, col, alias) in query.get_aggregations()
        ]
        groupby = util.to_list(query.get_groupby())
        group_exprs = [
            util.column_expr(self.__dataset, gb, body) for gb in groupby
        ]
        column_names = query.get_selected_columns() or []
        selected_cols = [
            util.column_expr(self.__dataset, util.tuplify(colname), body)
            for colname in column_names
        ]
        select_clause = u'SELECT {}'.format(
            ', '.join(group_exprs + aggregate_exprs + selected_cols))

        from_clause = u'FROM {}'.format(source)
        if self.__final:
            from_clause = u'{} FINAL'.format(from_clause)
        if query.get_sample():
            from_clause = u'{} SAMPLE {}'.format(from_clause,
                                                 query.get_sample())

        join_clause = ''
        if 'arrayjoin' in body:
            join_clause = u'ARRAY JOIN {}'.format(body['arrayjoin'])

        where_clause = ''
        if query.get_conditions():
            where_clause = u'WHERE {}'.format(
                util.conditions_expr(self.__dataset, query.get_conditions(),
                                     body))

        prewhere_clause = ''
        if self.__prewhere_conditions:
            prewhere_clause = u'PREWHERE {}'.format(
                util.conditions_expr(self.__dataset,
                                     self.__prewhere_conditions, body))

        group_clause = ''
        if groupby:
            group_clause = 'GROUP BY ({})'.format(', '.join(
                util.column_expr(self.__dataset, gb, body) for gb in groupby))
            if body.get('totals', False):
                group_clause = '{} WITH TOTALS'.format(group_clause)

        having_clause = ''
        having_conditions = body.get('having', [])
        if having_conditions:
            assert groupby, 'found HAVING clause with no GROUP BY'
            having_clause = u'HAVING {}'.format(
                util.conditions_expr(self.__dataset, having_conditions, body))

        order_clause = ''
        if query.get_orderby():
            orderby = [
                util.column_expr(self.__dataset, util.tuplify(ob), body)
                for ob in util.to_list(query.get_orderby())
            ]
            orderby = [
                u'{} {}'.format(ob.lstrip('-'),
                                'DESC' if ob.startswith('-') else 'ASC')
                for ob in orderby
            ]
            order_clause = u'ORDER BY {}'.format(', '.join(orderby))

        limitby_clause = ''
        if 'limitby' in body:
            limitby_clause = 'LIMIT {} BY {}'.format(*body['limitby'])

        limit_clause = ''
        if 'limit' in body:
            limit_clause = 'LIMIT {}, {}'.format(query.get_offset(),
                                                 body['limit'])

        return ' '.join([
            c for c in [
                select_clause, from_clause, join_clause, prewhere_clause,
                where_clause, group_clause, having_clause, order_clause,
                limitby_clause, limit_clause
            ] if c
        ])
Example #13
    def test_column_expr(self):
        source = (
            self.dataset.get_all_storages()[0]
            .get_schemas()
            .get_read_schema()
            .get_data_source()
        )
        query = Query({"granularity": 86400}, source,)
        # Single tag expression
        assert (
            column_expr(self.dataset, "tags[foo]", deepcopy(query), ParsingContext())
            == "(tags.value[indexOf(tags.key, 'foo')] AS `tags[foo]`)"
        )

        # Promoted tag expression / no translation
        assert (
            column_expr(
                self.dataset, "tags[server_name]", deepcopy(query), ParsingContext()
            )
            == "(server_name AS `tags[server_name]`)"
        )

        # Promoted tag expression / with translation
        assert (
            column_expr(
                self.dataset, "tags[app.device]", deepcopy(query), ParsingContext()
            )
            == "(app_device AS `tags[app.device]`)"
        )

        # Promoted context expression / with translation
        assert (
            column_expr(
                self.dataset,
                "contexts[device.battery_level]",
                deepcopy(query),
                ParsingContext(),
            )
            == "(toString(device_battery_level) AS `contexts[device.battery_level]`)"
        )

        # All tag keys expression
        q = Query({"granularity": 86400, "selected_columns": ["tags_key"]}, source,)
        assert column_expr(self.dataset, "tags_key", q, ParsingContext()) == (
            "(arrayJoin(tags.key) AS tags_key)"
        )

        # If we are going to use both tags_key and tags_value, expand both
        tag_group_body = {"groupby": ["tags_key", "tags_value"]}
        assert column_expr(
            self.dataset, "tags_key", Query(tag_group_body, source), ParsingContext()
        ) == (
            "(((arrayJoin(arrayMap((x,y) -> [x,y], tags.key, tags.value)) "
            "AS all_tags))[1] AS tags_key)"
        )

        assert (
            column_expr(self.dataset, "time", deepcopy(query), ParsingContext())
            == "(toDate(timestamp) AS time)"
        )

        assert (
            column_expr(self.dataset, "rtime", deepcopy(query), ParsingContext())
            == "(toDate(received) AS rtime)"
        )

        assert (
            column_expr(
                self.dataset, "col", deepcopy(query), ParsingContext(), aggregate="sum"
            )
            == "(sum(col) AS col)"
        )

        assert (
            column_expr(
                self.dataset,
                "col",
                deepcopy(query),
                ParsingContext(),
                alias="summation",
                aggregate="sum",
            )
            == "(sum(col) AS summation)"
        )

        # Special cases where count() doesn't need a column
        assert (
            column_expr(
                self.dataset,
                "",
                deepcopy(query),
                ParsingContext(),
                alias="count",
                aggregate="count()",
            )
            == "(count() AS count)"
        )

        assert (
            column_expr(
                self.dataset,
                "",
                deepcopy(query),
                ParsingContext(),
                alias="aggregate",
                aggregate="count()",
            )
            == "(count() AS aggregate)"
        )

        # Columns that need escaping
        assert (
            column_expr(
                self.dataset, "sentry:release", deepcopy(query), ParsingContext()
            )
            == "`sentry:release`"
        )

        # A 'column' that is actually a string literal
        assert (
            column_expr(
                self.dataset, "'hello world'", deepcopy(query), ParsingContext()
            )
            == "'hello world'"
        )

        # Complex expressions (function calls) involving both string and column arguments
        assert (
            column_expr(
                self.dataset,
                tuplify(["concat", ["a", "':'", "b"]]),
                deepcopy(query),
                ParsingContext(),
            )
            == "concat(a, ':', b)"
        )

        group_id_query = deepcopy(query)
        assert (
            column_expr(self.dataset, "group_id", group_id_query, ParsingContext())
            == "(nullIf(group_id, 0) AS group_id)"
        )

        # turn uniq() into ifNull(uniq(), 0) so it doesn't return null where a number was expected.
        assert (
            column_expr(
                self.dataset,
                "tags[environment]",
                deepcopy(query),
                ParsingContext(),
                alias="unique_envs",
                aggregate="uniq",
            )
            == "(ifNull(uniq(environment), 0) AS unique_envs)"
        )
Example #14
    def test_conditions_expr(self, dataset):
        state.set_config("use_escape_alias", 1)
        conditions = [["a", "=", 1]]
        source = (dataset.get_all_storages()
                  [0].get_schemas().get_read_schema().get_data_source())
        assert (conditions_expr(dataset, conditions, Query({}, source),
                                ParsingContext()) == "a = 1")

        conditions = []
        assert (conditions_expr(dataset, conditions, Query({}, source),
                                ParsingContext()) == "")

        conditions = [[[]], []]
        assert (conditions_expr(dataset, conditions, Query({}, source),
                                ParsingContext()) == "")

        conditions = [[["a", "=", 1]]]
        assert (conditions_expr(dataset, conditions, Query({}, source),
                                ParsingContext()) == "a = 1")

        conditions = [["a", "=", 1], ["b", "=", 2]]
        assert (conditions_expr(dataset, conditions, Query({}, source),
                                ParsingContext()) == "a = 1 AND b = 2")

        conditions = [[["a", "=", 1], ["b", "=", 2]]]
        assert (conditions_expr(dataset, conditions, Query({}, source),
                                ParsingContext()) == "(a = 1 OR b = 2)")

        conditions = [[["a", "=", 1], ["b", "=", 2]], ["c", "=", 3]]
        assert (conditions_expr(dataset, conditions, Query(
            {}, source), ParsingContext()) == "(a = 1 OR b = 2) AND c = 3")

        conditions = [[["a", "=", 1], ["b", "=", 2]],
                      [["c", "=", 3], ["d", "=", 4]]]
        assert (conditions_expr(
            dataset, conditions, Query({}, source),
            ParsingContext()) == "(a = 1 OR b = 2) AND (c = 3 OR d = 4)")

        # Malformed condition input
        conditions = [[["a", "=", 1], []]]
        assert (conditions_expr(dataset, conditions, Query({}, source),
                                ParsingContext()) == "a = 1")

        # Test column expansion
        conditions = [[["tags[foo]", "=", 1], ["b", "=", 2]]]
        expanded = column_expr(dataset, "tags[foo]", Query({}, source),
                               ParsingContext())
        assert conditions_expr(
            dataset, conditions, Query({}, source),
            ParsingContext()) == "({} = 1 OR b = 2)".format(expanded)

        # Test using alias if column has already been expanded in SELECT clause
        reuse_query = Query({}, source)
        parsing_context = ParsingContext()
        conditions = [[["tags[foo]", "=", 1], ["b", "=", 2]]]
        column_expr(
            dataset, "tags[foo]", reuse_query,
            parsing_context)  # Expand it once so the next time is aliased
        assert (conditions_expr(
            dataset, conditions, reuse_query,
            parsing_context) == "(`tags[foo]` = 1 OR b = 2)")

        # Test special output format of LIKE
        conditions = [["primary_hash", "LIKE", "%foo%"]]
        assert (conditions_expr(dataset, conditions, Query(
            {}, source), ParsingContext()) == "primary_hash LIKE '%foo%'")

        conditions = tuplify(
            [[["notEmpty", ["arrayElement", ["exception_stacks.type", 1]]],
              "=", 1]])
        assert (
            conditions_expr(dataset, conditions, Query({}, source),
                            ParsingContext()) ==
            "notEmpty(arrayElement((exception_stacks.type AS `exception_stacks.type`), 1)) = 1"
        )

        conditions = tuplify([[["notEmpty", ["tags[sentry:user]"]], "=", 1]])
        assert (conditions_expr(dataset, conditions, Query({}, source),
                                ParsingContext()) ==
                "notEmpty((`sentry:user` AS `tags[sentry:user]`)) = 1")

        conditions = tuplify([[["notEmpty", ["tags_key"]], "=", 1]])
        assert (conditions_expr(
            dataset,
            conditions,
            Query({"conditions": [[["notEmpty", ["tags_key"]], "=", 1]]},
                  source),
            ParsingContext(),
        ) == "notEmpty((arrayJoin(tags.key) AS tags_key)) = 1")

        conditions = tuplify([
            [
                [["notEmpty", ["tags[sentry:environment]"]], "=", "dev"],
                [["notEmpty", ["tags[sentry:environment]"]], "=", "prod"],
            ],
            [
                [["notEmpty", ["tags[sentry:user]"]], "=", "joe"],
                [["notEmpty", ["tags[sentry:user]"]], "=", "bob"],
            ],
        ])
        assert (
            conditions_expr(dataset, conditions, Query({}, source),
                            ParsingContext()) ==
            """(notEmpty((tags.value[indexOf(tags.key, 'sentry:environment')] AS `tags[sentry:environment]`)) = 'dev' OR notEmpty(`tags[sentry:environment]`) = 'prod') AND (notEmpty((`sentry:user` AS `tags[sentry:user]`)) = 'joe' OR notEmpty(`tags[sentry:user]`) = 'bob')"""
        )

        # Test scalar condition on array column is expanded as an iterator.
        conditions = [["exception_frames.filename", "LIKE", "%foo%"]]
        assert (
            conditions_expr(dataset, conditions, Query({}, source),
                            ParsingContext()) ==
            "arrayExists(x -> assumeNotNull(x LIKE '%foo%'), (exception_frames.filename AS `exception_frames.filename`))"
        )

        # Test negative scalar condition on array column is expanded as an all() type iterator.
        conditions = [["exception_frames.filename", "NOT LIKE", "%foo%"]]
        assert (
            conditions_expr(dataset, conditions, Query({}, source),
                            ParsingContext()) ==
            "arrayAll(x -> assumeNotNull(x NOT LIKE '%foo%'), (exception_frames.filename AS `exception_frames.filename`))"
        )

        # Test that a duplicate IN condition is deduplicated even if
        # the lists are in different orders.
        conditions = tuplify([["platform", "IN", ["a", "b", "c"]],
                              ["platform", "IN", ["c", "b", "a"]]])
        assert (conditions_expr(dataset, conditions, Query(
            {}, source), ParsingContext()) == "platform IN ('a', 'b', 'c')")
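
The example above ends by asserting that a duplicate IN condition is de-duplicated even when the value lists are ordered differently. One way to get that behaviour, shown here only as an illustration of the idea rather than Snuba's implementation, is to normalize each condition before comparing:

# Illustrative normalization that makes the two IN conditions above compare
# equal: sort the right-hand list so element order no longer matters.
def normalize(cond):
    col, op, rhs = cond
    if op == "IN" and isinstance(rhs, (list, tuple)):
        rhs = tuple(sorted(rhs))
    return (col, op, rhs)

assert normalize(("platform", "IN", ("a", "b", "c"))) == \
    normalize(("platform", "IN", ("c", "b", "a")))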
Example #15
def test_simple_column_expr():
    dataset = get_dataset("groups")
    state.set_config('use_escape_alias', 1)

    body = {'granularity': 86400}
    query = Query(body)
    assert column_expr(dataset, "events.event_id", deepcopy(query), ParsingContext()) \
        == "(events.event_id AS `events.event_id`)"

    assert column_expr(dataset, "groups.id", deepcopy(query), ParsingContext()) \
        == "(groups.id AS `groups.id`)"

    assert column_expr(dataset, "events.event_id", deepcopy(query), ParsingContext(), "MyVerboseAlias") \
        == "(events.event_id AS MyVerboseAlias)"

    # Single tag expression
    assert column_expr(dataset, 'events.tags[foo]', deepcopy(query), ParsingContext()) ==\
        "(events.tags.value[indexOf(events.tags.key, \'foo\')] AS `events.tags[foo]`)"

    # Promoted tag expression / no translation
    assert column_expr(dataset, 'events.tags[server_name]', deepcopy(query), ParsingContext()) ==\
        "(events.server_name AS `events.tags[server_name]`)"

    # All tag keys expression
    assert column_expr(dataset, 'events.tags_key', deepcopy(query),
                       ParsingContext()) == (
                           '(arrayJoin(events.tags.key) AS `events.tags_key`)')

    # If we are going to use both tags_key and tags_value, expand both
    tag_group_body = {'groupby': ['events.tags_key', 'events.tags_value']}
    parsing_context = ParsingContext()
    assert column_expr(
        dataset, 'events.tags_key', Query(tag_group_body), parsing_context
    ) == (
        '(((arrayJoin(arrayMap((x,y) -> [x,y], events.tags.key, events.tags.value)) '
        'AS all_tags))[1] AS `events.tags_key`)')

    assert column_expr(dataset, 'events.time', deepcopy(query), ParsingContext()) ==\
        "(toDate(events.timestamp) AS `events.time`)"

    assert column_expr(dataset, 'events.col', deepcopy(query), ParsingContext(), aggregate='sum') ==\
        "(sum(events.col) AS `events.col`)"

    assert column_expr(dataset, 'events.col', deepcopy(query), ParsingContext(), alias='summation', aggregate='sum') ==\
        "(sum(events.col) AS summation)"

    assert column_expr(dataset, '', deepcopy(query), ParsingContext(), alias='aggregate', aggregate='count()') ==\
        "(count() AS aggregate)"

    # Columns that need escaping
    assert column_expr(dataset, 'events.sentry:release', deepcopy(query),
                       ParsingContext()) == '`events.sentry:release`'

    # A 'column' that is actually a string literal
    assert column_expr(dataset, '\'hello world\'', deepcopy(query),
                       ParsingContext()) == '\'hello world\''

    # Complex expressions (function calls) involving both string and column arguments
    assert column_expr(dataset, tuplify(['concat', ['a', '\':\'', 'b']]),
                       deepcopy(query),
                       ParsingContext()) == 'concat(a, \':\', b)'

    group_id_body = deepcopy(query)
    assert column_expr(
        dataset, 'events.issue', group_id_body,
        ParsingContext()) == '(nullIf(events.group_id, 0) AS `events.issue`)'

    # turn uniq() into ifNull(uniq(), 0) so it doesn't return null where a number was expected.
    assert column_expr(
        dataset,
        'events.tags[environment]',
        deepcopy(query),
        ParsingContext(),
        alias='unique_envs',
        aggregate='uniq'
    ) == "(ifNull(uniq(events.environment), 0) AS unique_envs)"
Example #16
    def test_column_expr(self):
        body = {'granularity': 86400}
        # Single tag expression
        assert column_expr('tags[foo]', body.copy()) ==\
            "(tags.value[indexOf(tags.key, \'foo\')] AS `tags[foo]`)"

        # Promoted tag expression / no translation
        assert column_expr('tags[server_name]', body.copy()) ==\
            "(server_name AS `tags[server_name]`)"

        # Promoted tag expression / with translation
        assert column_expr('tags[app.device]', body.copy()) ==\
            "(app_device AS `tags[app.device]`)"

        # All tag keys expression
        assert column_expr(
            'tags_key', body.copy()) == ('(arrayJoin(tags.key) AS tags_key)')

        # If we are going to use both tags_key and tags_value, expand both
        tag_group_body = {'groupby': ['tags_key', 'tags_value']}
        assert column_expr('tags_key', tag_group_body) == (
            '(((arrayJoin(arrayMap((x,y) -> [x,y], tags.key, tags.value)) '
            'AS all_tags))[1] AS tags_key)')

        assert column_expr('time', body.copy()) ==\
            "(toDate(timestamp) AS time)"

        assert column_expr('col', body.copy(), aggregate='sum') ==\
            "(sum(col) AS col)"

        assert column_expr(None, body.copy(), alias='sum', aggregate='sum') ==\
            "sum"  # This should probably be an error as it's an aggregate with no column

        assert column_expr('col', body.copy(), alias='summation', aggregate='sum') ==\
            "(sum(col) AS summation)"

        # Special cases where count() doesn't need a column
        assert column_expr('', body.copy(), alias='count', aggregate='count()') ==\
            "(count() AS count)"

        assert column_expr('', body.copy(), alias='aggregate', aggregate='count()') ==\
            "(count() AS aggregate)"

        # Columns that need escaping
        assert column_expr('sentry:release', body.copy()) == '`sentry:release`'

        # Columns that start with a negative sign (used in orderby to signify
        # sort order) retain the '-' sign outside the escaping backticks (if any)
        assert column_expr('-timestamp', body.copy()) == '-timestamp'
        assert column_expr('-sentry:release',
                           body.copy()) == '-`sentry:release`'

        # A 'column' that is actually a string literal
        assert column_expr('\'hello world\'', body.copy()) == '\'hello world\''

        # Complex expressions (function calls) involving both string and column arguments
        assert column_expr(tuplify(['concat', ['a', '\':\'', 'b']]),
                           body.copy()) == 'concat(a, \':\', b)'

        group_id_body = body.copy()
        assert column_expr('issue', group_id_body) == '(group_id AS issue)'
Exemple #17
    def test_complex_conditions_expr(self):
        body = {}

        assert complex_column_expr(tuplify(['count', []]),
                                   body.copy()) == 'count()'
        assert complex_column_expr(tuplify(['notEmpty', ['foo']]),
                                   body.copy()) == 'notEmpty(foo)'
        assert complex_column_expr(
            tuplify(['notEmpty', ['arrayElement', ['foo', 1]]]),
            body.copy()) == 'notEmpty(arrayElement(foo, 1))'
        assert complex_column_expr(tuplify(['foo', ['bar', ['qux'], 'baz']]),
                                   body.copy()) == 'foo(bar(qux), baz)'
        assert complex_column_expr(tuplify(['foo', [], 'a']),
                                   body.copy()) == '(foo() AS a)'
        assert complex_column_expr(tuplify(['foo', ['b', 'c'], 'd']),
                                   body.copy()) == '(foo(b, c) AS d)'
        assert complex_column_expr(tuplify(['foo', ['b', 'c', ['d']]]),
                                   body.copy()) == 'foo(b, c(d))'

        # we may move these to special Snuba function calls in the future
        assert complex_column_expr(tuplify(['topK', [3], ['project_id']]),
                                   body.copy()) == 'topK(3)(project_id)'
        assert complex_column_expr(
            tuplify(['topK', [3], ['project_id'], 'baz']),
            body.copy()) == '(topK(3)(project_id) AS baz)'

        assert complex_column_expr(tuplify(['emptyIfNull', ['project_id']]),
                                   body.copy()) == 'ifNull(project_id, \'\')'
        assert complex_column_expr(
            tuplify(['emptyIfNull', ['project_id'], 'foo']),
            body.copy()) == '(ifNull(project_id, \'\') AS foo)'

        # TODO once search_message is filled in everywhere, this can be just 'message' again.
        message_expr = '(coalesce(search_message, message) AS message)'
        assert complex_column_expr(tuplify(['positionCaseInsensitive', ['message', "'lol 'single' quotes'"]]), body.copy())\
                == "positionCaseInsensitive({message_expr}, 'lol \\'single\\' quotes')".format(**locals())

        # dangerous characters are allowed but escaped in literals and column names
        assert complex_column_expr(tuplify(['safe', ['fo`o', "'ba'r'"]]),
                                   body.copy()) == r"safe(`fo\`o`, 'ba\'r')"

        # Dangerous characters not allowed in functions
        with pytest.raises(AssertionError):
            assert complex_column_expr(
                tuplify([r"dang'erous", ['message', '`']]), body.copy())

        # Or nested functions
        with pytest.raises(AssertionError):
            assert complex_column_expr(
                tuplify([r"safe", ['dang`erous', ['message']]]), body.copy())
Exemple #18
    def test_conditions_expr(self, dataset):
        state.set_config('use_escape_alias', 1)
        conditions = [['a', '=', 1]]
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == 'a = 1'

        conditions = [[['a', '=', 1]]]
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == 'a = 1'

        conditions = [['a', '=', 1], ['b', '=', 2]]
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == 'a = 1 AND b = 2'

        conditions = [[['a', '=', 1], ['b', '=', 2]]]
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == '(a = 1 OR b = 2)'

        conditions = [[['a', '=', 1], ['b', '=', 2]], ['c', '=', 3]]
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == '(a = 1 OR b = 2) AND c = 3'

        conditions = [[['a', '=', 1], ['b', '=', 2]], [['c', '=', 3], ['d', '=', 4]]]
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == '(a = 1 OR b = 2) AND (c = 3 OR d = 4)'

        # Malformed condition input
        conditions = [[['a', '=', 1], []]]
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == 'a = 1'

        # Test column expansion
        conditions = [[['tags[foo]', '=', 1], ['b', '=', 2]]]
        expanded = column_expr(dataset, 'tags[foo]', Query({}), ParsingContext())
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == '({} = 1 OR b = 2)'.format(expanded)

        # Test using alias if column has already been expanded in SELECT clause
        reuse_query = Query({})
        parsing_context = ParsingContext()
        conditions = [[['tags[foo]', '=', 1], ['b', '=', 2]]]
        column_expr(dataset, 'tags[foo]', reuse_query, parsing_context)  # Expand it once so the next time is aliased
        assert conditions_expr(dataset, conditions, reuse_query, parsing_context) == '(`tags[foo]` = 1 OR b = 2)'

        # Test special output format of LIKE
        conditions = [['primary_hash', 'LIKE', '%foo%']]
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == 'primary_hash LIKE \'%foo%\''

        conditions = tuplify([[['notEmpty', ['arrayElement', ['exception_stacks.type', 1]]], '=', 1]])
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == 'notEmpty(arrayElement((exception_stacks.type AS `exception_stacks.type`), 1)) = 1'

        conditions = tuplify([[['notEmpty', ['tags[sentry:user]']], '=', 1]])
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == 'notEmpty((`sentry:user` AS `tags[sentry:user]`)) = 1'

        conditions = tuplify([[['notEmpty', ['tags_key']], '=', 1]])
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == 'notEmpty((arrayJoin(tags.key) AS tags_key)) = 1'

        conditions = tuplify([
            [
                [['notEmpty', ['tags[sentry:environment]']], '=', 'dev'], [['notEmpty', ['tags[sentry:environment]']], '=', 'prod']
            ],
            [
                [['notEmpty', ['tags[sentry:user]']], '=', 'joe'], [['notEmpty', ['tags[sentry:user]']], '=', 'bob']
            ],
        ])
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == \
            """(notEmpty((tags.value[indexOf(tags.key, 'sentry:environment')] AS `tags[sentry:environment]`)) = 'dev' OR notEmpty(`tags[sentry:environment]`) = 'prod') AND (notEmpty((`sentry:user` AS `tags[sentry:user]`)) = 'joe' OR notEmpty(`tags[sentry:user]`) = 'bob')"""

        # Test scalar condition on array column is expanded as an iterator.
        conditions = [['exception_frames.filename', 'LIKE', '%foo%']]
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == 'arrayExists(x -> assumeNotNull(x LIKE \'%foo%\'), (exception_frames.filename AS `exception_frames.filename`))'

        # Test negative scalar condition on array column is expanded as an all() type iterator.
        conditions = [['exception_frames.filename', 'NOT LIKE', '%foo%']]
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == 'arrayAll(x -> assumeNotNull(x NOT LIKE \'%foo%\'), (exception_frames.filename AS `exception_frames.filename`))'

        # Test that a duplicate IN condition is deduplicated even if
        # the lists are in different orders.
        conditions = tuplify([
            ['platform', 'IN', ['a', 'b', 'c']],
            ['platform', 'IN', ['c', 'b', 'a']]
        ])
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == "platform IN ('a', 'b', 'c')"
Exemple #19
def test_simple_column_expr():
    dataset = get_dataset("groups")
    source = (dataset.get_all_storages()
              [0].get_schemas().get_read_schema().get_data_source())

    body = {"granularity": 86400}
    query = Query(body, source)
    assert (column_expr(
        dataset, "events.event_id", deepcopy(query),
        ParsingContext()) == "(events.event_id AS `events.event_id`)")

    assert (column_expr(dataset, "groups.id", deepcopy(query),
                        ParsingContext()) == "(groups.id AS `groups.id`)")

    assert (column_expr(
        dataset,
        "events.event_id",
        deepcopy(query),
        ParsingContext(),
        "MyVerboseAlias",
    ) == "(events.event_id AS MyVerboseAlias)")

    # Single tag expression
    assert (
        column_expr(dataset, "events.tags[foo]", deepcopy(query),
                    ParsingContext()) ==
        "(events.tags.value[indexOf(events.tags.key, 'foo')] AS `events.tags[foo]`)"
    )

    # Promoted tag expression / no translation
    assert (column_expr(dataset, "events.tags[server_name]", deepcopy(query),
                        ParsingContext()) ==
            "(events.server_name AS `events.tags[server_name]`)")

    # All tag keys expression
    q = Query({"selected_columns": ["events.tags_key"]}, source)
    assert column_expr(dataset, "events.tags_key", q, ParsingContext()) == (
        "(arrayJoin(events.tags.key) AS `events.tags_key`)")

    # If we are going to use both tags_key and tags_value, expand both
    tag_group_body = {"groupby": ["events.tags_key", "events.tags_value"]}
    parsing_context = ParsingContext()
    assert column_expr(dataset, "events.tags_key", Query(
        tag_group_body, source
    ), parsing_context) == (
        "(((arrayJoin(arrayMap((x,y) -> [x,y], events.tags.key, events.tags.value)) "
        "AS all_tags))[1] AS `events.tags_key`)")

    assert (column_expr(
        dataset, "events.time", deepcopy(query),
        ParsingContext()) == "(toDate(events.timestamp) AS `events.time`)")

    assert (column_expr(
        dataset,
        "events.col",
        deepcopy(query),
        ParsingContext(),
        aggregate="sum") == "(sum(events.col) AS `events.col`)")

    assert (column_expr(
        dataset,
        "events.col",
        deepcopy(query),
        ParsingContext(),
        alias="summation",
        aggregate="sum",
    ) == "(sum(events.col) AS summation)")

    assert (column_expr(
        dataset,
        "",
        deepcopy(query),
        ParsingContext(),
        alias="aggregate",
        aggregate="count()",
    ) == "(count() AS aggregate)")

    # Columns that need escaping
    assert (column_expr(dataset, "events.sentry:release", deepcopy(query),
                        ParsingContext()) == "`events.sentry:release`")

    # A 'column' that is actually a string literal
    assert (column_expr(dataset, "'hello world'", deepcopy(query),
                        ParsingContext()) == "'hello world'")

    # Complex expressions (function calls) involving both string and column arguments
    assert (column_expr(
        dataset,
        tuplify(["concat", ["a", "':'", "b"]]),
        deepcopy(query),
        ParsingContext(),
    ) == "concat(a, ':', b)")

    group_id_body = deepcopy(query)
    assert (column_expr(dataset, "events.group_id", group_id_body,
                        ParsingContext()) ==
            "(nullIf(events.group_id, 0) AS `events.group_id`)")

    # turn uniq() into ifNull(uniq(), 0) so it doesn't return null where a number was expected.
    assert (column_expr(
        dataset,
        "events.tags[environment]",
        deepcopy(query),
        ParsingContext(),
        alias="unique_envs",
        aggregate="uniq",
    ) == "(ifNull(uniq(events.environment), 0) AS unique_envs)")
Exemple #20
def _parse_query_impl(body: MutableMapping[str, Any], entity: Entity) -> Query:
    def build_selected_expressions(
        raw_expressions: Sequence[Any], ) -> List[SelectedExpression]:
        output = []
        for raw_expression in raw_expressions:
            exp = parse_expression(tuplify(raw_expression),
                                   entity.get_data_model(), set())
            output.append(
                SelectedExpression(
                    # An expression in the query can be a string or a
                    # complex list with an alias. In the second case
                    # we trust the parser to find the alias.
                    name=raw_expression
                    if isinstance(raw_expression, str) else exp.alias,
                    expression=exp,
                ))
        return output

    aggregations = []
    for aggregation in body.get("aggregations", []):
        if not isinstance(aggregation, Sequence):
            raise ParsingException((
                f"Invalid aggregation structure {aggregation}. "
                "It must be a sequence containing expression, column and alias."
            ))
        aggregation_function = aggregation[0]
        column_expr = aggregation[1]
        column_expr = column_expr if column_expr else []
        alias = aggregation[2]
        alias = alias if alias else None

        aggregations.append(
            SelectedExpression(
                name=alias,
                expression=parse_aggregation(
                    aggregation_function,
                    column_expr,
                    alias,
                    entity.get_data_model(),
                    set(),
                ),
            ))

    groupby_clause = build_selected_expressions(
        to_list(body.get("groupby", [])))

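    # The SELECT clause below is the group by expressions, followed by the
    # aggregations, followed by the explicitly selected columns.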
    select_clause = (
        groupby_clause + aggregations +
        build_selected_expressions(body.get("selected_columns", [])))

    array_join_cols = set()
    arrayjoin = body.get("arrayjoin")
    # TODO: Properly detect all array join columns in all clauses of the query.
    # This is missing an arrayJoin in condition with an alias that is then
    # used in the select.
    if arrayjoin:
        array_join_cols.add(arrayjoin)
        array_join_expr: Optional[Expression] = parse_expression(
            body["arrayjoin"], entity.get_data_model(), {arrayjoin})
    else:
        array_join_expr = None
        for select_expr in select_clause:
            if isinstance(select_expr.expression, FunctionCall):
                if select_expr.expression.function_name == "arrayJoin":
                    parameters = select_expr.expression.parameters
                    if len(parameters) != 1:
                        raise ParsingException(
                            "arrayJoin(...) only accepts a single parameter.")
                    if isinstance(parameters[0], Column):
                        array_join_cols.add(parameters[0].column_name)
                    else:
                        # We only accept columns, or functions that do not
                        # reference columns: if the column is nested inside an
                        # arbitrary function we cannot tell whether we are
                        # actually array joining on its values. Functions of
                        # literals, however, are fine.
                        for e in parameters[0]:
                            if isinstance(e, Column):
                                raise ParsingException(
                                    "arrayJoin(...) cannot contain columns nested in functions."
                                )

    where_expr = parse_conditions_to_expr(body.get("conditions", []), entity,
                                          array_join_cols)
    having_expr = parse_conditions_to_expr(body.get("having", []), entity,
                                           array_join_cols)

    orderby_exprs = []
    for orderby in to_list(body.get("orderby", [])):
        if isinstance(orderby, str):
            match = NEGATE_RE.match(orderby)
            if match is None:
                raise ParsingException((
                    f"Invalid Order By clause {orderby}. If the Order By is a string, "
                    "it must respect the format `[-]column`"))
            direction, col = match.groups()
            orderby = col
        elif is_function(orderby):
            match = NEGATE_RE.match(orderby[0])
            if match is None:
                raise ParsingException((
                    f"Invalid Order By clause {orderby}. If the Order By is an expression, "
                    "the function name must respect the format `[-]func_name`"
                ))
            direction, col = match.groups()
            orderby = [col] + orderby[1:]
        else:
            raise ParsingException(
                (f"Invalid Order By clause {orderby}. The Clause was neither "
                 "a string nor a function call."))
        orderby_parsed = parse_expression(tuplify(orderby),
                                          entity.get_data_model(), set())
        orderby_exprs.append(
            OrderBy(
                OrderByDirection.DESC
                if direction == "-" else OrderByDirection.ASC,
                orderby_parsed,
            ))

    return Query(
        body,
        None,
        selected_columns=select_clause,
        array_join=array_join_expr,
        condition=where_expr,
        groupby=[g.expression for g in groupby_clause],
        having=having_expr,
        order_by=orderby_exprs,
    )
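
For orientation, a minimal sketch of the kind of request body this parser consumes is shown below; the entity key, column names and values are assumptions chosen for illustration rather than anything taken from the snippet above, and the parse call itself is left commented out.

from snuba.datasets.entities import EntityKey
from snuba.datasets.entities.factory import get_entity

# Hypothetical body. Per the parsing logic above: each aggregation is
# [function, column, alias], each condition is [column, operator, literal],
# and a leading '-' on an orderby string requests a descending sort.
body = {
    "selected_columns": ["event_id"],
    "aggregations": [["count()", "", "total"]],
    "conditions": [["project_id", "=", 1]],
    "groupby": ["project_id"],
    "orderby": "-total",
}
# query = _parse_query_impl(body, get_entity(EntityKey.EVENTS))  # entity key is assumed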
Exemple #21
def test_conditions_expr():
    dataset = get_dataset("groups")
    source = (dataset.get_all_storages()
              [0].get_schemas().get_read_schema().get_data_source())
    state.set_config("use_escape_alias", 1)
    conditions = [["events.a", "=", 1]]
    query = Query({}, source)
    assert (conditions_expr(
        dataset, conditions, deepcopy(query),
        ParsingContext()) == "(events.a AS `events.a`) = 1")

    conditions = [
        [["events.a", "=", 1], ["groups.b", "=", 2]],
        [["events.c", "=", 3], ["groups.d", "=", 4]],
    ]
    assert conditions_expr(
        dataset, conditions, deepcopy(query), ParsingContext()
    ) == (
        "((events.a AS `events.a`) = 1 OR (groups.b AS `groups.b`) = 2)"
        " AND ((events.c AS `events.c`) = 3 OR (groups.d AS `groups.d`) = 4)")

    # Test column expansion
    conditions = [[["events.tags[foo]", "=", 1], ["groups.b", "=", 2]]]
    expanded = column_expr(dataset, "events.tags[foo]", deepcopy(query),
                           ParsingContext())
    assert conditions_expr(
        dataset, conditions, deepcopy(query),
        ParsingContext()) == "({} = 1 OR (groups.b AS `groups.b`) = 2)".format(
            expanded)

    # Test using alias if column has already been expanded in SELECT clause
    reuse_query = deepcopy(query)
    parsing_context = ParsingContext()
    conditions = [[["events.tags[foo]", "=", 1], ["groups.b", "=", 2]]]
    column_expr(dataset, "events.tags[foo]", reuse_query,
                parsing_context)  # Expand it once so the next time is aliased
    assert (conditions_expr(dataset, conditions, reuse_query, parsing_context)
            == "(`events.tags[foo]` = 1 OR (groups.b AS `groups.b`) = 2)")

    # Test special output format of LIKE
    conditions = [["events.primary_hash", "LIKE", "%foo%"]]
    assert (conditions_expr(dataset, conditions, deepcopy(query),
                            ParsingContext()) ==
            "(events.primary_hash AS `events.primary_hash`) LIKE '%foo%'")

    conditions = tuplify(
        [[["notEmpty", ["arrayElement", ["events.exception_stacks.type", 1]]],
          "=", 1]])
    assert (
        conditions_expr(dataset, conditions, deepcopy(query),
                        ParsingContext()) ==
        "notEmpty(arrayElement((events.exception_stacks.type AS `events.exception_stacks.type`), 1)) = 1"
    )

    conditions = tuplify([[["notEmpty", ["events.tags[sentry:user]"]], "=",
                           1]])
    assert (conditions_expr(
        dataset, conditions, deepcopy(query),
        ParsingContext()) == "notEmpty(`events.tags[sentry:user]`) = 1")

    conditions = tuplify([[["notEmpty", ["events.tags_key"]], "=", 1]])
    q = Query({"selected_columns": ["events.tags_key"]}, source)
    assert (conditions_expr(dataset, conditions, q, ParsingContext()) ==
            "notEmpty((arrayJoin(events.tags.key) AS `events.tags_key`)) = 1")

    # Test scalar condition on array column is expanded as an iterator.
    conditions = [["events.exception_frames.filename", "LIKE", "%foo%"]]
    assert (
        conditions_expr(dataset, conditions, deepcopy(query),
                        ParsingContext()) ==
        "arrayExists(x -> assumeNotNull(x LIKE '%foo%'), (events.exception_frames.filename AS `events.exception_frames.filename`))"
    )
Exemple #22
import pytest

from snuba.datasets.entities import EntityKey
from snuba.datasets.entities.factory import get_entity
from snuba.query.conditions import (
    BooleanFunctions,
    ConditionFunctions,
    binary_condition,
)
from snuba.query.expressions import Argument, Column, FunctionCall, Lambda, Literal
from snuba.query.parser.functions import parse_function_to_expr
from snuba.util import tuplify

test_data = [
    (tuplify(["count", []]), FunctionCall(None, "count", ())),
    (
        tuplify(["notEmpty", ["foo"]]),
        FunctionCall(None, "notEmpty", (Column(None, None, "foo"), )),
    ),
    (
        tuplify(["notEmpty", ["arrayElement", ["foo", 1]]]),
        FunctionCall(
            None,
            "notEmpty",
            (FunctionCall(None, "arrayElement",
                          (Column(None, None, "foo"), Literal(None, 1))), ),
        ),
    ),
    (
        tuplify(["foo", ["bar", ["qux"], "baz"]]),
        FunctionCall(
            None,
            "foo",
            (
                FunctionCall(None, "bar", (Column(None, None, "qux"), )),
                Column(None, None, "baz"),
            ),
        ),
    ),
]
Exemple #23
def parse_and_run_query(validated_body, timer):
    body = deepcopy(validated_body)
    turbo = body.get('turbo', False)
    max_days, table, date_align, config_sample, force_final, max_group_ids_exclude = state.get_configs([
        ('max_days', None),
        ('clickhouse_table', settings.CLICKHOUSE_TABLE),
        ('date_align_seconds', 1),
        ('sample', 1),
        # 1: always use FINAL, 0: never use final, undefined/None: use project setting.
        ('force_final', 0 if turbo else None),
        ('max_group_ids_exclude', settings.REPLACER_MAX_GROUP_IDS_TO_EXCLUDE),
    ])
    stats = {}
    to_date = util.parse_datetime(body['to_date'], date_align)
    from_date = util.parse_datetime(body['from_date'], date_align)
    assert from_date <= to_date

    if max_days is not None and (to_date - from_date).days > max_days:
        from_date = to_date - timedelta(days=max_days)

    where_conditions = body.get('conditions', [])
    where_conditions.extend([
        ('timestamp', '>=', from_date),
        ('timestamp', '<', to_date),
        ('deleted', '=', 0),
    ])
    # NOTE: we rely entirely on the schema to make sure that regular snuba
    # queries are required to send a project_id filter. Some other special
    # internal query types do not require a project_id filter.
    project_ids = util.to_list(body['project'])
    if project_ids:
        where_conditions.append(('project_id', 'IN', project_ids))

    having_conditions = body.get('having', [])

    aggregate_exprs = [
        util.column_expr(col, body, alias, agg)
        for (agg, col, alias) in body['aggregations']
    ]
    groupby = util.to_list(body['groupby'])
    group_exprs = [util.column_expr(gb, body) for gb in groupby]

    selected_cols = [util.column_expr(util.tuplify(colname), body)
                     for colname in body.get('selected_columns', [])]

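    # SELECT expressions: group by columns first, then aggregates, then
    # explicitly selected columns.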
    select_exprs = group_exprs + aggregate_exprs + selected_cols
    select_clause = u'SELECT {}'.format(', '.join(select_exprs))

    from_clause = u'FROM {}'.format(table)

    # For now, we only need FINAL if:
    #    1. The project has been marked as needing FINAL (in redis) because of recent
    #       replacements (and it affects too many groups for us just to exclude
    #       those groups from the query)
    #    OR
    #    2. the force_final setting = 1
    needs_final, exclude_group_ids = get_projects_query_flags(project_ids)
    if len(exclude_group_ids) > max_group_ids_exclude:
        # Cap the number of groups to exclude by query and flip to using FINAL if necessary
        needs_final = True
        exclude_group_ids = []

    used_final = False
    if force_final == 1 or (force_final is None and needs_final):
        from_clause = u'{} FINAL'.format(from_clause)
        used_final = True
    elif exclude_group_ids:
        where_conditions.append(('group_id', 'NOT IN', exclude_group_ids))

    sample = body.get('sample', settings.TURBO_SAMPLE_RATE if turbo else config_sample)
    if sample != 1:
        from_clause = u'{} SAMPLE {}'.format(from_clause, sample)

    joins = []

    if 'arrayjoin' in body:
        joins.append(u'ARRAY JOIN {}'.format(body['arrayjoin']))
    join_clause = ' '.join(joins)

    where_clause = ''
    if where_conditions:
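        # Tuplify the conditions so they are hashable, then deduplicate them.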
        where_conditions = list(set(util.tuplify(where_conditions)))
        where_clause = u'WHERE {}'.format(util.conditions_expr(where_conditions, body))

    prewhere_conditions = []
    if settings.PREWHERE_KEYS:
        # Add any condition to PREWHERE if:
        # - It is a single top-level condition (not OR-nested), and
        # - Any of its referenced columns are in PREWHERE_KEYS
        prewhere_candidates = [
            (util.columns_in_expr(cond[0]), cond)
            for cond in where_conditions if util.is_condition(cond) and
            any(col in settings.PREWHERE_KEYS for col in util.columns_in_expr(cond[0]))
        ]
        # Use the condition that has the highest priority (based on the
        # position of its columns in the PREWHERE_KEYS list)
        prewhere_candidates = sorted([
            (min(settings.PREWHERE_KEYS.index(col) for col in cols if col in settings.PREWHERE_KEYS), cond)
            for cols, cond in prewhere_candidates
        ])
        if prewhere_candidates:
            prewhere_conditions = [cond for _, cond in prewhere_candidates][:settings.MAX_PREWHERE_CONDITIONS]

    prewhere_clause = ''
    if prewhere_conditions:
        prewhere_clause = u'PREWHERE {}'.format(util.conditions_expr(prewhere_conditions, body))

    having_clause = ''
    if having_conditions:
        assert groupby, 'found HAVING clause with no GROUP BY'
        having_clause = u'HAVING {}'.format(util.conditions_expr(having_conditions, body))

    group_clause = ', '.join(util.column_expr(gb, body) for gb in groupby)
    if group_clause:
        if body.get('totals', False):
            group_clause = 'GROUP BY ({}) WITH TOTALS'.format(group_clause)
        else:
            group_clause = 'GROUP BY ({})'.format(group_clause)

    order_clause = ''
    if body.get('orderby'):
        orderby = [util.column_expr(util.tuplify(ob), body) for ob in util.to_list(body['orderby'])]
        orderby = [u'{} {}'.format(
            ob.lstrip('-'),
            'DESC' if ob.startswith('-') else 'ASC'
        ) for ob in orderby]
        order_clause = u'ORDER BY {}'.format(', '.join(orderby))

    limitby_clause = ''
    if 'limitby' in body:
        limitby_clause = 'LIMIT {} BY {}'.format(*body['limitby'])

    limit_clause = ''
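    # ClickHouse accepts the "LIMIT offset, count" form; offset defaults to 0.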
    if 'limit' in body:
        limit_clause = 'LIMIT {}, {}'.format(body.get('offset', 0), body['limit'])

    sql = ' '.join([c for c in [
        select_clause,
        from_clause,
        join_clause,
        prewhere_clause,
        where_clause,
        group_clause,
        having_clause,
        order_clause,
        limitby_clause,
        limit_clause
    ] if c])

    timer.mark('prepare_query')

    stats.update({
        'clickhouse_table': table,
        'final': used_final,
        'referrer': request.referrer,
        'num_days': (to_date - from_date).days,
        'num_projects': len(project_ids),
        'sample': sample,
    })

    return util.raw_query(
        validated_body, sql, clickhouse_ro, timer, stats
    )
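
As a rough illustration of this legacy path, the body below sketches the fields the function reads; the dates, project id, conditions and aggregation are assumptions for the example, and the call is commented out because it requires a live ClickHouse connection and a timer object with a mark() method.

# Hypothetical validated body; all values are illustrative assumptions.
validated_body = {
    'project': [1],
    'from_date': '2019-06-01T00:00:00',
    'to_date': '2019-06-02T00:00:00',
    'conditions': [['environment', '=', 'production']],
    'aggregations': [['count()', '', 'total']],
    'groupby': ['project_id'],
    'orderby': '-total',
    'limit': 100,
}
# result = parse_and_run_query(validated_body, timer)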