Example #1
File: test_util.py Project: ruezetle/snuba
    def test_nested_aggregate_legacy_format(self, dataset):
        source = (dataset.get_all_storages()
                  [0].get_schemas().get_read_schema().get_data_source())
        priority = [
            "toUInt64(plus(multiply(log(times_seen), 600), last_seen))",
            "",
            "priority",
        ]
        assert (
            column_expr(
                dataset,
                "",
                Query({"aggregations": [priority]}, source),
                ParsingContext(),
                priority[2],
                priority[0],
            ) ==
            "(toUInt64(plus(multiply(log(times_seen), 600), last_seen)) AS priority)"
        )

        top_k = ["topK(3)", "logger", "top_3"]
        assert (column_expr(
            dataset,
            top_k[1],
            Query({"aggregations": [top_k]}, source),
            ParsingContext(),
            top_k[2],
            top_k[0],
        ) == "(topK(3)(logger) AS top_3)")
Example #2
def test_conditions_expr():
    dataset = get_dataset("groups")
    state.set_config('use_escape_alias', 1)
    conditions = [['events.a', '=', 1]]
    assert conditions_expr(dataset, conditions, Query({}),
                           ParsingContext()) == '(events.a AS `events.a`) = 1'

    conditions = [[['events.a', '=', 1], ['groups.b', '=', 2]],
                  [['events.c', '=', 3], ['groups.d', '=', 4]]]
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) \
        == ('((events.a AS `events.a`) = 1 OR (groups.b AS `groups.b`) = 2)'
        ' AND ((events.c AS `events.c`) = 3 OR (groups.d AS `groups.d`) = 4)'
        )

    # Test column expansion
    conditions = [[['events.tags[foo]', '=', 1], ['groups.b', '=', 2]]]
    expanded = column_expr(dataset, 'events.tags[foo]', Query({}),
                           ParsingContext())
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) \
        == '({} = 1 OR (groups.b AS `groups.b`) = 2)'.format(expanded)

    # Test using alias if column has already been expanded in SELECT clause
    reuse_query = Query({})
    parsing_context = ParsingContext()
    conditions = [[['events.tags[foo]', '=', 1], ['groups.b', '=', 2]]]
    column_expr(dataset, 'events.tags[foo]', reuse_query,
                parsing_context)  # Expand it once so the next time is aliased
    assert conditions_expr(dataset, conditions, reuse_query, parsing_context) \
        == '(`events.tags[foo]` = 1 OR (groups.b AS `groups.b`) = 2)'

    # Test special output format of LIKE
    conditions = [['events.primary_hash', 'LIKE', '%foo%']]
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) \
        == '(events.primary_hash AS `events.primary_hash`) LIKE \'%foo%\''

    conditions = tuplify(
        [[['notEmpty', ['arrayElement', ['events.exception_stacks.type', 1]]],
          '=', 1]])
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) \
        == 'notEmpty(arrayElement((events.exception_stacks.type AS `events.exception_stacks.type`), 1)) = 1'

    conditions = tuplify([[['notEmpty', ['events.tags[sentry:user]']], '=',
                           1]])
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) \
        == 'notEmpty(`events.tags[sentry:user]`) = 1'

    conditions = tuplify([[['notEmpty', ['events.tags_key']], '=', 1]])
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) \
        == 'notEmpty((arrayJoin(events.tags.key) AS `events.tags_key`)) = 1'

    # Test scalar condition on array column is expanded as an iterator.
    conditions = [['events.exception_frames.filename', 'LIKE', '%foo%']]
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) \
        == 'arrayExists(x -> assumeNotNull(x LIKE \'%foo%\'), (events.exception_frames.filename AS `events.exception_frames.filename`))'
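The nesting rule these assertions rely on is simple: the top-level conditions list is AND-ed, and any element that is itself a list of [column, op, literal] triples is OR-ed. A minimal standalone sketch of that rule (a hypothetical toy_conditions_expr, not Snuba's conditions_expr; it ignores the column expansion, aliasing and LIKE/array handling shown above):

def toy_conditions_expr(conditions):
    # Render one [column, op, literal] triple.
    def leaf(cond):
        col, op, lit = cond
        return "{} {} {}".format(col, op, repr(lit) if isinstance(lit, str) else lit)

    parts = []
    for cond in conditions:
        if cond and isinstance(cond[0], (list, tuple)):
            # A nested list of triples is OR-ed together and parenthesised.
            parts.append("({})".format(" OR ".join(leaf(c) for c in cond)))
        else:
            parts.append(leaf(cond))
    # Top-level entries are AND-ed.
    return " AND ".join(parts)


assert toy_conditions_expr(
    [["a", "=", 1], [["b", "=", 2], ["c", "=", 3]]]
) == "a = 1 AND (b = 2 OR c = 3)"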
Example #3
def test_order_by():
    dataset = get_dataset("groups")
    source = dataset.get_dataset_schemas().get_read_schema().get_data_source()
    body = {}
    query = Query(body, source)

    assert (
        column_expr(dataset, "-events.event_id", deepcopy(query), ParsingContext())
        == "-(events.event_id AS `events.event_id`)"
    )

    context = ParsingContext()
    context.add_alias("`events.event_id`")
    assert (
        column_expr(dataset, "-events.event_id", deepcopy(query), context,)
        == "-`events.event_id`"
    )
Example #4
    def test_nested_aggregate_legacy_format(self, dataset):
        source = dataset.get_dataset_schemas().get_read_schema(
        ).get_data_source()
        priority = [
            'toUInt64(plus(multiply(log(times_seen), 600), last_seen))', '',
            'priority'
        ]
        assert column_expr(
            dataset, '', Query({'aggregations': [priority]}, source),
            ParsingContext(), priority[2], priority[0]
        ) == '(toUInt64(plus(multiply(log(times_seen), 600), last_seen)) AS priority)'

        top_k = ['topK(3)', 'logger', 'top_3']
        assert column_expr(dataset, top_k[1],
                           Query({'aggregations': [top_k]}, source),
                           ParsingContext(), top_k[2],
                           top_k[0]) == '(topK(3)(logger) AS top_3)'
Example #5
    def test_alias_in_alias(self):
        source = self.dataset.get_dataset_schemas().get_read_schema().get_data_source()
        query = Query({"groupby": ["tags_key", "tags_value"]}, source,)
        context = ParsingContext()
        assert column_expr(self.dataset, "tags_key", query, context) == (
            "(((arrayJoin(arrayMap((x,y) -> [x,y], tags.key, tags.value)) "
            "AS all_tags))[1] AS tags_key)"
        )

        # If we want to use `tags_key` again, make sure we use the
        # already-created alias verbatim
        assert column_expr(self.dataset, "tags_key", query, context) == "tags_key"
        # If we also want to use `tags_value`, make sure that we use
        # the `all_tags` alias instead of re-expanding the tags arrayJoin
        assert (
            column_expr(self.dataset, "tags_value", query, context)
            == "((all_tags)[2] AS tags_value)"
        )
Example #6
def test_alias_in_alias():
    state.set_config('use_escape_alias', 1)
    dataset = get_dataset("groups")
    body = {'groupby': ['events.tags_key', 'events.tags_value']}
    query = Query(body)
    parsing_context = ParsingContext()
    assert column_expr(dataset, 'events.tags_key', query, parsing_context) == (
        '(((arrayJoin(arrayMap((x,y) -> [x,y], events.tags.key, events.tags.value)) '
        'AS all_tags))[1] AS `events.tags_key`)')

    # If we want to use `tags_key` again, make sure we use the
    # already-created alias verbatim
    assert column_expr(dataset, 'events.tags_key', query,
                       parsing_context) == '`events.tags_key`'
    # If we also want to use `tags_value`, make sure that we use
    # the `all_tags` alias instead of re-expanding the tags arrayJoin
    assert column_expr(
        dataset, 'events.tags_value', query,
        parsing_context) == '((all_tags)[2] AS `events.tags_value`)'
Example #7
def test_alias_in_alias():
    state.set_config("use_escape_alias", 1)
    dataset = get_dataset("groups")
    source = (dataset.get_all_storages()
              [0].get_schemas().get_read_schema().get_data_source())
    body = {"groupby": ["events.tags_key", "events.tags_value"]}
    query = Query(body, source)
    parsing_context = ParsingContext()
    assert column_expr(dataset, "events.tags_key", query, parsing_context) == (
        "(((arrayJoin(arrayMap((x,y) -> [x,y], events.tags.key, events.tags.value)) "
        "AS all_tags))[1] AS `events.tags_key`)")

    # If we want to use `tags_key` again, make sure we use the
    # already-created alias verbatim
    assert (column_expr(dataset, "events.tags_key", query,
                        parsing_context) == "`events.tags_key`")
    # If we also want to use `tags_value`, make sure that we use
    # the `all_tags` alias instead of re-expanding the tags arrayJoin
    assert (column_expr(
        dataset, "events.tags_value", query,
        parsing_context) == "((all_tags)[2] AS `events.tags_value`)")
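What all three variants of this test exercise is alias reuse: the first reference to a column emits the full expansion with an AS alias, and any later reference within the same ParsingContext gets the alias back verbatim. A hypothetical, heavily simplified model of that behaviour (ToyParsingContext and toy_column_expr are illustrative stand-ins, not Snuba's ParsingContext or column_expr):

class ToyParsingContext:
    """Remembers which aliases have already been emitted in this query."""

    def __init__(self):
        self._aliases = set()

    def add_alias(self, alias):
        self._aliases.add(alias)

    def is_alias_present(self, alias):
        return alias in self._aliases


def toy_column_expr(expansion, alias, context):
    # Once an expression has been emitted under an alias, later references
    # return the bare alias instead of re-expanding the column.
    if context.is_alias_present(alias):
        return alias
    context.add_alias(alias)
    return "({} AS {})".format(expansion, alias)


ctx = ToyParsingContext()
assert toy_column_expr("arrayJoin(tags.key)", "`tags_key`", ctx) \
    == "(arrayJoin(tags.key) AS `tags_key`)"
assert toy_column_expr("arrayJoin(tags.key)", "`tags_key`", ctx) == "`tags_key`"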
Example #8
File: test_util.py Project: ruezetle/snuba
    def test_apdex_expression(self, dataset):
        body = {"aggregations": [["apdex(duration, 300)", "", "apdex_score"]]}
        parsing_context = ParsingContext()
        source = (dataset.get_all_storages()
                  [0].get_schemas().get_read_schema().get_data_source())
        exprs = [
            column_expr(dataset, col, Query(body, source), parsing_context,
                        alias, agg)
            for (agg, col, alias) in body["aggregations"]
        ]
        assert exprs == [
            "((countIf(duration <= 300) + (countIf((duration > 300) AND (duration <= 1200)) / 2)) / count() AS apdex_score)"
        ]
Example #9
    def test_order_by(self):
        """
        ORDER BY columns in Snuba are represented as -COL_NAME when ordering DESC.
        Since the column reaches the column_expr call with the `-` character
        already in front, this can introduce a ton of corner cases depending on
        whether the column is aliased, whether it gets processed into something
        else, or whether it is escaped.

        This test is supposed to cover those cases.
        """
        source = self.dataset.get_dataset_schemas().get_read_schema().get_data_source()
        query = Query({}, source)
        # Columns that start with a negative sign (used in orderby to signify
        # sort order) retain the '-' sign outside the escaping backticks (if any)
        assert (
            column_expr(self.dataset, "-timestamp", deepcopy(query), ParsingContext())
            == "-timestamp"
        )
        assert (
            column_expr(
                self.dataset, "-sentry:release", deepcopy(query), ParsingContext()
            )
            == "-`sentry:release`"
        )

        context = ParsingContext()
        context.add_alias("al1")
        assert (
            column_expr(self.dataset, "-timestamp", deepcopy(query), context, "al1")
            == "-al1"
        )

        assert (
            column_expr(
                self.dataset, "-timestamp", deepcopy(query), ParsingContext(), "al1"
            )
            == "-(timestamp AS al1)"
        )

        assert (
            column_expr(
                self.dataset,
                "-exception_stacks.type",
                deepcopy(query),
                ParsingContext(),
            )
            == "-(exception_stacks.type AS `exception_stacks.type`)"
        )

        context = ParsingContext()
        context.add_alias("`exception_stacks.type`")
        assert (
            column_expr(
                self.dataset, "-exception_stacks.type", deepcopy(query), context,
            )
            == "-`exception_stacks.type`"
        )
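The '-' prefix asserted on here is consumed later, when the ORDER BY clause is assembled (the query formatter examples further down show the real code): the sign is stripped from the already-formatted expression and turned into ASC/DESC. A standalone restatement of that step:

def toy_order_clause(formatted_orderby_exprs):
    # A leading "-" on the formatted expression means DESC; otherwise ASC.
    return "ORDER BY " + ", ".join(
        "{} {}".format(expr.lstrip("-"), "DESC" if expr.startswith("-") else "ASC")
        for expr in formatted_orderby_exprs
    )


assert toy_order_clause(["-`sentry:release`", "timestamp"]) \
    == "ORDER BY `sentry:release` DESC, timestamp ASC"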
Example #10
    def test_impact_expression(self, dataset):
        body = {
            "aggregations":
            [["impact(duration, 300, user)", "", "impact_score"]]
        }
        parsing_context = ParsingContext()
        source = dataset.get_dataset_schemas().get_read_schema(
        ).get_data_source()
        exprs = [
            column_expr(dataset, col, Query(body, source), parsing_context,
                        alias, agg)
            for (agg, col, alias) in body["aggregations"]
        ]
        assert exprs == [
            "((1 - (countIf(duration <= 300) + (countIf((duration > 300) AND (duration <= 1200)) / 2)) / count()) + ((1 - (1 / sqrt(uniq(user)))) * 3) AS impact_score)"
        ]
Example #11
    def test_duplicate_expression_alias(self, dataset):
        body = {
            'aggregations': [
                ['top3', 'logger', 'dupe_alias'],
                ['uniq', 'environment', 'dupe_alias'],
            ]
        }
        parsing_context = ParsingContext()
        # In the case where 2 different expressions are aliased
        # to the same thing, one ends up overwriting the other.
        # This may not be ideal as it may mask bugs in query conditions
        exprs = [
            column_expr(dataset, col, Query(body), parsing_context, alias, agg)
            for (agg, col, alias) in body['aggregations']
        ]
        assert exprs == ['(topK(3)(logger) AS dupe_alias)', 'dupe_alias']
Example #12
File: test_util.py Project: ruezetle/snuba
    def test_duplicate_expression_alias(self, dataset):
        body = {
            "aggregations": [
                ["top3", "logger", "dupe_alias"],
                ["uniq", "environment", "dupe_alias"],
            ]
        }
        parsing_context = ParsingContext()
        source = (dataset.get_all_storages()
                  [0].get_schemas().get_read_schema().get_data_source())
        # In the case where 2 different expressions are aliased
        # to the same thing, one ends up overwriting the other.
        # This may not be ideal as it may mask bugs in query conditions
        exprs = [
            column_expr(dataset, col, Query(body, source), parsing_context,
                        alias, agg)
            for (agg, col, alias) in body["aggregations"]
        ]
        assert exprs == ["(topK(3)(logger) AS dupe_alias)", "dupe_alias"]
Example #13
def test_duplicate_expression_alias():
    dataset = get_dataset("groups")
    state.set_config('use_escape_alias', 1)

    body = {
        'aggregations': [
            ['top3', 'events.logger', 'dupe_alias'],
            ['uniq', 'events.environment', 'dupe_alias'],
        ]
    }
    query = Query(body)
    # In the case where 2 different expressions are aliased
    # to the same thing, one ends up overwriting the other.
    # This may not be ideal as it may mask bugs in query conditions
    parsing_context = ParsingContext()
    exprs = [
        column_expr(dataset, col, query, parsing_context, alias, agg)
        for (agg, col, alias) in body['aggregations']
    ]
    assert exprs == ['(topK(3)(events.logger) AS dupe_alias)', 'dupe_alias']
Example #14
def test_duplicate_expression_alias():
    dataset = get_dataset("groups")
    source = dataset.get_dataset_schemas().get_read_schema().get_data_source()
    state.set_config("use_escape_alias", 1)

    body = {
        "aggregations": [
            ["top3", "events.logger", "dupe_alias"],
            ["uniq", "events.environment", "dupe_alias"],
        ]
    }
    query = Query(body, source)
    # In the case where 2 different expressions are aliased
    # to the same thing, one ends up overwriting the other.
    # This may not be ideal as it may mask bugs in query conditions
    parsing_context = ParsingContext()
    exprs = [
        column_expr(dataset, col, query, parsing_context, alias, agg)
        for (agg, col, alias) in body["aggregations"]
    ]
    assert exprs == ["(topK(3)(events.logger) AS dupe_alias)", "dupe_alias"]
Example #15
def test_simple_column_expr():
    dataset = get_dataset("groups")
    state.set_config('use_escape_alias', 1)

    body = {'granularity': 86400}
    query = Query(body)
    assert column_expr(dataset, "events.event_id", deepcopy(query), ParsingContext()) \
        == "(events.event_id AS `events.event_id`)"

    assert column_expr(dataset, "groups.id", deepcopy(query), ParsingContext()) \
        == "(groups.id AS `groups.id`)"

    assert column_expr(dataset, "events.event_id", deepcopy(query), ParsingContext(), "MyVerboseAlias") \
        == "(events.event_id AS MyVerboseAlias)"

    # Single tag expression
    assert column_expr(dataset, 'events.tags[foo]', deepcopy(query), ParsingContext()) ==\
        "(events.tags.value[indexOf(events.tags.key, \'foo\')] AS `events.tags[foo]`)"

    # Promoted tag expression / no translation
    assert column_expr(dataset, 'events.tags[server_name]', deepcopy(query), ParsingContext()) ==\
        "(events.server_name AS `events.tags[server_name]`)"

    # All tag keys expression
    assert column_expr(dataset, 'events.tags_key', deepcopy(query),
                       ParsingContext()) == (
                           '(arrayJoin(events.tags.key) AS `events.tags_key`)')

    # If we are going to use both tags_key and tags_value, expand both
    tag_group_body = {'groupby': ['events.tags_key', 'events.tags_value']}
    parsing_context = ParsingContext()
    assert column_expr(
        dataset, 'events.tags_key', Query(tag_group_body), parsing_context
    ) == (
        '(((arrayJoin(arrayMap((x,y) -> [x,y], events.tags.key, events.tags.value)) '
        'AS all_tags))[1] AS `events.tags_key`)')

    assert column_expr(dataset, 'events.time', deepcopy(query), ParsingContext()) ==\
        "(toDate(events.timestamp) AS `events.time`)"

    assert column_expr(dataset, 'events.col', deepcopy(query), ParsingContext(), aggregate='sum') ==\
        "(sum(events.col) AS `events.col`)"

    assert column_expr(dataset, 'events.col', deepcopy(query), ParsingContext(), alias='summation', aggregate='sum') ==\
        "(sum(events.col) AS summation)"

    assert column_expr(dataset, '', deepcopy(query), ParsingContext(), alias='aggregate', aggregate='count()') ==\
        "(count() AS aggregate)"

    # Columns that need escaping
    assert column_expr(dataset, 'events.sentry:release', deepcopy(query),
                       ParsingContext()) == '`events.sentry:release`'

    # A 'column' that is actually a string literal
    assert column_expr(dataset, '\'hello world\'', deepcopy(query),
                       ParsingContext()) == '\'hello world\''

    # Complex expressions (function calls) involving both string and column arguments
    assert column_expr(dataset, tuplify(['concat', ['a', '\':\'', 'b']]),
                       deepcopy(query),
                       ParsingContext()) == 'concat(a, \':\', b)'

    group_id_body = deepcopy(query)
    assert column_expr(
        dataset, 'events.issue', group_id_body,
        ParsingContext()) == '(nullIf(events.group_id, 0) AS `events.issue`)'

    # turn uniq() into ifNull(uniq(), 0) so it doesn't return null where a number was expected.
    assert column_expr(
        dataset,
        'events.tags[environment]',
        deepcopy(query),
        ParsingContext(),
        alias='unique_envs',
        aggregate='uniq'
    ) == "(ifNull(uniq(events.environment), 0) AS unique_envs)"
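The backtick escaping these results depend on follows one rule: a name that is not a plain identifier (it contains '.', ':', '[' and so on) is wrapped in backticks, while plain identifiers pass through untouched. A hypothetical sketch of that rule (not the actual escaping code in Snuba):

import re

def toy_escape_col(col):
    # Plain identifiers need no quoting; anything else gets backticks.
    if re.match(r"^[a-zA-Z_][a-zA-Z0-9_]*$", col):
        return col
    return "`{}`".format(col)


assert toy_escape_col("timestamp") == "timestamp"
assert toy_escape_col("events.sentry:release") == "`events.sentry:release`"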
Example #16
File: query.py Project: jiankunking/snuba
    def __init__(
        self,
        dataset: Dataset,
        query: Query,
        settings: RequestSettings,
    ) -> None:
        parsing_context = ParsingContext()

        aggregate_exprs = [
            column_expr(dataset, col, query, parsing_context, alias, agg)
            for (agg, col, alias) in query.get_aggregations()
        ]
        groupby = util.to_list(query.get_groupby())
        group_exprs = [
            column_expr(dataset, gb, query, parsing_context) for gb in groupby
        ]
        column_names = query.get_selected_columns() or []
        selected_cols = [
            column_expr(dataset, util.tuplify(colname), query, parsing_context)
            for colname in column_names
        ]
        select_clause = u"SELECT {}".format(
            ", ".join(group_exprs + aggregate_exprs + selected_cols))

        from_clause = u"FROM {}".format(query.get_data_source().format_from())

        if query.get_final():
            from_clause = u"{} FINAL".format(from_clause)

        if not query.get_data_source().supports_sample():
            sample_rate = None
        else:
            if query.get_sample():
                sample_rate = query.get_sample()
            elif settings.get_turbo():
                sample_rate = snuba_settings.TURBO_SAMPLE_RATE
            else:
                sample_rate = None

        if sample_rate:
            from_clause = u"{} SAMPLE {}".format(from_clause, sample_rate)

        join_clause = ""
        if query.get_arrayjoin():
            join_clause = u"ARRAY JOIN {}".format(query.get_arrayjoin())

        where_clause = ""
        if query.get_conditions():
            where_clause = u"WHERE {}".format(
                conditions_expr(dataset, query.get_conditions(), query,
                                parsing_context))

        prewhere_clause = ""
        if query.get_prewhere():
            prewhere_clause = u"PREWHERE {}".format(
                conditions_expr(dataset, query.get_prewhere(), query,
                                parsing_context))

        group_clause = ""
        if groupby:
            group_clause = "GROUP BY ({})".format(", ".join(
                column_expr(dataset, gb, query, parsing_context)
                for gb in groupby))
            if query.has_totals():
                group_clause = "{} WITH TOTALS".format(group_clause)

        having_clause = ""
        having_conditions = query.get_having()
        if having_conditions:
            assert groupby, "found HAVING clause with no GROUP BY"
            having_clause = u"HAVING {}".format(
                conditions_expr(dataset, having_conditions, query,
                                parsing_context))

        order_clause = ""
        if query.get_orderby():
            orderby = [
                column_expr(dataset, util.tuplify(ob), query, parsing_context)
                for ob in util.to_list(query.get_orderby())
            ]
            orderby = [
                u"{} {}".format(ob.lstrip("-"),
                                "DESC" if ob.startswith("-") else "ASC")
                for ob in orderby
            ]
            order_clause = u"ORDER BY {}".format(", ".join(orderby))

        limitby_clause = ""
        if query.get_limitby() is not None:
            limitby_clause = "LIMIT {} BY {}".format(*query.get_limitby())

        limit_clause = ""
        if query.get_limit() is not None:
            limit_clause = "LIMIT {}, {}".format(query.get_offset(),
                                                 query.get_limit())

        self.__formatted_query = " ".join([
            c for c in [
                select_clause,
                from_clause,
                join_clause,
                prewhere_clause,
                where_clause,
                group_clause,
                having_clause,
                order_clause,
                limitby_clause,
                limit_clause,
            ] if c
        ])
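The final assembly is the notable design choice in this constructor: every clause is built as a possibly-empty string, and only the non-empty ones are joined into the query text. A standalone restatement of that last step, with hard-coded clause strings standing in for the real query object:

clauses = [
    "SELECT count() AS aggregate",
    "FROM events",
    "",  # join_clause: no ARRAY JOIN requested
    "",  # prewhere_clause
    "WHERE project_id = 1",
    "",  # group_clause
    "",  # having_clause
    "ORDER BY timestamp DESC",
    "",  # limitby_clause
    "LIMIT 0, 1000",
]
formatted_query = " ".join(c for c in clauses if c)
assert formatted_query == (
    "SELECT count() AS aggregate FROM events WHERE project_id = 1 "
    "ORDER BY timestamp DESC LIMIT 0, 1000"
)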
Example #17
File: query.py Project: Appva/snuba
    def __init__(
        self,
        dataset: Dataset,
        query: Query,
        settings: RequestSettings,
        prewhere_conditions: Sequence[str],
    ) -> None:
        parsing_context = ParsingContext()

        aggregate_exprs = [
            column_expr(dataset, col, query, parsing_context, alias, agg)
            for (agg, col, alias) in query.get_aggregations()
        ]
        groupby = util.to_list(query.get_groupby())
        group_exprs = [
            column_expr(dataset, gb, query, parsing_context) for gb in groupby
        ]
        column_names = query.get_selected_columns() or []
        selected_cols = [
            column_expr(dataset, util.tuplify(colname), query, parsing_context)
            for colname in column_names
        ]
        select_clause = u'SELECT {}'.format(
            ', '.join(group_exprs + aggregate_exprs + selected_cols))

        from_clause = u'FROM {}'.format(query.get_data_source().format_from())

        if query.get_final():
            from_clause = u'{} FINAL'.format(from_clause)

        if query.get_sample():
            sample_rate = query.get_sample()
        elif settings.get_turbo():
            sample_rate = snuba_settings.TURBO_SAMPLE_RATE
        else:
            sample_rate = None

        if sample_rate:
            from_clause = u'{} SAMPLE {}'.format(from_clause, sample_rate)

        join_clause = ''
        if query.get_arrayjoin():
            join_clause = u'ARRAY JOIN {}'.format(query.get_arrayjoin())

        where_clause = ''
        if query.get_conditions():
            where_clause = u'WHERE {}'.format(
                conditions_expr(dataset, query.get_conditions(), query,
                                parsing_context))

        prewhere_clause = ''
        if prewhere_conditions:
            prewhere_clause = u'PREWHERE {}'.format(
                conditions_expr(dataset, prewhere_conditions, query,
                                parsing_context))

        group_clause = ''
        if groupby:
            group_clause = 'GROUP BY ({})'.format(', '.join(
                column_expr(dataset, gb, query, parsing_context)
                for gb in groupby))
            if query.has_totals():
                group_clause = '{} WITH TOTALS'.format(group_clause)

        having_clause = ''
        having_conditions = query.get_having()
        if having_conditions:
            assert groupby, 'found HAVING clause with no GROUP BY'
            having_clause = u'HAVING {}'.format(
                conditions_expr(dataset, having_conditions, query,
                                parsing_context))

        order_clause = ''
        if query.get_orderby():
            orderby = [
                column_expr(dataset, util.tuplify(ob), query, parsing_context)
                for ob in util.to_list(query.get_orderby())
            ]
            orderby = [
                u'{} {}'.format(ob.lstrip('-'),
                                'DESC' if ob.startswith('-') else 'ASC')
                for ob in orderby
            ]
            order_clause = u'ORDER BY {}'.format(', '.join(orderby))

        limitby_clause = ''
        if query.get_limitby() is not None:
            limitby_clause = 'LIMIT {} BY {}'.format(*query.get_limitby())

        limit_clause = ''
        if query.get_limit() is not None:
            limit_clause = 'LIMIT {}, {}'.format(query.get_offset(),
                                                 query.get_limit())

        self.__formatted_query = ' '.join([
            c for c in [
                select_clause, from_clause, join_clause, prewhere_clause,
                where_clause, group_clause, having_clause, order_clause,
                limitby_clause, limit_clause
            ] if c
        ])
Example #18
File: test_util.py Project: ruezetle/snuba
    def test_conditions_expr(self, dataset):
        state.set_config("use_escape_alias", 1)
        conditions = [["a", "=", 1]]
        source = (dataset.get_all_storages()
                  [0].get_schemas().get_read_schema().get_data_source())
        assert (conditions_expr(dataset, conditions, Query({}, source),
                                ParsingContext()) == "a = 1")

        conditions = []
        assert (conditions_expr(dataset, conditions, Query({}, source),
                                ParsingContext()) == "")

        conditions = [[[]], []]
        assert (conditions_expr(dataset, conditions, Query({}, source),
                                ParsingContext()) == "")

        conditions = [[["a", "=", 1]]]
        assert (conditions_expr(dataset, conditions, Query({}, source),
                                ParsingContext()) == "a = 1")

        conditions = [["a", "=", 1], ["b", "=", 2]]
        assert (conditions_expr(dataset, conditions, Query({}, source),
                                ParsingContext()) == "a = 1 AND b = 2")

        conditions = [[["a", "=", 1], ["b", "=", 2]]]
        assert (conditions_expr(dataset, conditions, Query({}, source),
                                ParsingContext()) == "(a = 1 OR b = 2)")

        conditions = [[["a", "=", 1], ["b", "=", 2]], ["c", "=", 3]]
        assert (conditions_expr(dataset, conditions, Query(
            {}, source), ParsingContext()) == "(a = 1 OR b = 2) AND c = 3")

        conditions = [[["a", "=", 1], ["b", "=", 2]],
                      [["c", "=", 3], ["d", "=", 4]]]
        assert (conditions_expr(
            dataset, conditions, Query({}, source),
            ParsingContext()) == "(a = 1 OR b = 2) AND (c = 3 OR d = 4)")

        # Malformed condition input
        conditions = [[["a", "=", 1], []]]
        assert (conditions_expr(dataset, conditions, Query({}, source),
                                ParsingContext()) == "a = 1")

        # Test column expansion
        conditions = [[["tags[foo]", "=", 1], ["b", "=", 2]]]
        expanded = column_expr(dataset, "tags[foo]", Query({}, source),
                               ParsingContext())
        assert conditions_expr(
            dataset, conditions, Query({}, source),
            ParsingContext()) == "({} = 1 OR b = 2)".format(expanded)

        # Test using alias if column has already been expanded in SELECT clause
        reuse_query = Query({}, source)
        parsing_context = ParsingContext()
        conditions = [[["tags[foo]", "=", 1], ["b", "=", 2]]]
        column_expr(
            dataset, "tags[foo]", reuse_query,
            parsing_context)  # Expand it once so the next time is aliased
        assert (conditions_expr(
            dataset, conditions, reuse_query,
            parsing_context) == "(`tags[foo]` = 1 OR b = 2)")

        # Test special output format of LIKE
        conditions = [["primary_hash", "LIKE", "%foo%"]]
        assert (conditions_expr(dataset, conditions, Query(
            {}, source), ParsingContext()) == "primary_hash LIKE '%foo%'")

        conditions = tuplify(
            [[["notEmpty", ["arrayElement", ["exception_stacks.type", 1]]],
              "=", 1]])
        assert (
            conditions_expr(dataset, conditions, Query({}, source),
                            ParsingContext()) ==
            "notEmpty(arrayElement((exception_stacks.type AS `exception_stacks.type`), 1)) = 1"
        )

        conditions = tuplify([[["notEmpty", ["tags[sentry:user]"]], "=", 1]])
        assert (conditions_expr(dataset, conditions, Query({}, source),
                                ParsingContext()) ==
                "notEmpty((`sentry:user` AS `tags[sentry:user]`)) = 1")

        conditions = tuplify([[["notEmpty", ["tags_key"]], "=", 1]])
        assert (conditions_expr(
            dataset,
            conditions,
            Query({"conditions": [[["notEmpty", ["tags_key"]], "=", 1]]},
                  source),
            ParsingContext(),
        ) == "notEmpty((arrayJoin(tags.key) AS tags_key)) = 1")

        conditions = tuplify([
            [
                [["notEmpty", ["tags[sentry:environment]"]], "=", "dev"],
                [["notEmpty", ["tags[sentry:environment]"]], "=", "prod"],
            ],
            [
                [["notEmpty", ["tags[sentry:user]"]], "=", "joe"],
                [["notEmpty", ["tags[sentry:user]"]], "=", "bob"],
            ],
        ])
        assert (
            conditions_expr(dataset, conditions, Query({}, source),
                            ParsingContext()) ==
            """(notEmpty((tags.value[indexOf(tags.key, 'sentry:environment')] AS `tags[sentry:environment]`)) = 'dev' OR notEmpty(`tags[sentry:environment]`) = 'prod') AND (notEmpty((`sentry:user` AS `tags[sentry:user]`)) = 'joe' OR notEmpty(`tags[sentry:user]`) = 'bob')"""
        )

        # Test scalar condition on array column is expanded as an iterator.
        conditions = [["exception_frames.filename", "LIKE", "%foo%"]]
        assert (
            conditions_expr(dataset, conditions, Query({}, source),
                            ParsingContext()) ==
            "arrayExists(x -> assumeNotNull(x LIKE '%foo%'), (exception_frames.filename AS `exception_frames.filename`))"
        )

        # Test negative scalar condition on array column is expanded as an all() type iterator.
        conditions = [["exception_frames.filename", "NOT LIKE", "%foo%"]]
        assert (
            conditions_expr(dataset, conditions, Query({}, source),
                            ParsingContext()) ==
            "arrayAll(x -> assumeNotNull(x NOT LIKE '%foo%'), (exception_frames.filename AS `exception_frames.filename`))"
        )

        # Test that a duplicate IN condition is deduplicated even if
        # the lists are in different orders.
        conditions = tuplify([["platform", "IN", ["a", "b", "c"]],
                              ["platform", "IN", ["c", "b", "a"]]])
        assert (conditions_expr(dataset, conditions, Query(
            {}, source), ParsingContext()) == "platform IN ('a', 'b', 'c')")
Example #19
def test_simple_column_expr():
    dataset = get_dataset("groups")
    source = (dataset.get_all_storages()
              [0].get_schemas().get_read_schema().get_data_source())

    body = {"granularity": 86400}
    query = Query(body, source)
    assert (column_expr(
        dataset, "events.event_id", deepcopy(query),
        ParsingContext()) == "(events.event_id AS `events.event_id`)")

    assert (column_expr(dataset, "groups.id", deepcopy(query),
                        ParsingContext()) == "(groups.id AS `groups.id`)")

    assert (column_expr(
        dataset,
        "events.event_id",
        deepcopy(query),
        ParsingContext(),
        "MyVerboseAlias",
    ) == "(events.event_id AS MyVerboseAlias)")

    # Single tag expression
    assert (
        column_expr(dataset, "events.tags[foo]", deepcopy(query),
                    ParsingContext()) ==
        "(events.tags.value[indexOf(events.tags.key, 'foo')] AS `events.tags[foo]`)"
    )

    # Promoted tag expression / no translation
    assert (column_expr(dataset, "events.tags[server_name]", deepcopy(query),
                        ParsingContext()) ==
            "(events.server_name AS `events.tags[server_name]`)")

    # All tag keys expression
    q = Query({"selected_columns": ["events.tags_key"]}, source)
    assert column_expr(dataset, "events.tags_key", q, ParsingContext()) == (
        "(arrayJoin(events.tags.key) AS `events.tags_key`)")

    # If we are going to use both tags_key and tags_value, expand both
    tag_group_body = {"groupby": ["events.tags_key", "events.tags_value"]}
    parsing_context = ParsingContext()
    assert column_expr(dataset, "events.tags_key", Query(
        tag_group_body, source
    ), parsing_context) == (
        "(((arrayJoin(arrayMap((x,y) -> [x,y], events.tags.key, events.tags.value)) "
        "AS all_tags))[1] AS `events.tags_key`)")

    assert (column_expr(
        dataset, "events.time", deepcopy(query),
        ParsingContext()) == "(toDate(events.timestamp) AS `events.time`)")

    assert (column_expr(
        dataset,
        "events.col",
        deepcopy(query),
        ParsingContext(),
        aggregate="sum") == "(sum(events.col) AS `events.col`)")

    assert (column_expr(
        dataset,
        "events.col",
        deepcopy(query),
        ParsingContext(),
        alias="summation",
        aggregate="sum",
    ) == "(sum(events.col) AS summation)")

    assert (column_expr(
        dataset,
        "",
        deepcopy(query),
        ParsingContext(),
        alias="aggregate",
        aggregate="count()",
    ) == "(count() AS aggregate)")

    # Columns that need escaping
    assert (column_expr(dataset, "events.sentry:release", deepcopy(query),
                        ParsingContext()) == "`events.sentry:release`")

    # A 'column' that is actually a string literal
    assert (column_expr(dataset, "'hello world'", deepcopy(query),
                        ParsingContext()) == "'hello world'")

    # Complex expressions (function calls) involving both string and column arguments
    assert (column_expr(
        dataset,
        tuplify(["concat", ["a", "':'", "b"]]),
        deepcopy(query),
        ParsingContext(),
    ) == "concat(a, ':', b)")

    group_id_body = deepcopy(query)
    assert (column_expr(dataset, "events.group_id", group_id_body,
                        ParsingContext()) ==
            "(nullIf(events.group_id, 0) AS `events.group_id`)")

    # turn uniq() into ifNull(uniq(), 0) so it doesn't return null where a number was expected.
    assert (column_expr(
        dataset,
        "events.tags[environment]",
        deepcopy(query),
        ParsingContext(),
        alias="unique_envs",
        aggregate="uniq",
    ) == "(ifNull(uniq(events.environment), 0) AS unique_envs)")
Example #20
File: test_events.py Project: Appva/snuba
    def test_column_expr(self):
        source = self.dataset.get_dataset_schemas().get_read_schema(
        ).get_data_source()
        query = Query(
            {'granularity': 86400},
            source,
        )
        # Single tag expression
        assert column_expr(self.dataset, 'tags[foo]', deepcopy(query), ParsingContext()) ==\
            "(tags.value[indexOf(tags.key, \'foo\')] AS `tags[foo]`)"

        # Promoted tag expression / no translation
        assert column_expr(self.dataset, 'tags[server_name]', deepcopy(query), ParsingContext()) ==\
            "(server_name AS `tags[server_name]`)"

        # Promoted tag expression / with translation
        assert column_expr(self.dataset, 'tags[app.device]', deepcopy(query), ParsingContext()) ==\
            "(app_device AS `tags[app.device]`)"

        # All tag keys expression
        assert column_expr(
            self.dataset, 'tags_key', deepcopy(query),
            ParsingContext()) == ('(arrayJoin(tags.key) AS tags_key)')

        # If we are going to use both tags_key and tags_value, expand both
        tag_group_body = {'groupby': ['tags_key', 'tags_value']}
        assert column_expr(
            self.dataset, 'tags_key', Query(tag_group_body, source),
            ParsingContext()) == (
                '(((arrayJoin(arrayMap((x,y) -> [x,y], tags.key, tags.value)) '
                'AS all_tags))[1] AS tags_key)')

        assert column_expr(self.dataset, 'time', deepcopy(query), ParsingContext()) ==\
            "(toDate(timestamp) AS time)"

        assert column_expr(self.dataset, 'rtime', deepcopy(query), ParsingContext()) ==\
            "(toDate(received) AS rtime)"

        assert column_expr(self.dataset, 'col', deepcopy(query), ParsingContext(), aggregate='sum') ==\
            "(sum(col) AS col)"

        assert column_expr(self.dataset, 'col', deepcopy(query), ParsingContext(), alias='summation', aggregate='sum') ==\
            "(sum(col) AS summation)"

        # Special cases where count() doesn't need a column
        assert column_expr(self.dataset, '', deepcopy(query), ParsingContext(), alias='count', aggregate='count()') ==\
            "(count() AS count)"

        assert column_expr(self.dataset, '', deepcopy(query), ParsingContext(), alias='aggregate', aggregate='count()') ==\
            "(count() AS aggregate)"

        # Columns that need escaping
        assert column_expr(self.dataset, 'sentry:release', deepcopy(query),
                           ParsingContext()) == '`sentry:release`'

        # Columns that start with a negative sign (used in orderby to signify
        # sort order) retain the '-' sign outside the escaping backticks (if any)
        assert column_expr(self.dataset, '-timestamp', deepcopy(query),
                           ParsingContext()) == '-timestamp'
        assert column_expr(self.dataset, '-sentry:release', deepcopy(query),
                           ParsingContext()) == '-`sentry:release`'

        # A 'column' that is actually a string literal
        assert column_expr(self.dataset, '\'hello world\'', deepcopy(query),
                           ParsingContext()) == '\'hello world\''

        # Complex expressions (function calls) involving both string and column arguments
        assert column_expr(self.dataset,
                           tuplify(['concat', ['a', '\':\'', 'b']]),
                           deepcopy(query),
                           ParsingContext()) == 'concat(a, \':\', b)'

        group_id_query = deepcopy(query)
        assert column_expr(
            self.dataset, 'issue', group_id_query,
            ParsingContext()) == '(nullIf(group_id, 0) AS issue)'
        assert column_expr(
            self.dataset, 'group_id', group_id_query,
            ParsingContext()) == '(nullIf(group_id, 0) AS group_id)'

        # turn uniq() into ifNull(uniq(), 0) so it doesn't return null where a number was expected.
        assert column_expr(self.dataset,
                           'tags[environment]',
                           deepcopy(query),
                           ParsingContext(),
                           alias='unique_envs',
                           aggregate='uniq'
                           ) == "(ifNull(uniq(environment), 0) AS unique_envs)"
Example #21
    def test_column_expr(self):
        source = (
            self.dataset.get_all_storages()[0]
            .get_schemas()
            .get_read_schema()
            .get_data_source()
        )
        query = Query({"granularity": 86400}, source,)
        # Single tag expression
        assert (
            column_expr(self.dataset, "tags[foo]", deepcopy(query), ParsingContext())
            == "(tags.value[indexOf(tags.key, 'foo')] AS `tags[foo]`)"
        )

        # Promoted tag expression / no translation
        assert (
            column_expr(
                self.dataset, "tags[server_name]", deepcopy(query), ParsingContext()
            )
            == "(server_name AS `tags[server_name]`)"
        )

        # Promoted tag expression / with translation
        assert (
            column_expr(
                self.dataset, "tags[app.device]", deepcopy(query), ParsingContext()
            )
            == "(app_device AS `tags[app.device]`)"
        )

        # Promoted context expression / with translation
        assert (
            column_expr(
                self.dataset,
                "contexts[device.battery_level]",
                deepcopy(query),
                ParsingContext(),
            )
            == "(toString(device_battery_level) AS `contexts[device.battery_level]`)"
        )

        # All tag keys expression
        q = Query({"granularity": 86400, "selected_columns": ["tags_key"]}, source,)
        assert column_expr(self.dataset, "tags_key", q, ParsingContext()) == (
            "(arrayJoin(tags.key) AS tags_key)"
        )

        # If we are going to use both tags_key and tags_value, expand both
        tag_group_body = {"groupby": ["tags_key", "tags_value"]}
        assert column_expr(
            self.dataset, "tags_key", Query(tag_group_body, source), ParsingContext()
        ) == (
            "(((arrayJoin(arrayMap((x,y) -> [x,y], tags.key, tags.value)) "
            "AS all_tags))[1] AS tags_key)"
        )

        assert (
            column_expr(self.dataset, "time", deepcopy(query), ParsingContext())
            == "(toDate(timestamp) AS time)"
        )

        assert (
            column_expr(self.dataset, "rtime", deepcopy(query), ParsingContext())
            == "(toDate(received) AS rtime)"
        )

        assert (
            column_expr(
                self.dataset, "col", deepcopy(query), ParsingContext(), aggregate="sum"
            )
            == "(sum(col) AS col)"
        )

        assert (
            column_expr(
                self.dataset,
                "col",
                deepcopy(query),
                ParsingContext(),
                alias="summation",
                aggregate="sum",
            )
            == "(sum(col) AS summation)"
        )

        # Special cases where count() doesn't need a column
        assert (
            column_expr(
                self.dataset,
                "",
                deepcopy(query),
                ParsingContext(),
                alias="count",
                aggregate="count()",
            )
            == "(count() AS count)"
        )

        assert (
            column_expr(
                self.dataset,
                "",
                deepcopy(query),
                ParsingContext(),
                alias="aggregate",
                aggregate="count()",
            )
            == "(count() AS aggregate)"
        )

        # Columns that need escaping
        assert (
            column_expr(
                self.dataset, "sentry:release", deepcopy(query), ParsingContext()
            )
            == "`sentry:release`"
        )

        # A 'column' that is actually a string literal
        assert (
            column_expr(
                self.dataset, "'hello world'", deepcopy(query), ParsingContext()
            )
            == "'hello world'"
        )

        # Complex expressions (function calls) involving both string and column arguments
        assert (
            column_expr(
                self.dataset,
                tuplify(["concat", ["a", "':'", "b"]]),
                deepcopy(query),
                ParsingContext(),
            )
            == "concat(a, ':', b)"
        )

        group_id_query = deepcopy(query)
        assert (
            column_expr(self.dataset, "group_id", group_id_query, ParsingContext())
            == "(nullIf(group_id, 0) AS group_id)"
        )

        # turn uniq() into ifNull(uniq(), 0) so it doesn't return null where a number was expected.
        assert (
            column_expr(
                self.dataset,
                "tags[environment]",
                deepcopy(query),
                ParsingContext(),
                alias="unique_envs",
                aggregate="uniq",
            )
            == "(ifNull(uniq(environment), 0) AS unique_envs)"
        )
Example #22
    def test_conditions_expr(self, dataset):
        state.set_config('use_escape_alias', 1)
        conditions = [['a', '=', 1]]
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == 'a = 1'

        conditions = [[['a', '=', 1]]]
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == 'a = 1'

        conditions = [['a', '=', 1], ['b', '=', 2]]
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == 'a = 1 AND b = 2'

        conditions = [[['a', '=', 1], ['b', '=', 2]]]
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == '(a = 1 OR b = 2)'

        conditions = [[['a', '=', 1], ['b', '=', 2]], ['c', '=', 3]]
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == '(a = 1 OR b = 2) AND c = 3'

        conditions = [[['a', '=', 1], ['b', '=', 2]], [['c', '=', 3], ['d', '=', 4]]]
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == '(a = 1 OR b = 2) AND (c = 3 OR d = 4)'

        # Malformed condition input
        conditions = [[['a', '=', 1], []]]
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == 'a = 1'

        # Test column expansion
        conditions = [[['tags[foo]', '=', 1], ['b', '=', 2]]]
        expanded = column_expr(dataset, 'tags[foo]', Query({}), ParsingContext())
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == '({} = 1 OR b = 2)'.format(expanded)

        # Test using alias if column has already been expanded in SELECT clause
        reuse_query = Query({})
        parsing_context = ParsingContext()
        conditions = [[['tags[foo]', '=', 1], ['b', '=', 2]]]
        column_expr(dataset, 'tags[foo]', reuse_query, parsing_context)  # Expand it once so the next time is aliased
        assert conditions_expr(dataset, conditions, reuse_query, parsing_context) == '(`tags[foo]` = 1 OR b = 2)'

        # Test special output format of LIKE
        conditions = [['primary_hash', 'LIKE', '%foo%']]
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == 'primary_hash LIKE \'%foo%\''

        conditions = tuplify([[['notEmpty', ['arrayElement', ['exception_stacks.type', 1]]], '=', 1]])
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == 'notEmpty(arrayElement((exception_stacks.type AS `exception_stacks.type`), 1)) = 1'

        conditions = tuplify([[['notEmpty', ['tags[sentry:user]']], '=', 1]])
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == 'notEmpty((`sentry:user` AS `tags[sentry:user]`)) = 1'

        conditions = tuplify([[['notEmpty', ['tags_key']], '=', 1]])
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == 'notEmpty((arrayJoin(tags.key) AS tags_key)) = 1'

        conditions = tuplify([
            [
                [['notEmpty', ['tags[sentry:environment]']], '=', 'dev'], [['notEmpty', ['tags[sentry:environment]']], '=', 'prod']
            ],
            [
                [['notEmpty', ['tags[sentry:user]']], '=', 'joe'], [['notEmpty', ['tags[sentry:user]']], '=', 'bob']
            ],
        ])
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == \
            """(notEmpty((tags.value[indexOf(tags.key, 'sentry:environment')] AS `tags[sentry:environment]`)) = 'dev' OR notEmpty(`tags[sentry:environment]`) = 'prod') AND (notEmpty((`sentry:user` AS `tags[sentry:user]`)) = 'joe' OR notEmpty(`tags[sentry:user]`) = 'bob')"""

        # Test scalar condition on array column is expanded as an iterator.
        conditions = [['exception_frames.filename', 'LIKE', '%foo%']]
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == 'arrayExists(x -> assumeNotNull(x LIKE \'%foo%\'), (exception_frames.filename AS `exception_frames.filename`))'

        # Test negative scalar condition on array column is expanded as an all() type iterator.
        conditions = [['exception_frames.filename', 'NOT LIKE', '%foo%']]
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == 'arrayAll(x -> assumeNotNull(x NOT LIKE \'%foo%\'), (exception_frames.filename AS `exception_frames.filename`))'

        # Test that a duplicate IN condition is deduplicated even if
        # the lists are in different orders.
        conditions = tuplify([
            ['platform', 'IN', ['a', 'b', 'c']],
            ['platform', 'IN', ['c', 'b', 'a']]
        ])
        assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == "platform IN ('a', 'b', 'c')"
Example #23
def test_conditions_expr():
    dataset = get_dataset("groups")
    source = (dataset.get_all_storages()
              [0].get_schemas().get_read_schema().get_data_source())
    state.set_config("use_escape_alias", 1)
    conditions = [["events.a", "=", 1]]
    query = Query({}, source)
    assert (conditions_expr(
        dataset, conditions, deepcopy(query),
        ParsingContext()) == "(events.a AS `events.a`) = 1")

    conditions = [
        [["events.a", "=", 1], ["groups.b", "=", 2]],
        [["events.c", "=", 3], ["groups.d", "=", 4]],
    ]
    assert conditions_expr(
        dataset, conditions, deepcopy(query), ParsingContext()
    ) == (
        "((events.a AS `events.a`) = 1 OR (groups.b AS `groups.b`) = 2)"
        " AND ((events.c AS `events.c`) = 3 OR (groups.d AS `groups.d`) = 4)")

    # Test column expansion
    conditions = [[["events.tags[foo]", "=", 1], ["groups.b", "=", 2]]]
    expanded = column_expr(dataset, "events.tags[foo]", deepcopy(query),
                           ParsingContext())
    assert conditions_expr(
        dataset, conditions, deepcopy(query),
        ParsingContext()) == "({} = 1 OR (groups.b AS `groups.b`) = 2)".format(
            expanded)

    # Test using alias if column has already been expanded in SELECT clause
    reuse_query = deepcopy(query)
    parsing_context = ParsingContext()
    conditions = [[["events.tags[foo]", "=", 1], ["groups.b", "=", 2]]]
    column_expr(dataset, "events.tags[foo]", reuse_query,
                parsing_context)  # Expand it once so the next time is aliased
    assert (conditions_expr(dataset, conditions, reuse_query, parsing_context)
            == "(`events.tags[foo]` = 1 OR (groups.b AS `groups.b`) = 2)")

    # Test special output format of LIKE
    conditions = [["events.primary_hash", "LIKE", "%foo%"]]
    assert (conditions_expr(dataset, conditions, deepcopy(query),
                            ParsingContext()) ==
            "(events.primary_hash AS `events.primary_hash`) LIKE '%foo%'")

    conditions = tuplify(
        [[["notEmpty", ["arrayElement", ["events.exception_stacks.type", 1]]],
          "=", 1]])
    assert (
        conditions_expr(dataset, conditions, deepcopy(query),
                        ParsingContext()) ==
        "notEmpty(arrayElement((events.exception_stacks.type AS `events.exception_stacks.type`), 1)) = 1"
    )

    conditions = tuplify([[["notEmpty", ["events.tags[sentry:user]"]], "=",
                           1]])
    assert (conditions_expr(
        dataset, conditions, deepcopy(query),
        ParsingContext()) == "notEmpty(`events.tags[sentry:user]`) = 1")

    conditions = tuplify([[["notEmpty", ["events.tags_key"]], "=", 1]])
    q = Query({"selected_columns": ["events.tags_key"]}, source)
    assert (conditions_expr(dataset, conditions, q, ParsingContext()) ==
            "notEmpty((arrayJoin(events.tags.key) AS `events.tags_key`)) = 1")

    # Test scalar condition on array column is expanded as an iterator.
    conditions = [["events.exception_frames.filename", "LIKE", "%foo%"]]
    assert (
        conditions_expr(dataset, conditions, deepcopy(query),
                        ParsingContext()) ==
        "arrayExists(x -> assumeNotNull(x LIKE '%foo%'), (events.exception_frames.filename AS `events.exception_frames.filename`))"
    )