def test_uuid_array_column_processor(
    unprocessed: Expression,
    expected: Expression,
    formatted_value: str,
) -> None:
    unprocessed_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=unprocessed,
    )
    expected_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=expected,
    )

    FixedStringArrayColumnProcessor(set(["column1", "column2"]),
                                    32).process_query(unprocessed_query,
                                                      HTTPQuerySettings())
    assert unprocessed_query.get_selected_columns() == [
        SelectedExpression(
            "column2",
            Column(None, None, "column2"),
        )
    ]

    assert expected_query.get_condition() == unprocessed_query.get_condition()
    condition = unprocessed_query.get_condition()
    assert condition is not None
    ret = condition.accept(ClickhouseExpressionFormatter())
    assert ret == formatted_value
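
The @pytest.mark.parametrize decorator supplying unprocessed, expected and formatted_value is not reproduced in this listing. Below is a minimal sketch of one plausible case; the import paths and fixture values are assumptions, and the case deliberately uses a column the processor does not rewrite, so unprocessed and expected coincide.

import pytest  # sketch only; the real test module already imports what it needs

from snuba.query.conditions import ConditionFunctions, binary_condition  # path assumed
from snuba.query.expressions import Column, Literal  # path assumed

test_data = [
    pytest.param(
        # condition as written
        binary_condition(
            ConditionFunctions.EQ,
            Column(None, None, "column3"),
            Literal(None, "a_value"),
        ),
        # condition after processing (unchanged: column3 is not handled)
        binary_condition(
            ConditionFunctions.EQ,
            Column(None, None, "column3"),
            Literal(None, "a_value"),
        ),
        # expected ClickhouseExpressionFormatter output
        "equals(column3, 'a_value')",
        id="condition_on_an_untouched_column",
    ),
]

# The test would then be decorated with:
# @pytest.mark.parametrize("unprocessed, expected, formatted_value", test_data)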
Example #2
def test_type_condition_optimizer() -> None:
    cond1 = binary_condition(
        ConditionFunctions.EQ, Column(None, None, "col1"), Literal(None, "val1")
    )

    unprocessed_query = Query(
        Table("errors", ColumnSet([])),
        condition=binary_condition(
            BooleanFunctions.AND,
            binary_condition(
                ConditionFunctions.NEQ,
                Column(None, None, "type"),
                Literal(None, "transaction"),
            ),
            cond1,
        ),
    )
    expected_query = Query(
        Table("errors", ColumnSet([])),
        condition=binary_condition(BooleanFunctions.AND, Literal(None, 1), cond1),
    )
    TypeConditionOptimizer().process_query(unprocessed_query, HTTPQuerySettings())

    assert expected_query.get_condition() == unprocessed_query.get_condition()
    condition = unprocessed_query.get_condition()
    assert condition is not None
    ret = condition.accept(ClickhouseExpressionFormatter())
    assert ret == "1 AND equals(col1, 'val1')"
def test_uuid_array_column_processor(
    unprocessed: Expression,
    expected: Expression,
    formatted_value: str,
) -> None:
    unprocessed_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=unprocessed,
    )
    expected_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=expected,
    )

    SliceOfMapOptimizer().process_query(unprocessed_query,
                                        HTTPRequestSettings())

    assert expected_query.get_condition() == unprocessed_query.get_condition()
    condition = unprocessed_query.get_condition()
    assert condition is not None
    ret = condition.accept(ClickhouseExpressionFormatter())
    assert ret == formatted_value
Example #4
def test_uuid_array_column_processor(
    unprocessed: Expression,
    expected: Expression,
    formatted_value: str,
) -> None:
    unprocessed_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=unprocessed,
    )
    expected_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=expected,
    )

    UUIDArrayColumnProcessor(set(["column1", "column2"
                                  ])).process_query(unprocessed_query,
                                                    HTTPRequestSettings())
    assert unprocessed_query.get_selected_columns() == [
        SelectedExpression(
            "column2",
            FunctionCall(
                None,
                "arrayMap",
                (
                    Lambda(
                        None,
                        ("x", ),
                        FunctionCall(
                            None,
                            "replaceAll",
                            (
                                FunctionCall(None, "toString",
                                             (Argument(None, "x"), )),
                                Literal(None, "-"),
                                Literal(None, ""),
                            ),
                        ),
                    ),
                    Column(None, None, "column2"),
                ),
            ),
        )
    ]

    assert expected_query.get_condition() == unprocessed_query.get_condition()
    condition = unprocessed_query.get_condition()
    assert condition is not None
    ret = condition.accept(ClickhouseExpressionFormatter())
    assert ret == formatted_value
def test_recursive_useless_condition(
    input_query: ClickhouseQuery,
    expected_query: ClickhouseQuery,
) -> None:
    # copy the condition to the having condition so that we test both being
    # applied in one test
    input_query.set_ast_having(deepcopy(input_query.get_condition()))
    expected_query.set_ast_having(deepcopy(expected_query.get_condition()))
    MappingOptimizer(
        column_name="tags",
        hash_map_name="_tags_hash_map",
        killswitch="tags_hash_map_enabled",
    ).process_query(input_query, HTTPQuerySettings())
    assert input_query == expected_query
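
The parametrized input_query / expected_query pairs are not shown here, but the visitor in Example #20 below gives away the shape of the rewrite MappingOptimizer performs: an arrayElement lookup on tags becomes a membership test on the flattened hash-map column. A hedged sketch using the same expression classes as the rest of this page; the cityHash64('key=value') argument is an assumption and varies between Snuba versions.

# Column / FunctionCall / Literal as imported elsewhere in these examples.
tag_condition = FunctionCall(
    None,
    "equals",
    (
        FunctionCall(
            None,
            "arrayElement",
            (
                Column(None, None, "tags.value"),
                FunctionCall(
                    None,
                    "indexOf",
                    (Column(None, None, "tags.key"), Literal(None, "my_tag")),
                ),
            ),
        ),
        Literal(None, "a_value"),
    ),
)

# Assumed rewrite: membership check against the precomputed hash map column.
optimized_condition = FunctionCall(
    None,
    "has",
    (
        Column(None, None, "_tags_hash_map"),
        FunctionCall(None, "cityHash64", (Literal(None, "my_tag=a_value"),)),
    ),
)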
Example #6
    def process_query(self, query: Query, request_settings: RequestSettings) -> None:
        def process_condition(exp: Expression) -> Expression:
            result = CONDITION_PATTERN.match(exp)
            if result is not None:
                key_column = result.optional_string(KEY_COL_MAPPING_PARAM)
                if key_column == "tags.key":
                    rhs = result.optional_string(KEY_MAPPING_PARAM)
                    table_name = result.optional_string(TABLE_MAPPING_PARAM)
                    replacement = FunctionCall(
                        exp.alias,
                        "has",
                        (Column(None, table_name, "tags.key"), Literal(None, rhs)),
                    )

                    assert isinstance(exp, FunctionCall)
                    if exp.function_name == ConditionFunctions.EQ:
                        replacement = FunctionCall(exp.alias, "not", (replacement,))

                    prev_value = query.get_experiment_value(
                        "empty-string-tag-condition"
                    )
                    if prev_value is not None:
                        return replacement if prev_value == "true" else exp

                    if settings.TESTING or random.random() < 0.5:
                        query.add_experiment("empty-string-tag-condition", "true")
                        return replacement
                    else:
                        query.add_experiment("empty-string-tag-condition", "false")

            return exp

        condition = query.get_condition()
        if condition is not None:
            query.set_ast_condition(condition.transform(process_condition))
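
What CONDITION_PATTERN matches is defined outside this snippet; the sketch below is a hedged illustration of the intended rewrite, using the same expression classes as the rest of this page. The exact shape of the matched tag lookup is an assumption.

# `tags[transaction] != ''`, roughly:
before = FunctionCall(
    None,
    "notEquals",
    (
        FunctionCall(
            None,
            "arrayElement",
            (
                Column(None, None, "tags.value"),
                FunctionCall(
                    None,
                    "indexOf",
                    (Column(None, None, "tags.key"), Literal(None, "transaction")),
                ),
            ),
        ),
        Literal(None, ""),
    ),
)

# ...becomes a plain membership check on the key array:
after_neq = FunctionCall(
    None, "has", (Column(None, None, "tags.key"), Literal(None, "transaction"))
)

# For an equality against '' the replacement is additionally negated,
# exactly as the ConditionFunctions.EQ branch above does:
after_eq = FunctionCall(None, "not", (after_neq,))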
def test_not_many_groups_to_exclude(query: ClickhouseQuery) -> None:
    state.set_config("max_group_ids_exclude", 5)
    set_project_exclude_groups(2, [100, 101, 102], ReplacerState.EVENTS)

    PostReplacementConsistencyEnforcer("project_id",
                                       ReplacerState.EVENTS).process_query(
                                           query, HTTPRequestSettings())

    assert query.get_condition() == FunctionCall(
        None,
        BooleanFunctions.AND,
        (
            FunctionCall(
                None,
                "notIn",
                (
                    FunctionCall(None, "assumeNotNull",
                                 (Column(None, None, "group_id"), )),
                    FunctionCall(
                        None,
                        "tuple",
                        (
                            Literal(None, 100),
                            Literal(None, 101),
                            Literal(None, 102),
                        ),
                    ),
                ),
            ),
            build_in("project_id", [2]),
        ),
    )
    assert not query.get_from_clause().final
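
The build_in, build_not_in and build_and helpers used in these assertions are defined in the repository's test helpers. The sketch below is consistent with the expanded FunctionCall trees asserted on this page, but the real definitions may differ in detail.

from typing import Sequence, Union

def build_in(column: str, values: Sequence[Union[int, str]]) -> FunctionCall:
    return FunctionCall(
        None,
        "in",
        (
            Column(None, None, column),
            FunctionCall(None, "tuple", tuple(Literal(None, v) for v in values)),
        ),
    )

def build_not_in(column: str, values: Sequence[Union[int, str]]) -> FunctionCall:
    # Mirrors the notIn(assumeNotNull(...), tuple(...)) tree asserted above.
    return FunctionCall(
        None,
        "notIn",
        (
            FunctionCall(None, "assumeNotNull", (Column(None, None, column),)),
            FunctionCall(None, "tuple", tuple(Literal(None, v) for v in values)),
        ),
    )

def build_and(lhs: Expression, rhs: Expression) -> FunctionCall:
    return FunctionCall(None, BooleanFunctions.AND, (lhs, rhs))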
Example #8
    def process_query(self, query: Query, query_settings: QuerySettings) -> None:
        if not get_config(self.__killswitch, 1):
            return
        condition, cond_class = self.__get_reduced_and_classified_query_clause(
            query.get_condition(), query
        )
        query.set_ast_condition(condition)
        if cond_class == ConditionClass.NOT_OPTIMIZABLE:
            return

        having_cond, having_cond_class = self.__get_reduced_and_classified_query_clause(
            query.get_having(), query
        )
        query.set_ast_having(having_cond)
        if having_cond_class == ConditionClass.NOT_OPTIMIZABLE:
            return

        if not (
            cond_class == ConditionClass.OPTIMIZABLE
            or having_cond_class == ConditionClass.OPTIMIZABLE
        ):
            return

        metrics.increment("optimizable_query")
        query.add_experiment("tags_hashmap_applied", 1)

        if condition is not None:
            query.set_ast_condition(condition.transform(self.__replace_with_hash))
        if having_cond is not None:
            query.set_ast_having(having_cond.transform(self.__replace_with_hash))
def get_filtered_mapping_keys(query: Query, column_name: str) -> Sequence[str]:
    """
    Identifies the conditions we can apply the arrayFilter optimization
    on.
    Which means: if the arrayJoin is in the select clause, there
    are one or more top level AND condition on the arrayJoin and
    there is no OR condition in the query.
    """
    array_join_found = any(
        array_join_pattern(column_name).match(f) is not None
        for selected in query.get_selected_columns() or []
        for f in selected.expression)

    if not array_join_found:
        return list()

    ast_condition = query.get_condition()
    cond_keys = (_get_mapping_keys_in_condition(ast_condition, column_name)
                 if ast_condition is not None else set())
    if cond_keys is None:
        # This means we found an OR. We cowardly give up, even though there
        # could be cases where this condition is still optimizable.
        return []

    ast_having = query.get_having()
    having_keys = (_get_mapping_keys_in_condition(ast_having, column_name)
                   if ast_having is not None else set())
    if having_keys is None:
        # Same as above
        return []

    keys = cond_keys | having_keys
    return sorted(list(keys))
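
A standalone toy sketch (not Snuba code) of the "give up on OR" rule described in the docstring: walk the top-level AND chain collecting keys, and bail out with None as soon as an OR is found, which the caller then turns into an empty result.

from typing import Optional, Set

def collect_keys(condition: dict) -> Optional[Set[str]]:
    """condition is a toy dict AST: {"op": "and" / "or" / "eq", ...}."""
    op = condition["op"]
    if op == "or":
        return None  # not optimizable: bail out like the real helper
    if op == "and":
        keys: Set[str] = set()
        for child in condition["children"]:
            child_keys = collect_keys(child)
            if child_keys is None:
                return None
            keys |= child_keys
        return keys
    # leaf: only conditions on the arrayJoined column contribute keys
    return {condition["key"]} if condition.get("on_array_join") else set()

assert collect_keys(
    {
        "op": "and",
        "children": [
            {"op": "eq", "on_array_join": True, "key": "lcp"},
            {"op": "eq", "on_array_join": False, "key": "duration"},
        ],
    }
) == {"lcp"}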
    def process_query(self, query: Query, request_settings: RequestSettings) -> None:
        if not get_config(self.__killswitch, 1):
            return

        cond_class = ConditionClass.IRRELEVANT
        condition = query.get_condition()
        if condition is not None:
            cond_class = self.__classify_combined_conditions(condition)
            if cond_class == ConditionClass.NOT_OPTIMIZABLE:
                return

        having_cond_class = ConditionClass.IRRELEVANT
        having_cond = query.get_having()
        if having_cond is not None:
            having_cond_class = self.__classify_combined_conditions(having_cond)
            if having_cond_class == ConditionClass.NOT_OPTIMIZABLE:
                return

        if not (
            cond_class == ConditionClass.OPTIMIZABLE
            or having_cond_class == ConditionClass.OPTIMIZABLE
        ):
            return

        metrics.increment("optimizable_query")

        if condition is not None:
            query.set_ast_condition(condition.transform(self.__replace_with_hash))
        if having_cond is not None:
            query.set_ast_having(having_cond.transform(self.__replace_with_hash))
Example #11
def test_multiple_not_too_many_excludes(
    query_with_multiple_group_ids: ClickhouseQuery,
) -> None:
    """
    Query is looking for multiple groups and there are not too many groups to exclude, but
    there are fewer groups queried for than replaced.
    """
    enforcer = PostReplacementConsistencyEnforcer("project_id",
                                                  ReplacerState.ERRORS)

    set_project_exclude_groups(
        2,
        [100, 101, 102],
        ReplacerState.ERRORS,
        ReplacementType.EXCLUDE_GROUPS,  # Arbitrary replacement type, no impact on tests
    )

    enforcer._set_query_final(query_with_multiple_group_ids, True)
    state.set_config("max_group_ids_exclude", 5)

    enforcer.process_query(query_with_multiple_group_ids, HTTPQuerySettings())
    assert query_with_multiple_group_ids.get_condition() == build_and(
        build_not_in("group_id", [101, 102]),
        build_and(build_in("project_id", [2]),
                  build_in("group_id", [101, 102])),
    )
    assert not query_with_multiple_group_ids.get_from_clause().final
Example #12
def test_single_too_many_exclude(
        query_with_single_group_id: ClickhouseQuery) -> None:
    """
    Query is looking for a group that has been replaced, and there are too many
    groups to exclude.
    """
    enforcer = PostReplacementConsistencyEnforcer("project_id",
                                                  ReplacerState.ERRORS)

    set_project_exclude_groups(
        2,
        [100, 101, 102],
        ReplacerState.ERRORS,
        ReplacementType.EXCLUDE_GROUPS,  # Arbitrary replacement type, no impact on tests
    )

    enforcer._set_query_final(query_with_single_group_id, True)
    state.set_config("max_group_ids_exclude", 2)

    enforcer.process_query(query_with_single_group_id, HTTPQuerySettings())
    assert query_with_single_group_id.get_condition() == build_and(
        build_not_in("group_id", [101]),
        build_and(build_in("project_id", [2]), build_in("group_id", [101])),
    )
    assert not query_with_single_group_id.get_from_clause().final
Example #13
    def process_query(self, query: Query,
                      query_settings: QuerySettings) -> None:
        def process_condition(exp: Expression) -> Expression:
            result = CONDITION_PATTERN.match(exp)
            if result is not None:
                key_column = result.optional_string(KEY_COL_MAPPING_PARAM)
                if key_column == "tags.key":
                    rhs = result.optional_string(KEY_MAPPING_PARAM)
                    table_name = result.optional_string(TABLE_MAPPING_PARAM)
                    replacement = FunctionCall(
                        exp.alias,
                        "has",
                        (Column(None, table_name,
                                "tags.key"), Literal(None, rhs)),
                    )

                    assert isinstance(exp, FunctionCall)
                    if exp.function_name == ConditionFunctions.EQ:
                        replacement = FunctionCall(exp.alias, "not",
                                                   (replacement, ))

                    return replacement

            return exp

        condition = query.get_condition()
        if condition is not None:
            query.set_ast_condition(condition.transform(process_condition))
def get_filtered_mapping_keys(
    query: Query,
    extractors: Sequence[Extractor[T]],
    is_skippable_condition: Callable[[Expression], bool],
) -> Sequence[T]:
    """
    Identifies the conditions we can apply the arrayFilter optimization on.

    This applies when the arrayJoin is in the select clause, there is one or
    more top-level AND conditions on the arrayJoin, and there is no OR
    condition in the query.
    """
    ast_condition = query.get_condition()
    cond_keys: Optional[Set[T]] = (
        get_mapping_keys_in_condition(ast_condition, extractors, is_skippable_condition)
        if ast_condition is not None
        else set()
    )
    if cond_keys is None:
        # This means we found an OR. We cowardly give up, even though there
        # could be cases where this condition is still optimizable.
        return []

    ast_having = query.get_having()
    having_keys: Optional[Set[T]] = (
        get_mapping_keys_in_condition(ast_having, extractors, is_skippable_condition)
        if ast_having is not None
        else set()
    )
    if having_keys is None:
        # Same as above
        return []

    keys = cond_keys | having_keys
    return sorted(list(keys))
Example #15
def test_hexint_column_processor(unprocessed: Expression,
                                 formatted_value: str) -> None:
    unprocessed_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column1", Column(None, None, "column1"))
        ],
        condition=unprocessed,
    )

    HexIntColumnProcessor(set(["column1"
                               ])).process_query(unprocessed_query,
                                                 HTTPQuerySettings())
    assert unprocessed_query.get_selected_columns() == [
        SelectedExpression(
            "column1",
            FunctionCall(
                None,
                "lower",
                (FunctionCall(
                    None,
                    "hex",
                    (Column(None, None, "column1"), ),
                ), ),
            ),
        )
    ]

    condition = unprocessed_query.get_condition()
    assert condition is not None
    ret = condition.accept(ClickhouseExpressionFormatter())
    assert ret == formatted_value
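
A hedged follow-up to the assertion above: formatting the rewritten select expression with the same ClickhouseExpressionFormatter would be expected to print the string below. The output style is inferred from the other formatted assertions on this page.

rewritten = FunctionCall(
    None,
    "lower",
    (FunctionCall(None, "hex", (Column(None, None, "column1"),)),),
)
assert rewritten.accept(ClickhouseExpressionFormatter()) == "lower(hex(column1))"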
Example #16
    def process_query(self, query: Query,
                      request_settings: RequestSettings) -> None:
        missing_checkers = {checker for checker in self.__condition_checkers}

        def inspect_expression(condition: Expression) -> None:
            top_level = get_first_level_and_conditions(condition)
            for condition in top_level:
                for checker in self.__condition_checkers:
                    if checker in missing_checkers:
                        if checker.check(condition):
                            missing_checkers.remove(checker)

        condition = query.get_condition()
        if condition is not None:
            inspect_expression(condition)

        prewhere = query.get_prewhere_ast()
        if prewhere is not None:
            inspect_expression(prewhere)

        missing_ids = {checker.get_id() for checker in missing_checkers}
        if get_config("mandatory_condition_enforce", 0):
            assert (
                not missing_checkers
            ), f"Missing mandatory columns in query. Missing {missing_ids}"
        else:
            if missing_checkers:
                logger.error(
                    "Query is missing mandatory columns",
                    extra={"missing_checkers": missing_ids},
                )
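
The loop above only relies on each checker exposing check() and get_id(). A minimal hypothetical checker is sketched below; the real ConditionChecker interface in the repository (base class, recursion into nested expressions) may well differ.

class ProjectIdEnforcer:
    """Hypothetical checker: passes if a top-level condition touches project_id."""

    def get_id(self) -> str:
        return "project_id"

    def check(self, expression: Expression) -> bool:
        # Accept any top-level condition that references the project_id column.
        return isinstance(expression, FunctionCall) and any(
            isinstance(param, Column) and param.column_name == "project_id"
            for param in expression.parameters
        )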
Example #17
def test_without_turbo_without_projects_needing_final(
        query: ClickhouseQuery) -> None:
    PostReplacementConsistencyEnforcer("project_id", None).process_query(
        query, HTTPQuerySettings())

    assert query.get_condition() == build_in("project_id", [2])
    assert not query.get_from_clause().final
Example #18
def test_not_many_groups_to_exclude(query: ClickhouseQuery) -> None:
    state.set_config("max_group_ids_exclude", 5)
    set_project_exclude_groups(
        2,
        [100, 101, 102],
        ReplacerState.ERRORS,
        ReplacementType.EXCLUDE_GROUPS,  # Arbitrary replacement type, no impact on tests
    )

    PostReplacementConsistencyEnforcer("project_id",
                                       ReplacerState.ERRORS).process_query(
                                           query, HTTPQuerySettings())

    assert query.get_condition() == build_and(
        FunctionCall(
            None,
            "notIn",
            (
                FunctionCall(None, "assumeNotNull",
                             (Column(None, None, "group_id"), )),
                FunctionCall(
                    None,
                    "tuple",
                    (
                        Literal(None, 100),
                        Literal(None, 101),
                        Literal(None, 102),
                    ),
                ),
            ),
        ),
        build_in("project_id", [2]),
    )
    assert not query.get_from_clause().final
def test_tags_hash_map(
    query: ClickhouseQuery, expected_condition: Expression
) -> None:
    set_config("tags_hash_map_enabled", 1)
    MappingOptimizer(
        column_name="tags",
        hash_map_name="_tags_hash_map",
        killswitch="tags_hash_map_enabled",
    ).process_query(query, HTTPRequestSettings())

    assert query.get_condition() == expected_condition
Example #20
    def query_verifier(query: Query, settings: QuerySettings, reader: Reader) -> None:
        class ConditionVisitor(NoopVisitor):
            def __init__(self) -> None:
                self.found_hashmap_condition = False

            def visit_function_call(self, exp: FunctionCall) -> None:
                assert exp.function_name != "arrayElement"
                if (
                    exp.function_name == "has"
                    and isinstance(exp.parameters[0], Column)
                    and exp.parameters[0].column_name == "_tags_hash_map"
                ):
                    self.found_hashmap_condition = True
                return super().visit_function_call(exp)

        visitor = ConditionVisitor()
        query.get_condition().accept(visitor)
        assert visitor.found_hashmap_condition
Example #21
def test_tags_processor(query_body: MutableMapping[str, Any],
                        expected_query: ClickhouseQuery) -> None:
    """
    Tests the whole processing in some notable cases.
    """
    processed = parse_and_process(query_body)
    assert (
        processed.get_selected_columns() == expected_query.get_selected_columns()
    )
    assert processed.get_condition() == expected_query.get_condition()
    assert processed.get_having() == expected_query.get_having()
def test_too_many_groups_to_exclude(query: ClickhouseQuery) -> None:
    state.set_config("max_group_ids_exclude", 2)
    set_project_exclude_groups(2, [100, 101, 102], ReplacerState.EVENTS)

    PostReplacementConsistencyEnforcer("project_id",
                                       ReplacerState.EVENTS).process_query(
                                           query, HTTPRequestSettings())

    assert query.get_condition() == build_in("project_id", [2])
    assert query.get_from_clause().final
def test_without_turbo_with_projects_needing_final(
        query: ClickhouseQuery) -> None:
    set_project_needs_final(2, ReplacerState.EVENTS)

    PostReplacementConsistencyEnforcer("project_id",
                                       ReplacerState.EVENTS).process_query(
                                           query, HTTPRequestSettings())

    assert query.get_condition() == build_in("project_id", [2])
    assert query.get_from_clause().final
Example #24
def test_translation(mappers: TranslationMappers, query: SnubaQuery,
                     expected: ClickhouseQuery) -> None:
    translated = QueryTranslator(mappers).translate(query)

    # TODO: consider providing an __eq__ method to the Query class. Or turn it into
    # a dataclass.
    assert expected.get_selected_columns() == translated.get_selected_columns()
    assert expected.get_groupby() == translated.get_groupby()
    assert expected.get_condition() == translated.get_condition()
    assert expected.get_arrayjoin() == translated.get_arrayjoin()
    assert expected.get_having() == translated.get_having()
    assert expected.get_orderby() == translated.get_orderby()
Example #25
    def process_query(self, query: Query, request_settings: RequestSettings) -> None:
        query.transform_expressions(
            self._process_expressions, skip_transform_condition=True
        )

        condition = query.get_condition()
        if condition is not None:
            processed = condition.transform(self.__process_optimizable_condition)
            if processed == condition:
                processed = condition.transform(self._process_expressions)

            query.set_ast_condition(processed)
Example #26
def test_spans_processor(
    query: ClickhouseQuery,
    expected_selected_columns: List[SelectedExpression],
    expected_conditions: Optional[Expression],
) -> None:
    query_settings = HTTPQuerySettings()
    bloom_filter_processor = BloomFilterOptimizer("spans", ["op", "group"],
                                                  ["exclusive_time"])
    bloom_filter_processor.process_query(query, query_settings)
    array_join_processor = ArrayJoinOptimizer("spans", ["op", "group"],
                                              ["exclusive_time"])
    array_join_processor.process_query(query, query_settings)
    assert query.get_selected_columns() == expected_selected_columns
    assert query.get_condition() == expected_conditions
Example #27
def test_without_turbo_with_projects_needing_final(
        query: ClickhouseQuery) -> None:
    set_project_needs_final(
        2,
        ReplacerState.ERRORS,
        ReplacementType.EXCLUDE_GROUPS,  # Arbitrary replacement type, no impact on tests
    )

    PostReplacementConsistencyEnforcer("project_id",
                                       ReplacerState.ERRORS).process_query(
                                           query, HTTPQuerySettings())

    assert query.get_condition() == build_in("project_id", [2])
    assert query.get_from_clause().final
Example #28
def test_too_many_groups_to_exclude(query: ClickhouseQuery) -> None:
    state.set_config("max_group_ids_exclude", 2)
    set_project_exclude_groups(
        2,
        [100, 101, 102],
        ReplacerState.ERRORS,
        ReplacementType.EXCLUDE_GROUPS,  # Arbitrary replacement type, no impact on tests
    )

    PostReplacementConsistencyEnforcer("project_id",
                                       ReplacerState.ERRORS).process_query(
                                           query, HTTPQuerySettings())

    assert query.get_condition() == build_in("project_id", [2])
    assert query.get_from_clause().final
Example #29
    def process_query(self, query: Query,
                      query_settings: QuerySettings) -> None:
        query.transform_expressions(self._process_expressions,
                                    skip_transform_condition=True)

        condition = query.get_condition()
        if condition is not None:
            if self.__contains_unoptimizable_condition(condition):
                processed = condition.transform(self._process_expressions)
            else:
                processed = condition.transform(
                    self.__process_optimizable_condition)
                if condition == processed:
                    processed = processed.transform(self._process_expressions)

            query.set_ast_condition(processed)
Example #30
def test_no_groups_too_many_excludes(query: ClickhouseQuery) -> None:
    """
    Query has no groups, and too many to exclude.
    """
    enforcer = PostReplacementConsistencyEnforcer("project_id",
                                                  ReplacerState.ERRORS)

    set_project_exclude_groups(
        2,
        [100, 101, 102],
        ReplacerState.ERRORS,
        ReplacementType.EXCLUDE_GROUPS,  # Arbitrary replacement type, no impact on tests
    )

    enforcer._set_query_final(query, True)
    state.set_config("max_group_ids_exclude", 1)

    enforcer.process_query(query, HTTPQuerySettings())
    assert query.get_condition() == build_in("project_id", [2])
    assert query.get_from_clause().final