Esempio n. 1
0
    def __get_filter_tags(self, query: Query) -> List[str]:
        """
        Returns the tag names the arrayFilter optimization can be applied to.

        Tags are only collected when the tags_key column appears in the select
        clause; they come from the top level tag conditions found in the query
        condition and in the having clause.

        An empty list is returned when the "ast_tag_processor_enabled" config
        is off, when tags_key is not selected, or when either clause contains
        an OR.
        """
        if not state.get_config("ast_tag_processor_enabled", 0):
            return []

        def selects_tags_key(expression: Expression) -> bool:
            # Walk every node of the selected expression looking for the column.
            return any(
                isinstance(node, Column) and node.column_name == "tags_key"
                for node in expression
            )

        selected = query.get_selected_columns_from_ast() or []
        if not any(selects_tags_key(item) for item in selected):
            return []

        def tags_constrained_by(
            clause: Optional[Expression],
        ) -> Optional[List[str]]:
            # A missing clause contributes no tags; that is not a failure.
            if not clause:
                return []
            for node in clause:
                if is_binary_condition(node, BooleanFunctions.OR):
                    # None tells the caller an OR was found in this clause.
                    return None
            return self.__extract_top_level_tag_conditions(clause)

        where_tags = tags_constrained_by(query.get_condition_from_ast())
        # A None result means we found an OR. Cowardly we give up even though
        # there could be cases where the clause is still optimizable.
        if where_tags is not None:
            having_tags = tags_constrained_by(query.get_having_from_ast())
            if having_tags is not None:
                return where_tags + having_tags
        return []
Esempio n. 2
0
    def __init__(
        self,
        query: Query,
        settings: RequestSettings,
    ) -> None:
        """
        Captures the parts of the Snuba query and of the request settings
        this object works with.

        Raises AssertionError (when assertions are enabled) if the query has
        a HAVING clause but no GROUP BY.
        """
        self.__settings = settings
        # No formatted query is stored at construction time.
        self.__formatted_query: Optional[str] = None

        # Snuba query structure.
        # The pieces are referenced directly: that makes it easier to process
        # this query independently from the Snuba Query, and it carries no
        # risk since they are immutable.
        self.__selected_columns = query.get_selected_columns_from_ast()
        self.__condition = query.get_condition_from_ast()
        groupby = query.get_groupby_from_ast()
        self.__groupby = groupby
        having = query.get_having_from_ast()
        self.__having = having
        self.__orderby = query.get_orderby_from_ast()
        self.__data_source = query.get_data_source()
        self.__arrayjoin = query.get_arrayjoin_from_ast()
        self.__granularity = query.get_granularity()
        self.__limit = query.get_limit()
        self.__limitby = query.get_limitby()
        self.__offset = query.get_offset()

        # A HAVING clause is only accepted together with a GROUP BY.
        if having:
            assert groupby, "found HAVING clause with no GROUP BY"

        # Clickhouse specific fields. Some of them still live in the Snuba
        # query and have to be moved.
        self.__turbo = settings.get_turbo()
        self.__final = query.get_final()
        self.__sample = query.get_sample()
        self.__hastotals = query.has_totals()
        # TODO: Pre where processing will become a step in Clickhouse Query
        # processing instead of being pulled from the Snuba Query
        self.__prewhere = query.get_prewhere_ast()
Esempio n. 3
0
def test_apdex_format_expressions() -> None:
    """
    Checks that ApdexProcessor rewrites an apdex(column1, 300) call in the
    select clause into the expected countIf/count arithmetic, and that the
    rewritten expression is formatted into the expected Clickhouse string.
    """

    def column1() -> Column:
        # Fresh instance on every use, so no node is shared between queries.
        return Column(None, "column1", None)

    def threshold() -> Literal:
        return Literal(None, 300)

    unprocessed = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            Column(None, "column2", None),
            FunctionCall("perf", "apdex", (column1(), threshold())),
        ],
    )

    # countIf(column1 <= 300)
    up_to_threshold = FunctionCall(
        None,
        "countIf",
        (binary_condition(None, ConditionFunctions.LTE, column1(), threshold()),),
    )
    # countIf(column1 > 300 AND column1 <= 300 * 4)
    up_to_four_thresholds = FunctionCall(
        None,
        "countIf",
        (
            binary_condition(
                None,
                BooleanFunctions.AND,
                binary_condition(None, ConditionFunctions.GT, column1(), threshold()),
                binary_condition(
                    None,
                    ConditionFunctions.LTE,
                    column1(),
                    multiply(threshold(), Literal(None, 4)),
                ),
            ),
        ),
    )
    expected = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            Column(None, "column2", None),
            div(
                plus(up_to_threshold, div(up_to_four_thresholds, Literal(None, 2))),
                FunctionCall(None, "count", ()),
            ),
        ],
    )

    ApdexProcessor().process_query(unprocessed, HTTPRequestSettings())
    assert (
        expected.get_selected_columns_from_ast()
        == unprocessed.get_selected_columns_from_ast()
    )

    # The second selected column is the one the processor rewrote.
    rewritten = unprocessed.get_selected_columns_from_ast()[1]
    formatted = rewritten.accept(ClickhouseExpressionFormatter())
    assert formatted == (
        "div(plus(countIf(lessOrEquals(column1, 300)), "
        "div(countIf(and(greater(column1, 300), "
        "lessOrEquals(column1, multiply(300, 4)))), 2)), count())"
    )
Esempio n. 4
0
def test_impact_format_expressions() -> None:
    """
    Checks that ImpactProcessor rewrites an impact(column1, 300, user) call in
    the select clause into the expected expression tree, and that the
    rewritten expression is formatted into the expected Clickhouse string.
    """

    def column1() -> Column:
        # Fresh instance on every use, so no node is shared between queries.
        return Column(None, "column1", None)

    def threshold() -> Literal:
        return Literal(None, 300)

    unprocessed = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            Column(None, "column2", None),
            FunctionCall(
                "perf",
                "impact",
                (column1(), threshold(), Column(None, "user", None)),
            ),
        ],
    )

    # countIf(column1 <= 300)
    up_to_threshold = countIf(
        binary_condition(None, ConditionFunctions.LTE, column1(), threshold())
    )
    # countIf(column1 > 300 AND column1 <= 300 * 4)
    up_to_four_thresholds = countIf(
        binary_condition(
            None,
            BooleanFunctions.AND,
            binary_condition(None, ConditionFunctions.GT, column1(), threshold()),
            binary_condition(
                None,
                ConditionFunctions.LTE,
                column1(),
                multiply(threshold(), Literal(None, 4)),
            ),
        )
    )
    count_ratio = div(
        plus(up_to_threshold, div(up_to_four_thresholds, Literal(None, 2))),
        count(),
    )

    # NOTE(review): the parameters given to "sqrt" and "uniq" here are bare
    # expressions, not one-element tuples like the parameters of the "impact"
    # call above. The expected string at the bottom ("sqrt(user, uniq(user))")
    # matches this shape. Presumably this mirrors what ImpactProcessor
    # currently produces -- confirm before changing either side.
    sqrt_of_uniq_users = FunctionCall(
        None,
        "sqrt",
        FunctionCall(
            None,
            "uniq",
            Column(alias=None, column_name="user", table_name=None),
        ),
    )
    user_term = minus(Literal(None, 1), div(Literal(None, 1), sqrt_of_uniq_users))

    expected = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            Column(None, "column2", None),
            plus(
                minus(Literal(None, 1), count_ratio),
                multiply(user_term, Literal(None, 3)),
            ),
        ],
    )

    ImpactProcessor().process_query(unprocessed, HTTPRequestSettings())
    assert (
        expected.get_selected_columns_from_ast()
        == unprocessed.get_selected_columns_from_ast()
    )

    # The second selected column is the one the processor rewrote.
    rewritten = unprocessed.get_selected_columns_from_ast()[1]
    formatted = rewritten.accept(ClickhouseExpressionFormatter())
    assert formatted == (
        "plus(minus(1, div(plus(countIf(lessOrEquals(column1, 300)), "
        "div(countIf(and(greater(column1, 300), lessOrEquals(column1, "
        "multiply(300, 4)))), 2)), count())), "
        "multiply(minus(1, div(1, sqrt(user, uniq(user)))), 3))"
    )
Esempio n. 5
0
def test_replace_expression():
    """
    Builds a query on the new AST and transforms its expressions so that
    every f1(...) call becomes tag('f1') with the same alias, then checks each
    clause of the query against a query built with the replacement in place.
    """
    c1 = Column(None, "c1", "t1")
    c2 = Column(None, "c2", "t1")
    f1_call = FunctionCall("alias", "f1", (c1, c2))
    f2_call = FunctionCall("alias", "f2", (c2,))
    # f2 is not touched by the transformation, so the same ORDER BY node is
    # used for both the transformed and the expected query.
    ordering = OrderBy(OrderByDirection.ASC, f2_call)

    query = Query(
        {},
        TableSource("my_table", ColumnSet([])),
        selected_columns=[f1_call],
        array_join=None,
        condition=binary_condition(
            None, ConditionFunctions.EQ, f1_call, Literal(None, "1")
        ),
        groupby=[f1_call],
        having=None,
        order_by=[ordering],
    )

    def replace(exp: Expression) -> Expression:
        # Anything that is not a call to f1 is left untouched.
        if not isinstance(exp, FunctionCall) or exp.function_name != "f1":
            return exp
        return FunctionCall(exp.alias, "tag", (Literal(None, "f1"),))

    query.transform_expressions(replace)

    def tag_call() -> FunctionCall:
        # What every f1 call is expected to have been turned into.
        return FunctionCall("alias", "tag", (Literal(None, "f1"),))

    expected_query = Query(
        {},
        TableSource("my_table", ColumnSet([])),
        selected_columns=[tag_call()],
        array_join=None,
        condition=binary_condition(
            None, ConditionFunctions.EQ, tag_call(), Literal(None, "1"),
        ),
        groupby=[tag_call()],
        having=None,
        order_by=[ordering],
    )

    # Clause by clause comparison first, so a failure points at the clause.
    assert (
        query.get_selected_columns_from_ast()
        == expected_query.get_selected_columns_from_ast()
    )
    assert query.get_condition_from_ast() == expected_query.get_condition_from_ast()
    assert query.get_groupby_from_ast() == expected_query.get_groupby_from_ast()
    assert query.get_having_from_ast() == expected_query.get_having_from_ast()
    assert query.get_orderby_from_ast() == expected_query.get_orderby_from_ast()

    # Then the full sequence of expressions exposed by each query.
    actual_expressions = list(query.get_all_expressions())
    expected_expressions = list(expected_query.get_all_expressions())
    assert actual_expressions == expected_expressions