Exemple #1
0
def test_aliases() -> None:
    """Check how the ClickHouse formatter renders aliased expressions.

    A formatter created without an explicit context renders the full
    ``(expression AS alias)`` form. Formatters sharing one ``ParsingContext``
    render the full form the first time an alias is seen and only the alias
    afterwards. The same applies to repeated aliases nested inside a single
    expression tree.
    """
    # No context
    col1 = Column("al1", "column1", "table1")
    col2 = Column("al1", "column1", "table1")

    assert col1.accept(
        ClickhouseExpressionFormatter()) == "(table1.column1 AS al1)"
    assert col2.accept(
        ClickhouseExpressionFormatter()) == "(table1.column1 AS al1)"

    # With Context
    pc = ParsingContext()
    assert col1.accept(
        ClickhouseExpressionFormatter(pc)) == "(table1.column1 AS al1)"
    assert col2.accept(ClickhouseExpressionFormatter(pc)) == "al1"

    # Hierarchical expression inherits parsing context and applies aliases
    f = FunctionCall(
        None,
        "f1",
        (
            # The trailing comma matters: `(Column(...))` is just a
            # parenthesised Column, while `(Column(...),)` is the one-element
            # parameter tuple that FunctionCall is given everywhere else.
            FunctionCall("tag[something]", "tag",
                         (Column(None, "column1", "table1"),)),
            FunctionCall("tag[something]", "tag",
                         (Column(None, "column1", "table1"),)),
            FunctionCall("tag[something]", "tag",
                         (Column(None, "column1", "table1"),)),
        ),
    )

    expected = "f1((tag(table1.column1) AS `tag[something]`), `tag[something]`, `tag[something]`)"
    assert f.accept(ClickhouseExpressionFormatter()) == expected
def test_timeseries_format_expressions(
    granularity: int,
    condition: Optional[FunctionCall],
    exp_column: FunctionCall,
    exp_condition: Optional[FunctionCall],
    formatted_column: str,
    formatted_condition: str,
) -> None:
    """Run the transactions entity's TimeSeriesProcessor and check its output.

    After processing, the query must equal the expected one at the AST level
    (selected columns and condition). The second selected column must format
    to ``formatted_column``; when ``condition`` is truthy the processed
    condition must format to ``formatted_condition``.
    """

    def duration_column() -> SelectedExpression:
        # A fresh instance per query, so the two queries share no objects.
        return SelectedExpression(
            "transaction.duration",
            Column("transaction.duration", None, "duration"),
        )

    source_query = Query(
        {},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            duration_column(),
            SelectedExpression("my_time", Column("my_time", None, "time")),
        ],
        condition=condition,
        granularity=granularity,
    )
    target_query = Query(
        {"granularity": granularity},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            duration_column(),
            SelectedExpression(exp_column.alias, exp_column),
        ],
        condition=exp_condition,
    )

    # Only the time series processor(s) of the entity are exercised here.
    timeseries_processors = [
        candidate
        for candidate in TransactionsEntity().get_query_processors()
        if isinstance(candidate, TimeSeriesProcessor)
    ]
    for timeseries_processor in timeseries_processors:
        timeseries_processor.process_query(source_query, HTTPRequestSettings())

    assert (
        target_query.get_selected_columns_from_ast()
        == source_query.get_selected_columns_from_ast()
    )
    assert (
        target_query.get_condition_from_ast()
        == source_query.get_condition_from_ast()
    )

    time_column = source_query.get_selected_columns_from_ast()[1]
    rendered_column = time_column.expression.accept(
        ClickhouseExpressionFormatter()
    )
    assert rendered_column == formatted_column
    if condition:
        rendered_condition = source_query.get_condition_from_ast().accept(
            ClickhouseExpressionFormatter()
        )
        assert formatted_condition == rendered_condition
Exemple #3
0
def test_timeseries_column_format_expressions(
    granularity: int, ast_value: FunctionCall, formatted_value: str
) -> None:
    """Run the dataset's TimeSeriesColumnProcessor and check the time column.

    The processed query must select the untouched duration column followed by
    ``ast_value``, and that second column must format to ``formatted_value``.
    """

    def duration_column() -> SelectedExpression:
        # A fresh instance per query, so the two queries share no objects.
        return SelectedExpression(
            "transaction.duration",
            Column("transaction.duration", None, "duration"),
        )

    source_query = Query(
        {"granularity": granularity},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            duration_column(),
            SelectedExpression("my_time", Column("my_time", None, "time")),
        ],
    )
    target_query = Query(
        {"granularity": granularity},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            duration_column(),
            SelectedExpression(ast_value.alias, ast_value),
        ],
    )

    # Only the time series column processor(s) of the dataset are exercised.
    column_processors = [
        candidate
        for candidate in TransactionsDataset().get_query_processors()
        if isinstance(candidate, TimeSeriesColumnProcessor)
    ]
    for column_processor in column_processors:
        column_processor.process_query(source_query, HTTPRequestSettings())

    assert (
        target_query.get_selected_columns_from_ast()
        == source_query.get_selected_columns_from_ast()
    )

    time_column = source_query.get_selected_columns_from_ast()[1]
    rendered = time_column.expression.accept(ClickhouseExpressionFormatter())
    assert rendered == formatted_value
Exemple #4
0
def test_timeseries_column_format_expressions(
    granularity, ast_value, formatted_value
) -> None:
    """Process a query with TimeSeriesColumnProcessor and check the result.

    The processor is built directly from the dataset's time group columns.
    The second selected column must become ``ast_value`` and format to
    ``formatted_value``.
    """
    duration_args = ("transaction.duration", "duration", None)

    source_query = Query(
        {"granularity": granularity},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            Column(*duration_args),
            Column("my_start", "bucketed_start", None),
        ],
    )
    target_query = Query(
        {"granularity": granularity},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[Column(*duration_args), ast_value],
    )

    # Reaches into the dataset's name-mangled attribute to get the columns
    # the processor is constructed with.
    time_group_columns = TransactionsDataset()._TimeSeriesDataset__time_group_columns
    column_processor = TimeSeriesColumnProcessor(time_group_columns)
    column_processor.process_query(source_query, HTTPRequestSettings())

    assert (
        target_query.get_selected_columns_from_ast()
        == source_query.get_selected_columns_from_ast()
    )

    time_column = source_query.get_selected_columns_from_ast()[1]
    rendered = time_column.accept(ClickhouseExpressionFormatter())
    assert rendered == formatted_value
def test_failure_rate_format_expressions() -> None:
    """Check that ``failure_rate()`` is expanded and formatted as expected.

    The processor must replace the ``failure_rate`` call aliased ``perf``
    with ``countIf(notIn(transaction_status, tuple(0, 1, 2)))`` divided by
    ``count()``, keeping the ``perf`` alias.
    """

    def column2() -> SelectedExpression:
        # A fresh instance per query, so the two queries share no objects.
        return SelectedExpression(
            name=None, expression=Column(None, None, "column2")
        )

    source_query = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            column2(),
            SelectedExpression(
                "perf", FunctionCall("perf", "failure_rate", ())
            ),
        ],
    )

    excluded_statuses = FunctionCall(
        None,
        "tuple",
        tuple(Literal(alias=None, value=code) for code in (0, 1, 2)),
    )
    status_not_excluded = binary_condition(
        None,
        ConditionFunctions.NOT_IN,
        Column(None, None, "transaction_status"),
        excluded_statuses,
    )
    matching_count = FunctionCall(None, "countIf", (status_not_excluded,))
    target_query = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            column2(),
            SelectedExpression(
                "perf", divide(matching_count, count(), "perf")
            ),
        ],
    )

    processor = failure_rate_processor(ColumnSet([]))
    processor.process_query(source_query, HTTPRequestSettings())
    assert (
        target_query.get_selected_columns_from_ast()
        == source_query.get_selected_columns_from_ast()
    )

    perf_column = source_query.get_selected_columns_from_ast()[1]
    rendered = perf_column.expression.accept(ClickhouseExpressionFormatter())
    assert rendered == (
        "(divide(countIf(notIn(transaction_status, tuple(0, 1, 2))), count()) AS perf)"
    )
def test_events_column_format_expressions() -> None:
    """Check the rewrites EventsColumnProcessor applies to selected columns.

    ``culprit`` is left alone, ``group_id`` is wrapped in ``nullIf(..., 0)``
    and ``message`` becomes ``coalesce(search_message, message)``. Both
    rewritten columns keep their original aliases.
    """

    def culprit_column() -> SelectedExpression:
        # A fresh instance per query, so the two queries share no objects.
        return SelectedExpression(
            "dr_claw", Column("dr_claw", None, "culprit")
        )

    source_query = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            culprit_column(),
            SelectedExpression(
                "the_group_id", Column("the_group_id", None, "group_id")
            ),
            SelectedExpression(
                "the_message", Column("the_message", None, "message")
            ),
        ],
    )

    nullable_group_id = FunctionCall(
        "the_group_id",
        "nullIf",
        (Column(None, None, "group_id"), Literal(None, 0)),
    )
    coalesced_message = FunctionCall(
        "the_message",
        "coalesce",
        (Column(None, None, "search_message"), Column(None, None, "message")),
    )
    target_query = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            culprit_column(),
            SelectedExpression("the_group_id", nullable_group_id),
            SelectedExpression("the_message", coalesced_message),
        ],
    )

    EventsColumnProcessor().process_query(source_query, HTTPRequestSettings())
    assert (
        target_query.get_selected_columns_from_ast()
        == source_query.get_selected_columns_from_ast()
    )

    # Expected rendering of every selected column after the first one.
    expected_sql = (
        "(nullIf(group_id, 0) AS the_group_id)",
        "(coalesce(search_message, message) AS the_message)",
    )

    rewritten_columns = source_query.get_selected_columns_from_ast()[1:]
    for position, selected in enumerate(rewritten_columns):
        rendered = selected.expression.accept(ClickhouseExpressionFormatter())
        assert expected_sql[position] == rendered
Exemple #7
0
def test_failure_rate_format_expressions() -> None:
    """Check that ``failure_rate()`` is expanded and formatted as expected.

    The processor must replace the ``failure_rate`` call aliased ``perf``
    with a ``countIf`` over the AND of ``transaction_status != code`` for
    codes 0, 1 and 2, divided by ``count()``, keeping the ``perf`` alias.
    """

    def column2() -> SelectedExpression:
        # A fresh instance per query, so the two queries share no objects.
        return SelectedExpression(
            name=None, expression=Column(None, None, "column2")
        )

    source_query = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            column2(),
            SelectedExpression("perf", FunctionCall("perf", "failure_rate", ())),
        ],
    )

    status_differs = [
        binary_condition(
            None,
            ConditionFunctions.NEQ,
            Column(None, None, "transaction_status"),
            Literal(None, status_code),
        )
        for status_code in (0, 1, 2)
    ]
    matching_count = FunctionCall(
        None, "countIf", (combine_and_conditions(status_differs),)
    )
    target_query = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            column2(),
            SelectedExpression("perf", divide(matching_count, count(), "perf")),
        ],
    )

    processor = failure_rate_processor(ColumnSet([]))
    processor.process_query(source_query, HTTPRequestSettings())
    assert (
        target_query.get_selected_columns_from_ast()
        == source_query.get_selected_columns_from_ast()
    )

    perf_column = source_query.get_selected_columns_from_ast()[1]
    rendered = perf_column.expression.accept(ClickhouseExpressionFormatter())
    assert rendered == (
        "(divide(countIf(notEquals(transaction_status, 0) AND notEquals(transaction_status, 1) AND notEquals(transaction_status, 2)), count()) AS perf)"
    )
def test_formatting() -> None:
    """
    Checks the ClickHouse rendering of the zipped tags key/value columns,
    with and without a key filter, once wrapped in arrayJoin and tupleElement.
    """

    def zipped_tags():
        # Pairs tags.key with tags.value; built fresh for every assertion.
        return zip_columns(
            Column(None, None, "tags.key"),
            Column(None, None, "tags.value"),
        )

    def render_first_element(pairs) -> str:
        # tupleElement(arrayJoin(pairs), 1), using the aliases under test.
        joined = arrayJoin("snuba_all_tags", pairs)
        expression = tupleElement("tags_key", joined, Literal(None, 1))
        return expression.accept(ClickhouseExpressionFormatter())

    unfiltered_sql = render_first_element(zipped_tags())
    assert unfiltered_sql == (
        "(tupleElement((arrayJoin(arrayMap((x, y -> tuple(x, y)), "
        "tags.key, tags.value)) AS snuba_all_tags), 1) AS tags_key)"
    )

    filtered_pairs = filter_key_values(
        zipped_tags(),
        [Literal(None, "t1"), Literal(None, "t2")],
    )
    filtered_sql = render_first_element(filtered_pairs)
    assert filtered_sql == (
        "(tupleElement((arrayJoin(arrayFilter((pair -> in("
        "tupleElement(pair, 1), tuple('t1', 't2'))), "
        "arrayMap((x, y -> tuple(x, y)), tags.key, tags.value))) AS snuba_all_tags), 1) AS tags_key)"
    )
def test_transaction_column_format_expressions() -> None:
    """Check the ``event_id`` rewrite applied by TransactionColumnProcessor.

    The duration column is left alone, while ``event_id`` must become
    ``replaceAll(toString(event_id), '-', '')`` under its original alias.
    """

    def duration_column() -> SelectedExpression:
        # A fresh instance per query, so the two queries share no objects.
        return SelectedExpression(
            "transaction.duration",
            Column("transaction.duration", None, "duration"),
        )

    source_query = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            duration_column(),
            SelectedExpression(
                "the_event_id", Column("the_event_id", None, "event_id")
            ),
        ],
    )

    event_id_as_string = FunctionCall(
        None, "toString", (Column(None, None, "event_id"),)
    )
    dashless_event_id = FunctionCall(
        "the_event_id",
        "replaceAll",
        (event_id_as_string, Literal(None, "-"), Literal(None, "")),
    )
    target_query = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            duration_column(),
            SelectedExpression("the_event_id", dashless_event_id),
        ],
    )

    TransactionColumnProcessor().process_query(source_query, HTTPRequestSettings())
    assert (
        target_query.get_selected_columns_from_ast()
        == source_query.get_selected_columns_from_ast()
    )

    event_id_column = source_query.get_selected_columns_from_ast()[1]
    rendered = event_id_column.expression.accept(ClickhouseExpressionFormatter())
    assert rendered == "(replaceAll(toString(event_id), '-', '') AS the_event_id)"
Exemple #10
0
def test_escaping(expression: Expression, expected: str) -> None:
    """Format ``expression`` with a fresh formatter and compare to ``expected``."""
    rendered = expression.accept(ClickhouseExpressionFormatter())
    assert rendered == expected
Exemple #11
0
    def _sql_data_list(self) -> Sequence[Tuple[str, str]]:
        """Build the query's clauses as ordered ``(name, sql)`` pairs.

        The result is cached on the instance and returned as-is on later
        calls. Clauses that end up empty are left out of the list.

        A single formatter, and therefore a single parsing context, is used
        for every clause. The clauses are formatted in the order they appear
        below so aliases introduced by an earlier clause can be reused by a
        later one.
        """
        if self.__sql_data_list:
            return self.__sql_data_list

        formatter = ClickhouseExpressionFormatter(ParsingContext())

        select_clause = "SELECT " + ", ".join(
            selected.expression.accept(formatter)
            for selected in self.__selected_columns
        )

        # TODO: The visitor approach will be used for the FROM clause as well.
        from_clause = f"FROM {self.__data_source.format_from()}"
        if self.__final:
            from_clause += " FINAL"

        # TODO: Sampling rate will become one step of Clickhouse query processing
        sample_rate = None
        if self.__data_source.supports_sample():
            # An explicit sample wins over the turbo setting.
            if self.__sample:
                sample_rate = self.__sample
            elif self.__settings.get_turbo():
                sample_rate = settings.TURBO_SAMPLE_RATE
        if sample_rate:
            from_clause += f" SAMPLE {sample_rate}"

        array_join_clause = (
            f"ARRAY JOIN {self.__arrayjoin.accept(formatter)}"
            if self.__arrayjoin
            else ""
        )

        prewhere_clause = (
            f"PREWHERE {self.__prewhere.accept(formatter)}"
            if self.__prewhere
            else ""
        )

        where_clause = (
            f"WHERE {self.__condition.accept(formatter)}"
            if self.__condition
            else ""
        )

        group_clause = ""
        if self.__groupby:
            # reformat to use aliases generated during the select clause formatting.
            grouped = ", ".join(
                expression.accept(formatter) for expression in self.__groupby
            )
            group_clause = f"GROUP BY ({grouped})"
            if self.__hastotals:
                group_clause += " WITH TOTALS"

        having_clause = (
            f"HAVING {self.__having.accept(formatter)}" if self.__having else ""
        )

        order_clause = ""
        if self.__orderby:
            ordering = ", ".join(
                f"{item.expression.accept(formatter)} {item.direction.value}"
                for item in self.__orderby
            )
            order_clause = f"ORDER BY {ordering}"

        limitby_clause = (
            "LIMIT {} BY {}".format(*self.__limitby)
            if self.__limitby is not None
            else ""
        )

        limit_clause = (
            f"LIMIT {self.__limit} OFFSET {self.__offset}"
            if self.__limit is not None
            else ""
        )

        clauses = (
            ("select", select_clause),
            ("from", from_clause),
            ("array_join", array_join_clause),
            ("prewhere", prewhere_clause),
            ("where", where_clause),
            ("group", group_clause),
            ("having", having_clause),
            ("order", order_clause),
            ("limitby", limitby_clause),
            ("limit", limit_clause),
        )
        # Keep only the clauses that produced some SQL.
        self.__sql_data_list = [(name, sql) for name, sql in clauses if sql]

        return self.__sql_data_list
Exemple #12
0
def test_handled_processor() -> None:
    """Check that ``isHandled()`` is expanded by HandledFunctionsProcessor.

    The call must become an ``arrayExists`` over
    ``exception_stacks.mechanism_handled`` whose lambda accepts an element
    that is null or whose non-null value equals 1, keeping the ``result``
    alias.
    """
    columnset = ColumnSet([])

    def id_column() -> SelectedExpression:
        # A fresh instance per query, so the two queries share no objects.
        return SelectedExpression(name=None, expression=Column(None, None, "id"))

    source_query = Query(
        {},
        TableSource("events", columnset),
        selected_columns=[
            id_column(),
            SelectedExpression("result", FunctionCall("result", "isHandled", ())),
        ],
    )

    element_is_null = FunctionCall(None, "isNull", (Argument(None, "x"),))
    element_equals_one = binary_condition(
        None,
        ConditionFunctions.EQ,
        FunctionCall(None, "assumeNotNull", (Argument(None, "x"),)),
        Literal(None, 1),
    )
    element_predicate = Lambda(
        None,
        ("x",),
        binary_condition(
            None, BooleanFunctions.OR, element_is_null, element_equals_one
        ),
    )
    any_element_matches = FunctionCall(
        "result",
        "arrayExists",
        (
            element_predicate,
            Column(None, None, "exception_stacks.mechanism_handled"),
        ),
    )
    target_query = Query(
        {},
        TableSource("events", columnset),
        selected_columns=[
            id_column(),
            SelectedExpression("result", any_element_matches),
        ],
    )

    handled_processor = handled_functions.HandledFunctionsProcessor(
        "exception_stacks.mechanism_handled", columnset
    )
    handled_processor.process_query(source_query, HTTPRequestSettings())

    assert (
        target_query.get_selected_columns_from_ast()
        == source_query.get_selected_columns_from_ast()
    )

    result_column = source_query.get_selected_columns_from_ast()[1]
    rendered = result_column.expression.accept(ClickhouseExpressionFormatter())
    assert rendered == (
        "(arrayExists((x -> (isNull(x) OR equals(assumeNotNull(x), 1))), exception_stacks.mechanism_handled) AS result)"
    )
Exemple #13
0
def test_impact_format_expressions() -> None:
    """Check that ``impact(column1, 300, user)`` is expanded by ImpactProcessor.

    The query selects ``column2`` and an ``impact`` call aliased ``perf``.
    After processing, the second selected column must equal the expression
    tree built below and format to the string asserted at the end.
    """
    unprocessed = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            Column(None, "column2", None),
            FunctionCall(
                "perf",
                "impact",
                (
                    Column(None, "column1", None),
                    Literal(None, 300),
                    Column(None, "user", None),
                ),
            ),
        ],
    )
    # Expected expansion: the sum of two terms.
    expected = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            Column(None, "column2", None),
            plus(
                # First term:
                # 1 - (countIf(column1 <= 300)
                #      + countIf(300 < column1 <= 300 * 4) / 2) / count()
                minus(
                    Literal(None, 1),
                    div(
                        plus(
                            countIf(
                                binary_condition(
                                    None,
                                    ConditionFunctions.LTE,
                                    Column(None, "column1", None),
                                    Literal(None, 300),
                                ),
                            ),
                            div(
                                countIf(
                                    binary_condition(
                                        None,
                                        BooleanFunctions.AND,
                                        binary_condition(
                                            None,
                                            ConditionFunctions.GT,
                                            Column(None, "column1", None),
                                            Literal(None, 300),
                                        ),
                                        binary_condition(
                                            None,
                                            ConditionFunctions.LTE,
                                            Column(None, "column1", None),
                                            multiply(
                                                Literal(None, 300), Literal(None, 4)
                                            ),
                                        ),
                                    ),
                                ),
                                Literal(None, 2),
                            ),
                        ),
                        count(),
                    ),
                ),
                # Second term: (1 - 1 / sqrt(...)) * 3
                multiply(
                    minus(
                        Literal(None, 1),
                        div(
                            Literal(None, 1),
                            # NOTE(review): neither parameter list below is a
                            # tuple. The parentheses around the inner
                            # FunctionCall have no trailing comma, and `uniq`
                            # is given a bare Column. The string asserted at
                            # the end, `sqrt(user, uniq(user))`, appears to
                            # depend on this shape. Confirm against
                            # ImpactProcessor before "fixing" it here.
                            FunctionCall(
                                None,
                                "sqrt",
                                (
                                    FunctionCall(
                                        None,
                                        "uniq",
                                        Column(
                                            alias=None,
                                            column_name="user",
                                            table_name=None,
                                        ),
                                    )
                                ),
                            ),
                        ),
                    ),
                    Literal(None, 3),
                ),
            ),
        ],
    )

    ImpactProcessor().process_query(unprocessed, HTTPRequestSettings())
    # The processor rewrites `unprocessed` in place, so compare it directly.
    assert (
        expected.get_selected_columns_from_ast()
        == unprocessed.get_selected_columns_from_ast()
    )

    # Format the rewritten `impact` column (index 1) with a fresh formatter.
    ret = unprocessed.get_selected_columns_from_ast()[1].accept(
        ClickhouseExpressionFormatter()
    )
    assert ret == (
        "plus(minus(1, div(plus(countIf(lessOrEquals(column1, 300)), "
        "div(countIf(and(greater(column1, 300), lessOrEquals(column1, "
        "multiply(300, 4)))), 2)), count())), "
        "multiply(minus(1, div(1, sqrt(user, uniq(user)))), 3))"
    )
Exemple #14
0
def test_apdex_format_expressions() -> None:
    """Check that ``apdex(column1, 300)`` is expanded by ApdexProcessor.

    The expected expansion is
    ``(countIf(column1 <= 300) + countIf(300 < column1 <= 300 * 4) / 2) / count()``,
    and it must format to the string asserted at the end.
    """

    def count_if(predicate):
        # countIf(...) with the predicate as its only parameter.
        return FunctionCall(None, "countIf", (predicate,))

    def column1_compared(operator, bound):
        # Binary condition with a fresh `column1` on the left-hand side.
        return binary_condition(
            None, operator, Column(None, "column1", None), bound
        )

    source_query = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            Column(None, "column2", None),
            FunctionCall(
                "perf", "apdex", (Column(None, "column1", None), Literal(None, 300))
            ),
        ],
    )

    within_threshold = count_if(
        column1_compared(ConditionFunctions.LTE, Literal(None, 300))
    )
    within_four_times_threshold = count_if(
        binary_condition(
            None,
            BooleanFunctions.AND,
            column1_compared(ConditionFunctions.GT, Literal(None, 300)),
            column1_compared(
                ConditionFunctions.LTE,
                multiply(Literal(None, 300), Literal(None, 4)),
            ),
        )
    )
    weighted_total = plus(
        within_threshold,
        div(within_four_times_threshold, Literal(None, 2)),
    )
    target_query = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            Column(None, "column2", None),
            div(weighted_total, FunctionCall(None, "count", ())),
        ],
    )

    ApdexProcessor().process_query(source_query, HTTPRequestSettings())
    assert (
        target_query.get_selected_columns_from_ast()
        == source_query.get_selected_columns_from_ast()
    )

    apdex_column = source_query.get_selected_columns_from_ast()[1]
    rendered = apdex_column.accept(ClickhouseExpressionFormatter())
    assert rendered == (
        "div(plus(countIf(lessOrEquals(column1, 300)), "
        "div(countIf(and(greater(column1, 300), "
        "lessOrEquals(column1, multiply(300, 4)))), 2)), count())"
    )