def test_formatting() -> None:
    """
    Validates the formatting of the arrayFilter expressions.
    """
    assert tupleElement(
        "tags_key",
        arrayJoin(
            "snuba_all_tags",
            zip_columns(
                Column(None, None, "tags.key"), Column(None, None, "tags.value"),
            ),
        ),
        Literal(None, 1),
    ).accept(ClickhouseExpressionFormatter()) == (
        "(tupleElement((arrayJoin(arrayMap((x, y -> tuple(x, y)), "
        "tags.key, tags.value)) AS snuba_all_tags), 1) AS tags_key)"
    )

    assert tupleElement(
        "tags_key",
        arrayJoin(
            "snuba_all_tags",
            filter_key_values(
                zip_columns(
                    Column(None, None, "tags.key"), Column(None, None, "tags.value"),
                ),
                [Literal(None, "t1"), Literal(None, "t2")],
            ),
        ),
        Literal(None, 1),
    ).accept(ClickhouseExpressionFormatter()) == (
        "(tupleElement((arrayJoin(arrayFilter((pair -> in("
        "tupleElement(pair, 1), tuple('t1', 't2'))), "
        "arrayMap((x, y -> tuple(x, y)), tags.key, tags.value))) AS snuba_all_tags), 1) AS tags_key)"
    )
Ejemplo n.º 2
0
def test_aliases() -> None:
    # No context
    col1 = Column("al1", "table1", "column1")
    col2 = Column("al1", "table1", "column1")

    assert col1.accept(
        ClickhouseExpressionFormatter()) == "(table1.column1 AS al1)"
    assert col2.accept(
        ClickhouseExpressionFormatter()) == "(table1.column1 AS al1)"

    # With Context
    pc = ParsingContext()
    assert col1.accept(
        ClickhouseExpressionFormatter(pc)) == "(table1.column1 AS al1)"
    assert col2.accept(ClickhouseExpressionFormatter(pc)) == "al1"

    # Hierarchical expression inherits parsing context and applies aliases
    f = FunctionCall(
        None,
        "f1",
        (
            FunctionCall("tag[something]", "tag",
                         (Column(None, "table1", "column1"), )),
            FunctionCall("tag[something]", "tag",
                         (Column(None, "table1", "column1"), )),
            FunctionCall("tag[something]", "tag",
                         (Column(None, "table1", "column1"), )),
        ),
    )

    expected = "f1((tag(table1.column1) AS `tag[something]`), `tag[something]`, `tag[something]`)"
    assert f.accept(ClickhouseExpressionFormatter()) == expected
Ejemplo n.º 3
0
def test_timeseries_format_expressions(
    granularity: int,
    condition: Optional[FunctionCall],
    exp_column: FunctionCall,
    exp_condition: Optional[FunctionCall],
    formatted_column: str,
    formatted_condition: str,
) -> None:
    unprocessed = Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration",
                Column("transaction.duration", None, "duration")),
            SelectedExpression("my_time", Column("my_time", None, "time")),
        ],
        condition=condition,
        groupby=[Column("my_time", None, "time")],
        granularity=granularity,
    )
    expected = Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration",
                Column("transaction.duration", None, "duration")),
            SelectedExpression(exp_column.alias, exp_column),
        ],
        condition=exp_condition,
    )

    entity = TransactionsEntity()
    processors = entity.get_query_processors()
    for processor in processors:
        if isinstance(processor, TimeSeriesProcessor):
            processor.process_query(unprocessed, HTTPRequestSettings())

    assert expected.get_selected_columns() == unprocessed.get_selected_columns(
    )
    assert expected.get_condition() == unprocessed.get_condition()

    ret = unprocessed.get_selected_columns()[1].expression.accept(
        ClickhouseExpressionFormatter())
    assert ret == formatted_column
    if condition:
        query_condition = unprocessed.get_condition()
        assert query_condition is not None
        ret = query_condition.accept(ClickhouseExpressionFormatter())
        assert formatted_condition == ret

    assert extract_granularity_from_query(unprocessed,
                                          "finish_ts") == granularity
Ejemplo n.º 4
0
def test_format_expressions(
    expression: Expression, expected_clickhouse: str, expected_anonymized: str
) -> None:
    visitor = ClickhouseExpressionFormatter()
    anonymized_visitor = ClickHouseExpressionFormatterAnonymized()
    assert expression.accept(visitor) == expected_clickhouse
    assert expression.accept(anonymized_visitor) == expected_anonymized
Ejemplo n.º 5
0
def _format_query_content(query: FormattableQuery) -> Sequence[FormattedNode]:
    """
    Produces the content of the formatted query.
    It works for both the composite query and the simple one as the
    only difference is the presence of the prewhere condition.
    Should we have more differences going on we should break this
    method into smaller ones.
    """
    parsing_context = ParsingContext()
    formatter = ClickhouseExpressionFormatter(parsing_context)

    return [
        v for v in [
            _format_select(query, formatter),
            PaddingNode("FROM",
                        DataSourceFormatter().visit(query.get_from_clause())),
            _build_optional_string_node(
                "ARRAY JOIN", query.get_arrayjoin_from_ast(), formatter),
            _build_optional_string_node("PREWHERE", query.get_prewhere_ast(
            ), formatter) if isinstance(query, Query) else None,
            _build_optional_string_node(
                "WHERE", query.get_condition_from_ast(), formatter),
            _format_groupby(query, formatter),
            _build_optional_string_node("HAVING", query.get_having_from_ast(),
                                        formatter),
            _format_orderby(query, formatter),
            _format_limitby(query, formatter),
            _format_limit(query, formatter),
        ] if v is not None
    ]
def test_uuid_array_column_processor(
    unprocessed: Expression,
    expected: Expression,
    formatted_value: str,
) -> None:
    unprocessed_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=unprocessed,
    )
    expected_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=expected,
    )

    SliceOfMapOptimizer().process_query(unprocessed_query,
                                        HTTPRequestSettings())

    assert expected_query.get_condition() == unprocessed_query.get_condition()
    condition = unprocessed_query.get_condition()
    assert condition is not None
    ret = condition.accept(ClickhouseExpressionFormatter())
    assert ret == formatted_value
Ejemplo n.º 7
0
def test_type_condition_optimizer() -> None:
    cond1 = binary_condition(
        ConditionFunctions.EQ, Column(None, None, "col1"), Literal(None, "val1")
    )

    unprocessed_query = Query(
        Table("errors", ColumnSet([])),
        condition=binary_condition(
            BooleanFunctions.AND,
            binary_condition(
                ConditionFunctions.NEQ,
                Column(None, None, "type"),
                Literal(None, "transaction"),
            ),
            cond1,
        ),
    )
    expected_query = Query(
        Table("errors", ColumnSet([])),
        condition=binary_condition(BooleanFunctions.AND, Literal(None, 1), cond1),
    )
    TypeConditionOptimizer().process_query(unprocessed_query, HTTPQuerySettings())

    assert expected_query.get_condition() == unprocessed_query.get_condition()
    condition = unprocessed_query.get_condition()
    assert condition is not None
    ret = condition.accept(ClickhouseExpressionFormatter())
    assert ret == "1 AND equals(col1, 'val1')"
def test_uuid_array_column_processor(
    unprocessed: Expression,
    expected: Expression,
    formatted_value: str,
) -> None:
    unprocessed_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=unprocessed,
    )
    expected_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=expected,
    )

    FixedStringArrayColumnProcessor(set(["column1", "column2"]),
                                    32).process_query(unprocessed_query,
                                                      HTTPQuerySettings())
    assert unprocessed_query.get_selected_columns() == [
        SelectedExpression(
            "column2",
            Column(None, None, "column2"),
        )
    ]

    assert expected_query.get_condition() == unprocessed_query.get_condition()
    condition = unprocessed_query.get_condition()
    assert condition is not None
    ret = condition.accept(ClickhouseExpressionFormatter())
    assert ret == formatted_value
Ejemplo n.º 9
0
def test_hexint_column_processor(unprocessed: Expression,
                                 formatted_value: str) -> None:
    unprocessed_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column1", Column(None, None, "column1"))
        ],
        condition=unprocessed,
    )

    HexIntColumnProcessor(set(["column1"
                               ])).process_query(unprocessed_query,
                                                 HTTPQuerySettings())
    assert unprocessed_query.get_selected_columns() == [
        SelectedExpression(
            "column1",
            FunctionCall(
                None,
                "lower",
                (FunctionCall(
                    None,
                    "hex",
                    (Column(None, None, "column1"), ),
                ), ),
            ),
        )
    ]

    condition = unprocessed_query.get_condition()
    assert condition is not None
    ret = condition.accept(ClickhouseExpressionFormatter())
    assert ret == formatted_value
Ejemplo n.º 10
0
def test_not_handled_processor() -> None:
    columnset = ColumnSet([])
    unprocessed = Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "id")),
            SelectedExpression(
                "result", FunctionCall("result", "notHandled", tuple(),),
            ),
        ],
    )

    expected = Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "id")),
            SelectedExpression(
                "result",
                FunctionCall(
                    "result",
                    "arrayExists",
                    (
                        Lambda(
                            None,
                            ("x",),
                            binary_condition(
                                BooleanFunctions.AND,
                                FunctionCall(None, "isNotNull", (Argument(None, "x"),)),
                                binary_condition(
                                    ConditionFunctions.EQ,
                                    FunctionCall(
                                        None, "assumeNotNull", (Argument(None, "x"),)
                                    ),
                                    Literal(None, 0),
                                ),
                            ),
                        ),
                        Column(None, None, "exception_stacks.mechanism_handled"),
                    ),
                ),
            ),
        ],
    )
    processor = handled_functions.HandledFunctionsProcessor(
        "exception_stacks.mechanism_handled", columnset
    )
    processor.process_query(unprocessed, HTTPRequestSettings())

    assert expected.get_selected_columns() == unprocessed.get_selected_columns()

    ret = unprocessed.get_selected_columns()[1].expression.accept(
        ClickhouseExpressionFormatter()
    )
    assert ret == (
        "(arrayExists((x -> isNotNull(x) AND equals(assumeNotNull(x), 0)), exception_stacks.mechanism_handled) AS result)"
    )
Ejemplo n.º 11
0
def test_uuid_array_column_processor(
    unprocessed: Expression,
    expected: Expression,
    formatted_value: str,
) -> None:
    unprocessed_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=unprocessed,
    )
    expected_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=expected,
    )

    UUIDArrayColumnProcessor(set(["column1", "column2"
                                  ])).process_query(unprocessed_query,
                                                    HTTPRequestSettings())
    assert unprocessed_query.get_selected_columns() == [
        SelectedExpression(
            "column2",
            FunctionCall(
                None,
                "arrayMap",
                (
                    Lambda(
                        None,
                        ("x", ),
                        FunctionCall(
                            None,
                            "replaceAll",
                            (
                                FunctionCall(None, "toString",
                                             (Argument(None, "x"), )),
                                Literal(None, "-"),
                                Literal(None, ""),
                            ),
                        ),
                    ),
                    Column(None, None, "column2"),
                ),
            ),
        )
    ]

    assert expected_query.get_condition() == unprocessed_query.get_condition()
    condition = unprocessed_query.get_condition()
    assert condition is not None
    ret = condition.accept(ClickhouseExpressionFormatter())
    assert ret == formatted_value
Ejemplo n.º 12
0
def test_failure_rate_format_expressions() -> None:
    unprocessed = Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None,
                               expression=Column(None, None, "column2")),
            SelectedExpression("perf", FunctionCall("perf", "failure_rate",
                                                    ())),
        ],
    )
    expected = Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None,
                               expression=Column(None, None, "column2")),
            SelectedExpression(
                "perf",
                divide(
                    FunctionCall(
                        None,
                        "countIf",
                        (combine_and_conditions([
                            binary_condition(
                                ConditionFunctions.NEQ,
                                Column(None, None, "transaction_status"),
                                Literal(None, code),
                            ) for code in [0, 1, 2]
                        ]), ),
                    ),
                    count(),
                    "perf",
                ),
            ),
        ],
    )

    failure_rate_processor(ColumnSet([])).process_query(
        unprocessed, HTTPRequestSettings())
    assert (expected.get_selected_columns_from_ast() ==
            unprocessed.get_selected_columns_from_ast())

    ret = unprocessed.get_selected_columns_from_ast()[1].expression.accept(
        ClickhouseExpressionFormatter())
    assert ret == (
        "(divide(countIf(notEquals(transaction_status, 0) AND notEquals(transaction_status, 1) AND notEquals(transaction_status, 2)), count()) AS perf)"
    )
Ejemplo n.º 13
0
def test_events_column_format_expressions() -> None:
    unprocessed = Query(
        Table("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression("dr_claw", Column("dr_claw", None, "culprit")),
            SelectedExpression("the_group_id",
                               Column("the_group_id", None, "group_id")),
            SelectedExpression("the_message",
                               Column("the_message", None, "message")),
        ],
    )
    expected_query = Query(
        Table("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression("dr_claw", Column("dr_claw", None, "culprit")),
            SelectedExpression(
                "the_group_id",
                FunctionCall(
                    "the_group_id",
                    "nullIf",
                    (
                        Column(None, None, "group_id"),
                        Literal(None, 0),
                    ),
                ),
            ),
            SelectedExpression(
                "the_message",
                Column("the_message", None, "message"),
            ),
        ],
    )

    GroupIdColumnProcessor().process_query(unprocessed, HTTPRequestSettings())
    assert expected_query.get_selected_columns(
    ) == unprocessed.get_selected_columns()

    expected = (
        "(nullIf(group_id, 0) AS the_group_id)",
        "(message AS the_message)",
    )

    for idx, column in enumerate(unprocessed.get_selected_columns()[1:]):
        formatted = column.expression.accept(ClickhouseExpressionFormatter())
        assert expected[idx] == formatted
Ejemplo n.º 14
0
def test_event_id_column_format_expressions() -> None:
    unprocessed = Query(
        Table("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration", Column("transaction.duration", None, "duration")
            ),
            SelectedExpression(
                "the_event_id", Column("the_event_id", None, "event_id")
            ),
        ],
    )
    expected = Query(
        Table("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration", Column("transaction.duration", None, "duration")
            ),
            SelectedExpression(
                "the_event_id",
                FunctionCall(
                    "the_event_id",
                    "replaceAll",
                    (
                        FunctionCall(
                            None, "toString", (Column(None, None, "event_id"),),
                        ),
                        Literal(None, "-"),
                        Literal(None, ""),
                    ),
                ),
            ),
        ],
    )

    EventIdColumnProcessor().process_query(unprocessed, HTTPRequestSettings())
    assert (
        expected.get_selected_columns_from_ast()
        == unprocessed.get_selected_columns_from_ast()
    )

    formatted = unprocessed.get_selected_columns_from_ast()[1].expression.accept(
        ClickhouseExpressionFormatter()
    )
    assert formatted == "(replaceAll(toString(event_id), '-', '') AS the_event_id)"
Ejemplo n.º 15
0
 def __init__(self, columns: Iterable[str]) -> None:
     self.__columns = columns
     self.__formatter = ClickhouseExpressionFormatter()
Ejemplo n.º 16
0
def test_apdex_format_expressions() -> None:
    unprocessed = Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "column2")),
            SelectedExpression(
                "perf",
                FunctionCall(
                    "perf", "apdex", (Column(None, None, "column1"), Literal(None, 300))
                ),
            ),
        ],
    )
    expected = Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "column2")),
            SelectedExpression(
                "perf",
                divide(
                    plus(
                        FunctionCall(
                            None,
                            "countIf",
                            (
                                binary_condition(
                                    ConditionFunctions.LTE,
                                    Column(None, None, "column1"),
                                    Literal(None, 300),
                                ),
                            ),
                        ),
                        divide(
                            FunctionCall(
                                None,
                                "countIf",
                                (
                                    binary_condition(
                                        BooleanFunctions.AND,
                                        binary_condition(
                                            ConditionFunctions.GT,
                                            Column(None, None, "column1"),
                                            Literal(None, 300),
                                        ),
                                        binary_condition(
                                            ConditionFunctions.LTE,
                                            Column(None, None, "column1"),
                                            multiply(
                                                Literal(None, 300), Literal(None, 4)
                                            ),
                                        ),
                                    ),
                                ),
                            ),
                            Literal(None, 2),
                        ),
                    ),
                    FunctionCall(
                        None,
                        "count",
                        (),
                    ),
                    "perf",
                ),
            ),
        ],
    )

    apdex_processor().process_query(unprocessed, HTTPQuerySettings())
    assert expected.get_selected_columns() == unprocessed.get_selected_columns()

    ret = unprocessed.get_selected_columns()[1].expression.accept(
        ClickhouseExpressionFormatter()
    )
    assert ret == (
        "(divide(plus(countIf(lessOrEquals(column1, 300)), "
        "divide(countIf(greater(column1, 300) AND "
        "lessOrEquals(column1, multiply(300, 4))), 2)), count()) AS perf)"
    )
Ejemplo n.º 17
0
def test_escaping(expression: Expression, expected: str) -> None:
    visitor = ClickhouseExpressionFormatter()
    assert expression.accept(visitor) == expected
Ejemplo n.º 18
0
def test_format_expressions(expression: Expression, expected_clickhouse: str,
                            expected_tracing: TExpression) -> None:
    visitor = ClickhouseExpressionFormatter()
    assert expression.accept(visitor) == expected_clickhouse
    assert expression.accept(TracingExpressionFormatter()) == expected_tracing