Beispiel #1
0
def test_timeseries_column_format_expressions(granularity: int,
                                              ast_value: FunctionCall,
                                              formatted_value: str) -> None:
    unprocessed = Query(
        {"granularity": granularity},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration",
                Column("transaction.duration", None, "duration")),
            SelectedExpression("my_time", Column("my_time", None, "time")),
        ],
    )
    expected = Query(
        {"granularity": granularity},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration",
                Column("transaction.duration", None, "duration")),
            SelectedExpression(ast_value.alias, ast_value),
        ],
    )

    dataset = TransactionsDataset()
    for processor in dataset.get_query_processors():
        if isinstance(processor, TimeSeriesColumnProcessor):
            processor.process_query(unprocessed, HTTPRequestSettings())

    assert (expected.get_selected_columns_from_ast() ==
            unprocessed.get_selected_columns_from_ast())

    ret = unprocessed.get_selected_columns_from_ast()[1].expression.accept(
        ClickhouseExpressionFormatter())
    assert ret == formatted_value
Beispiel #2
0
 def query_runner(query: Query, settings: RequestSettings,
                  reader: Reader[SqlQuery]) -> QueryResult:
     assert query.get_selected_columns_from_ast() == [
         SelectedExpression(
             "duration_quantiles",
             CurriedFunctionCall(
                 "duration_quantiles",
                 FunctionCall(
                     None,
                     "quantilesIfMerge",
                     (Literal(None, 0.5), Literal(None, 0.9)),
                 ),
                 (Column(None, None, "duration_quantiles"), ),
             ),
         ),
         SelectedExpression(
             "sessions",
             FunctionCall("sessions", "countIfMerge",
                          (Column(None, None, "sessions"), )),
         ),
         SelectedExpression(
             "users",
             FunctionCall("users", "uniqIfMerge",
                          (Column(None, None, "users"), )),
         ),
     ]
     return QueryResult({}, {})
Beispiel #3
0
 def query_runner(query: Query, settings: RequestSettings,
                  reader: Reader[SqlQuery]) -> QueryResult:
     assert query.get_selected_columns_from_ast() == [
         SelectedExpression(
             "tags[transaction]",
             Column("tags[transaction]", None, "transaction")),
         SelectedExpression(
             "contexts[browser.name]",
             FunctionCall(
                 "contexts[browser.name]",
                 "arrayElement",
                 (
                     Column(None, None, "contexts.value"),
                     FunctionCall(
                         None,
                         "indexOf",
                         (
                             Column(None, None, "contexts.key"),
                             Literal(None, "browser.name"),
                         ),
                     ),
                 ),
             ),
         ),
     ]
     return QueryResult({}, {})
Beispiel #4
0
def test_format_clickhouse_specific_query() -> None:
    """
    Adds a few of the Clickhosue specific fields to the query.
    """

    query = Query(
        {
            "sample": 0.1,
            "totals": True,
            "limitby": (10, "environment")
        },
        TableSource("my_table", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column1", Column(None, None, "column1")),
            SelectedExpression("column2", Column(None, "table1", "column2")),
        ],
        condition=binary_condition(
            None,
            "eq",
            lhs=Column(None, None, "column1"),
            rhs=Literal(None, "blabla"),
        ),
        groupby=[
            Column(None, None, "column1"),
            Column(None, "table1", "column2")
        ],
        having=binary_condition(
            None,
            "eq",
            lhs=Column(None, None, "column1"),
            rhs=Literal(None, 123),
        ),
        order_by=[
            OrderBy(OrderByDirection.ASC, Column(None, None, "column1"))
        ],
        array_join=Column(None, None, "column1"),
    )

    query.set_final(True)
    query.set_offset(50)
    query.set_limit(100)

    request_settings = HTTPRequestSettings()
    clickhouse_query = AstSqlQuery(query, request_settings)

    expected = {
        "from": "FROM my_table FINAL SAMPLE 0.1",
        "group": "GROUP BY (column1, table1.column2) WITH TOTALS",
        "having": "HAVING eq(column1, 123)",
        "array_join": "ARRAY JOIN column1",
        "limit": "LIMIT 100 OFFSET 50",
        "limitby": "LIMIT 10 BY environment",
        "order": "ORDER BY column1 ASC",
        "select": "SELECT column1, table1.column2",
        "where": "WHERE eq(column1, 'blabla')",
    }

    assert clickhouse_query.sql_data() == expected
Beispiel #5
0
def test_failure_rate_format_expressions() -> None:
    unprocessed = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None,
                               expression=Column(None, None, "column2")),
            SelectedExpression("perf", FunctionCall("perf", "failure_rate",
                                                    ())),
        ],
    )
    expected = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None,
                               expression=Column(None, None, "column2")),
            SelectedExpression(
                "perf",
                divide(
                    FunctionCall(
                        None,
                        "countIf",
                        (binary_condition(
                            None,
                            ConditionFunctions.NOT_IN,
                            Column(None, None, "transaction_status"),
                            FunctionCall(
                                None,
                                "tuple",
                                (
                                    Literal(alias=None, value=0),
                                    Literal(alias=None, value=1),
                                    Literal(alias=None, value=2),
                                ),
                            ),
                        ), ),
                    ),
                    count(),
                    "perf",
                ),
            ),
        ],
    )

    failure_rate_processor(ColumnSet([])).process_query(
        unprocessed, HTTPRequestSettings())
    assert (expected.get_selected_columns_from_ast() ==
            unprocessed.get_selected_columns_from_ast())

    ret = unprocessed.get_selected_columns_from_ast()[1].expression.accept(
        ClickhouseExpressionFormatter())
    assert ret == (
        "(divide(countIf(notIn(transaction_status, tuple(0, 1, 2))), count()) AS perf)"
    )
Beispiel #6
0
def test_timeseries_format_expressions(
    granularity: int,
    condition: Optional[FunctionCall],
    exp_column: FunctionCall,
    exp_condition: Optional[FunctionCall],
    formatted_column: str,
    formatted_condition: str,
) -> None:
    unprocessed = Query(
        {"granularity": granularity},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration",
                Column("transaction.duration", None, "duration")),
            SelectedExpression("my_time", Column("my_time", None, "time")),
        ],
        condition=condition,
    )
    expected = Query(
        {"granularity": granularity},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration",
                Column("transaction.duration", None, "duration")),
            SelectedExpression(exp_column.alias, exp_column),
        ],
        condition=exp_condition,
    )

    entity = TransactionsEntity()
    processors = entity.get_query_processors()
    for processor in processors:
        if isinstance(processor, TimeSeriesProcessor):
            processor.process_query(unprocessed, HTTPRequestSettings())

    assert (expected.get_selected_columns_from_ast() ==
            unprocessed.get_selected_columns_from_ast())
    assert expected.get_condition_from_ast(
    ) == unprocessed.get_condition_from_ast()

    ret = unprocessed.get_selected_columns_from_ast()[1].expression.accept(
        ClickhouseExpressionFormatter())
    assert ret == formatted_column
    if condition:
        ret = unprocessed.get_condition_from_ast().accept(
            ClickhouseExpressionFormatter())
        assert formatted_condition == ret
def test_transaction_column_format_expressions() -> None:
    unprocessed = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration",
                Column("transaction.duration", None, "duration")),
            SelectedExpression("the_event_id",
                               Column("the_event_id", None, "event_id")),
        ],
    )
    expected = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration",
                Column("transaction.duration", None, "duration")),
            SelectedExpression(
                "the_event_id",
                FunctionCall(
                    "the_event_id",
                    "replaceAll",
                    (
                        FunctionCall(
                            None,
                            "toString",
                            (Column(None, None, "event_id"), ),
                        ),
                        Literal(None, "-"),
                        Literal(None, ""),
                    ),
                ),
            ),
        ],
    )

    TransactionColumnProcessor().process_query(unprocessed,
                                               HTTPRequestSettings())
    assert (expected.get_selected_columns_from_ast() ==
            unprocessed.get_selected_columns_from_ast())

    formatted = unprocessed.get_selected_columns_from_ast(
    )[1].expression.accept(ClickhouseExpressionFormatter())
    assert formatted == "(replaceAll(toString(event_id), '-', '') AS the_event_id)"
Beispiel #8
0
def test_col_split(
    dataset_name: str,
    id_column: str,
    project_column: str,
    timestamp_column: str,
    first_query_data: Sequence[MutableMapping[str, Any]],
    second_query_data: Sequence[MutableMapping[str, Any]],
) -> None:
    def do_query(
        query: ClickhouseQuery,
        request_settings: RequestSettings,
        reader: Reader[SqlQuery],
    ) -> QueryResult:
        selected_cols = query.get_selected_columns()
        assert selected_cols == [
            c.expression.column_name
            for c in query.get_selected_columns_from_ast() or []
            if isinstance(c.expression, Column)
        ]
        if selected_cols == list(first_query_data[0].keys()):
            return QueryResult({"data": first_query_data}, {})
        elif selected_cols == list(second_query_data[0].keys()):
            return QueryResult({"data": second_query_data}, {})
        else:
            raise ValueError(f"Unexpected selected columns: {selected_cols}")

    events = get_dataset(dataset_name)
    query = ClickhouseQuery(
        LogicalQuery(
            {
                "selected_columns": list(second_query_data[0].keys()),
                "conditions": [""],
                "orderby": "events.event_id",
                "sample": 10,
                "limit": 100,
                "offset": 50,
            },
            events.get_all_storages()[0].get_schema().get_data_source(),
            selected_columns=[
                SelectedExpression(name=col_name,
                                   expression=Column(None, None, col_name))
                for col_name in second_query_data[0].keys()
            ],
        ))

    strategy = SimpleQueryPlanExecutionStrategy(
        ClickhouseCluster("localhost", 1024, "default", "", "default", 80,
                          set(), True),
        [],
        [
            ColumnSplitQueryStrategy(id_column, project_column,
                                     timestamp_column),
            TimeSplitQueryStrategy(timestamp_col=timestamp_column),
        ],
    )

    strategy.execute(query, HTTPRequestSettings(), do_query)
def build_query(
    selected_columns: Optional[Sequence[Expression]] = None,
    condition: Optional[Expression] = None,
    having: Optional[Expression] = None,
) -> ClickhouseQuery:
    return ClickhouseQuery(
        SnubaQuery(
            {},
            None,
            selected_columns=[
                SelectedExpression(name=s.alias, expression=s)
                for s in selected_columns or []
            ],
            condition=condition,
            having=having,
        ))
def test_events_column_format_expressions() -> None:
    unprocessed = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression("dr_claw", Column("dr_claw", None, "culprit")),
            SelectedExpression("the_group_id",
                               Column("the_group_id", None, "group_id")),
            SelectedExpression("the_message",
                               Column("the_message", None, "message")),
        ],
    )
    expected = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression("dr_claw", Column("dr_claw", None, "culprit")),
            SelectedExpression(
                "the_group_id",
                FunctionCall(
                    "the_group_id",
                    "nullIf",
                    (
                        Column(None, None, "group_id"),
                        Literal(None, 0),
                    ),
                ),
            ),
            SelectedExpression(
                "the_message",
                FunctionCall(
                    "the_message",
                    "coalesce",
                    (
                        Column(None, None, "search_message"),
                        Column(None, None, "message"),
                    ),
                ),
            ),
        ],
    )

    EventsColumnProcessor().process_query(unprocessed, HTTPRequestSettings())
    assert (expected.get_selected_columns_from_ast() ==
            unprocessed.get_selected_columns_from_ast())

    expected = (
        "(nullIf(group_id, 0) AS the_group_id)",
        "(coalesce(search_message, message) AS the_message)",
    )

    for idx, column in enumerate(
            unprocessed.get_selected_columns_from_ast()[1:]):
        formatted = column.expression.accept(ClickhouseExpressionFormatter())
        assert expected[idx] == formatted
Beispiel #11
0
 def build_selected_expressions(
     raw_expressions: Sequence[Any], ) -> List[SelectedExpression]:
     output = []
     for raw_expression in raw_expressions:
         exp = parse_expression(tuplify(raw_expression),
                                entity.get_data_model(), set())
         output.append(
             SelectedExpression(
                 # An expression in the query can be a string or a
                 # complex list with an alias. In the second case
                 # we trust the parser to find the alias.
                 name=raw_expression
                 if isinstance(raw_expression, str) else exp.alias,
                 expression=exp,
             ))
     return output
Beispiel #12
0
def test_iterate_over_query():
    """
    Creates a query with the new AST and iterate over all expressions.
    """
    column1 = Column(None, "t1", "c1")
    column2 = Column(None, "t1", "c2")
    function_1 = FunctionCall("alias", "f1", (column1, column2))
    function_2 = FunctionCall("alias", "f2", (column2, ))

    condition = binary_condition(None, ConditionFunctions.EQ, column1,
                                 Literal(None, "1"))

    orderby = OrderBy(OrderByDirection.ASC, function_2)

    query = Query(
        {},
        TableSource("my_table", ColumnSet([])),
        selected_columns=[SelectedExpression("alias", function_1)],
        array_join=None,
        condition=condition,
        groupby=[function_1],
        having=None,
        order_by=[orderby],
    )

    expected_expressions = [
        # selected columns
        column1,
        column2,
        function_1,
        # condition
        column1,
        Literal(None, "1"),
        condition,
        # groupby
        column1,
        column2,
        function_1,
        # order by
        column2,
        function_2,
    ]

    assert list(query.get_all_expressions()) == expected_expressions
Beispiel #13
0
def test_handled_processor_invalid() -> None:
    columnset = ColumnSet([])
    unprocessed = Query(
        {},
        TableSource("events", columnset),
        selected_columns=[
            SelectedExpression(
                "result",
                FunctionCall(
                    "result",
                    "isHandled",
                    (Column(None, None, "type"), ),
                ),
            ),
        ],
    )
    processor = handled_functions.HandledFunctionsProcessor(
        "exception_stacks.mechanism_handled", columnset)
    with pytest.raises(InvalidExpressionException):
        processor.process_query(unprocessed, HTTPRequestSettings())
Beispiel #14
0
def test_invalid_datetime() -> None:
    unprocessed = Query(
        {},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration",
                Column("transaction.duration", None, "duration")),
        ],
        condition=binary_condition(
            None,
            ConditionFunctions.EQ,
            Column("my_time", None, "time"),
            Literal(None, ""),
        ),
    )

    entity = TransactionsEntity()
    processors = entity.get_query_processors()
    for processor in processors:
        if isinstance(processor, TimeSeriesProcessor):
            with pytest.raises(InvalidQueryException):
                processor.process_query(unprocessed, HTTPRequestSettings())
Beispiel #15
0
def _parse_query_impl(body: MutableMapping[str, Any], entity: Entity) -> Query:
    def build_selected_expressions(
        raw_expressions: Sequence[Any], ) -> List[SelectedExpression]:
        output = []
        for raw_expression in raw_expressions:
            exp = parse_expression(tuplify(raw_expression),
                                   entity.get_data_model(), set())
            output.append(
                SelectedExpression(
                    # An expression in the query can be a string or a
                    # complex list with an alias. In the second case
                    # we trust the parser to find the alias.
                    name=raw_expression
                    if isinstance(raw_expression, str) else exp.alias,
                    expression=exp,
                ))
        return output

    aggregations = []
    for aggregation in body.get("aggregations", []):
        if not isinstance(aggregation, Sequence):
            raise ParsingException((
                f"Invalid aggregation structure {aggregation}. "
                "It must be a sequence containing expression, column and alias."
            ))
        aggregation_function = aggregation[0]
        column_expr = aggregation[1]
        column_expr = column_expr if column_expr else []
        alias = aggregation[2]
        alias = alias if alias else None

        aggregations.append(
            SelectedExpression(
                name=alias,
                expression=parse_aggregation(
                    aggregation_function,
                    column_expr,
                    alias,
                    entity.get_data_model(),
                    set(),
                ),
            ))

    groupby_clause = build_selected_expressions(
        to_list(body.get("groupby", [])))

    select_clause = (
        groupby_clause + aggregations +
        build_selected_expressions(body.get("selected_columns", [])))

    array_join_cols = set()
    arrayjoin = body.get("arrayjoin")
    # TODO: Properly detect all array join columns in all clauses of the query.
    # This is missing an arrayJoin in condition with an alias that is then
    # used in the select.
    if arrayjoin:
        array_join_cols.add(arrayjoin)
        array_join_expr: Optional[Expression] = parse_expression(
            body["arrayjoin"], entity.get_data_model(), {arrayjoin})
    else:
        array_join_expr = None
        for select_expr in select_clause:
            if isinstance(select_expr.expression, FunctionCall):
                if select_expr.expression.function_name == "arrayJoin":
                    parameters = select_expr.expression.parameters
                    if len(parameters) != 1:
                        raise ParsingException(
                            "arrayJoin(...) only accepts a single parameter.")
                    if isinstance(parameters[0], Column):
                        array_join_cols.add(parameters[0].column_name)
                    else:
                        # We only accepts columns or functions that do not
                        # reference columns. We could not say whether we are
                        # actually arrayjoining on the values of the column
                        # if it is nested in an arbitrary function. But
                        # functions of literals are fine.
                        for e in parameters[0]:
                            if isinstance(e, Column):
                                raise ParsingException(
                                    "arrayJoin(...) cannot contain columns nested in functions."
                                )

    where_expr = parse_conditions_to_expr(body.get("conditions", []), entity,
                                          array_join_cols)
    having_expr = parse_conditions_to_expr(body.get("having", []), entity,
                                           array_join_cols)

    orderby_exprs = []
    for orderby in to_list(body.get("orderby", [])):
        if isinstance(orderby, str):
            match = NEGATE_RE.match(orderby)
            if match is None:
                raise ParsingException((
                    f"Invalid Order By clause {orderby}. If the Order By is a string, "
                    "it must respect the format `[-]column`"))
            direction, col = match.groups()
            orderby = col
        elif is_function(orderby):
            match = NEGATE_RE.match(orderby[0])
            if match is None:
                raise ParsingException((
                    f"Invalid Order By clause {orderby}. If the Order By is an expression, "
                    "the function name must respect the format `[-]func_name`"
                ))
            direction, col = match.groups()
            orderby = [col] + orderby[1:]
        else:
            raise ParsingException(
                (f"Invalid Order By clause {orderby}. The Clause was neither "
                 "a string nor a function call."))
        orderby_parsed = parse_expression(tuplify(orderby),
                                          entity.get_data_model(), set())
        orderby_exprs.append(
            OrderBy(
                OrderByDirection.DESC
                if direction == "-" else OrderByDirection.ASC,
                orderby_parsed,
            ))

    return Query(
        body,
        None,
        selected_columns=select_clause,
        array_join=array_join_expr,
        condition=where_expr,
        groupby=[g.expression for g in groupby_clause],
        having=having_expr,
        order_by=orderby_exprs,
    )
Beispiel #16
0
    BooleanFunctions,
    ConditionFunctions,
    binary_condition,
)
from snuba.query.expressions import Column, CurriedFunctionCall, FunctionCall, Literal
from snuba.query.logical import OrderBy, OrderByDirection, Query, SelectedExpression
from snuba.request.request_settings import HTTPRequestSettings

test_cases = [
    pytest.param(
        # Simple query with aliases and multiple tables
        Query(
            {},
            TableSource("my_table", ColumnSet([])),
            selected_columns=[
                SelectedExpression("column1", Column(None, None, "column1")),
                SelectedExpression("column2", Column(None, "table1",
                                                     "column2")),
                SelectedExpression("column3", Column("al", None, "column3")),
            ],
            condition=binary_condition(
                None,
                "eq",
                lhs=Column("al", None, "column3"),
                rhs=Literal(None, "blabla"),
            ),
            groupby=[
                Column(None, None, "column1"),
                Column(None, "table1", "column2"),
                Column("al", None, "column3"),
                Column(None, None, "column4"),
from snuba.query.matchers import (
    Column as ColumnMatch,
    String as StringMatch,
    MatchResult,
    Param,
)
from snuba.query.processors.pattern_replacer import PatternReplacer
from snuba.request.request_settings import HTTPRequestSettings

test_data = [
    pytest.param(
        Query(
            {},
            TableSource("events", ColumnSet([])),
            selected_columns=[
                SelectedExpression(name=None,
                                   expression=Column(None, None, "column1")),
                SelectedExpression(name=None,
                                   expression=Column(None, None, "column2")),
            ],
        ),
        Query(
            {},
            TableSource("events", ColumnSet([])),
            selected_columns=[
                SelectedExpression(
                    name=None,
                    expression=FunctionCall(
                        None,
                        "nullIf",
                        (Column(None, None, "column1"), Literal(None, "")),
                    ),
from snuba.query.processors.custom_function import (
    CustomFunction,
    InvalidCustomFunctionCall,
    partial_function,
    simple_function,
)
from snuba.query.validation.signature import Column as ColType
from snuba.request.request_settings import HTTPRequestSettings

TEST_CASES = [
    pytest.param(
        Query(
            {},
            None,
            selected_columns=[
                SelectedExpression("column1", Column("column1", None,
                                                     "column1")),
            ],
            groupby=[Column("column1", None, "column1")],
            condition=binary_condition(
                None,
                "equals",
                FunctionCall("group_id", "f",
                             (Column("something", None, "something"), )),
                Literal(None, 1),
            ),
        ),
        Query(
            {},
            None,
            selected_columns=[
                SelectedExpression("column1", Column("column1", None,
Beispiel #19
0
    Column,
    FunctionCall,
    Literal,
    SubscriptableReference,
)
from snuba.query.logical import Query as SnubaQuery
from snuba.query.logical import SelectedExpression

test_cases = [
    pytest.param(
        TranslationMappers(),
        SnubaQuery(
            body={},
            data_source=TableSource("my_table", ColumnSet([])),
            selected_columns=[
                SelectedExpression("alias", Column("alias", "table", "column")),
                SelectedExpression(
                    "alias2",
                    FunctionCall(
                        "alias2",
                        "f1",
                        (Column(None, None, "column2"), Column(None, None, "column3")),
                    ),
                ),
                SelectedExpression(
                    name=None,
                    expression=SubscriptableReference(
                        None, Column(None, None, "tags"), Literal(None, "myTag")
                    ),
                ),
            ],
Beispiel #20
0
def test_tags_expander() -> None:
    query_body = {
        "selected_columns": [
            ["f1", ["tags_key", "column2"], "f1_alias"],
            ["f2", [], "f2_alias"],
        ],
        "aggregations": [
            ["count", "platform", "platforms"],
            ["testF", ["platform", "tags_value"], "top_platforms"],
        ],
        "conditions": [["tags_key", "=", "tags_key"]],
        "having": [["tags_value", "IN", ["tag"]]],
    }

    events = get_dataset("events")
    query = parse_query(query_body, events)

    processor = TagsExpanderProcessor()
    request_settings = HTTPRequestSettings()
    processor.process_query(query, request_settings)

    assert query.get_selected_columns_from_ast() == [
        SelectedExpression(
            "platforms",
            FunctionCall("platforms", "count",
                         (Column("platform", None, "platform"), )),
        ),
        SelectedExpression(
            "top_platforms",
            FunctionCall(
                "top_platforms",
                "testF",
                (
                    Column("platform", None, "platform"),
                    FunctionCall("tags_value", "arrayJoin",
                                 (Column(None, None, "tags.value"), )),
                ),
            ),
        ),
        SelectedExpression(
            "f1_alias",
            FunctionCall(
                "f1_alias",
                "f1",
                (
                    FunctionCall("tags_key", "arrayJoin",
                                 (Column(None, None, "tags.key"), )),
                    Column("column2", None, "column2"),
                ),
            ),
        ),
        SelectedExpression("f2_alias", FunctionCall("f2_alias", "f2",
                                                    tuple())),
    ]

    assert query.get_condition_from_ast() == binary_condition(
        None,
        OPERATOR_TO_FUNCTION["="],
        FunctionCall("tags_key", "arrayJoin",
                     (Column(None, None, "tags.key"), )),
        Literal(None, "tags_key"),
    )

    assert query.get_having_from_ast() == in_condition(
        None,
        FunctionCall("tags_value", "arrayJoin",
                     (Column(None, None, "tags.value"), )),
        [Literal(None, "tag")],
    )
Beispiel #21
0
    def execute(
        self,
        query: Query,
        request_settings: RequestSettings,
        runner: SplitQueryRunner,
    ) -> Optional[QueryResult]:
        """
        Split query in 2 steps if a large number of columns is being selected.
            - First query only selects event_id, project_id and timestamp.
            - Second query selects all fields for only those events.
            - Shrink the date range.
        """
        limit = query.get_limit()
        if (limit is None or limit == 0 or query.get_groupby()
                or query.get_aggregations()
                or not query.get_selected_columns()):
            return None

        if limit > settings.COLUMN_SPLIT_MAX_LIMIT:
            metrics.increment("column_splitter.query_above_limit")
            return None

        # Do not split if there is already a = or IN condition on an ID column
        id_column_matcher = FunctionCall(
            Or([String(ConditionFunctions.EQ),
                String(ConditionFunctions.IN)]),
            (
                Column(None, String(self.__id_column)),
                AnyExpression(),
            ),
        )

        for expr in query.get_condition_from_ast() or []:
            match = id_column_matcher.match(expr)

            if match:
                return None

        # We need to count the number of table/column name pairs
        # not the number of distinct Column objects in the query
        # so to avoid counting aliased columns multiple times.
        total_columns = {(col.table_name, col.column_name)
                         for col in query.get_all_ast_referenced_columns()}

        minimal_query = copy.deepcopy(query)
        minimal_query.set_selected_columns(
            [self.__id_column, self.__project_column, self.__timestamp_column])
        # TODO: provide the table alias name to this splitter if we ever use it
        # in joins.
        minimal_query.set_ast_selected_columns([
            SelectedExpression(self.__id_column,
                               ColumnExpr(None, None, self.__id_column)),
            SelectedExpression(self.__project_column,
                               ColumnExpr(None, None, self.__project_column)),
            SelectedExpression(
                self.__timestamp_column,
                ColumnExpr(None, None, self.__timestamp_column),
            ),
        ])

        for exp in minimal_query.get_all_expressions():
            if exp.alias in (
                    self.__id_column,
                    self.__project_column,
                    self.__timestamp_column,
            ) and not (isinstance(exp, ColumnExpr)
                       and exp.column_name == exp.alias):
                logger.warning(
                    "Potential alias shadowing due to column splitter",
                    extra={"expression": exp},
                    exc_info=True,
                )

        minimal_columns = {
            (col.table_name, col.column_name)
            for col in minimal_query.get_all_ast_referenced_columns()
        }
        if len(total_columns) <= len(minimal_columns):
            return None

        # Ensures the AST minimal query is actually runnable on its own.
        if not minimal_query.validate_aliases():
            return None

        legacy_references = set(minimal_query.get_all_referenced_columns())
        ast_column_names = {
            c.column_name
            for c in minimal_query.get_all_ast_referenced_columns()
        }
        # Ensures the legacy minimal query (which does not expand alias references)
        # does not contain alias references we removed when creating minimal_query.
        if legacy_references - ast_column_names:
            metrics.increment("columns.skip_invalid_legacy_query")
            return None

        result = runner(minimal_query, request_settings)
        del minimal_query

        if not result.result["data"]:
            return None

        # Making a copy just in case runner returned None (which would drive the execution
        # strategy to ignore the result of this splitter and try the next one).
        query = copy.deepcopy(query)

        event_ids = list(
            set([event[self.__id_column] for event in result.result["data"]]))
        if len(event_ids) > settings.COLUMN_SPLIT_MAX_RESULTS:
            # We may be runing a query that is beyond clickhouse maximum query size,
            # so we cowardly abandon.
            metrics.increment(
                "column_splitter.intermediate_results_beyond_limit")
            return None

        query.add_conditions([(self.__id_column, "IN", event_ids)])
        query.add_condition_to_ast(
            in_condition(
                None,
                ColumnExpr(None, None, self.__id_column),
                [LiteralExpr(None, e_id) for e_id in event_ids],
            ))
        query.set_offset(0)
        # TODO: This is technically wrong. Event ids are unique per project, not globally.
        # So, if the minimal query only returned the same event_id from two projects, we
        # would be underestimating the limit here.
        query.set_limit(len(event_ids))

        project_ids = list(
            set([
                event[self.__project_column] for event in result.result["data"]
            ]))
        _replace_condition(
            query,
            self.__project_column,
            "IN",
            project_ids,
        )
        _replace_ast_condition(
            query,
            self.__project_column,
            "IN",
            literals_tuple(None,
                           [LiteralExpr(None, p_id) for p_id in project_ids]),
        )

        timestamps = [
            event[self.__timestamp_column] for event in result.result["data"]
        ]
        _replace_condition(
            query,
            self.__timestamp_column,
            ">=",
            util.parse_datetime(min(timestamps)).isoformat(),
        )
        _replace_ast_condition(
            query,
            self.__timestamp_column,
            ">=",
            LiteralExpr(None, util.parse_datetime(min(timestamps))),
        )
        # We add 1 second since this gets translated to ('timestamp', '<', to_date)
        # and events are stored with a granularity of 1 second.
        _replace_condition(
            query,
            self.__timestamp_column,
            "<",
            (util.parse_datetime(max(timestamps)) +
             timedelta(seconds=1)).isoformat(),
        )
        _replace_ast_condition(
            query,
            self.__timestamp_column,
            "<",
            LiteralExpr(
                None,
                (util.parse_datetime(max(timestamps)) + timedelta(seconds=1)),
            ),
        )

        return runner(query, request_settings)
Beispiel #22
0
from snuba.query.logical import OrderBy, OrderByDirection, Query, SelectedExpression
from snuba.query.snql.parser import parse_snql_query

test_cases = [
    pytest.param(
        "MATCH(blah)WHEREa<3COLLECT4-5,3*g(c),c",
        Query(
            {},
            None,
            selected_columns=[
                SelectedExpression(
                    "4-5",
                    FunctionCall(
                        None,
                        "minus",
                        (
                            Literal(None, 4),
                            Literal(None, 5),
                        ),
                    ),
                ),
                SelectedExpression(
                    "3*g(c)",
                    FunctionCall(
                        None,
                        "multiply",
                        (
                            Literal(None, 3),
                            FunctionCall(
                                None,
                                "g",
Beispiel #23
0
def test_events_boolean_context() -> None:
    columns = ColumnSet([
        ("device_charging", Nullable(UInt(8))),
        ("contexts", Nested([("key", String()), ("value", String())])),
    ])
    query = ClickhouseQuery(
        LogicalQuery(
            {},
            TableSource("events", columns),
            selected_columns=[
                SelectedExpression(
                    "contexts[device.charging]",
                    FunctionCall(
                        "contexts[device.charging]",
                        "arrayElement",
                        (
                            Column(None, None, "contexts.value"),
                            FunctionCall(
                                None,
                                "indexOf",
                                (
                                    Column(None, None, "contexts.key"),
                                    Literal(None, "device.charging"),
                                ),
                            ),
                        ),
                    ),
                )
            ],
        ))

    expected = ClickhouseQuery(
        LogicalQuery(
            {},
            TableSource("events", columns),
            selected_columns=[
                SelectedExpression(
                    "contexts[device.charging]",
                    FunctionCall(
                        "contexts[device.charging]",
                        "multiIf",
                        (
                            binary_condition(
                                None,
                                ConditionFunctions.EQ,
                                FunctionCall(
                                    None,
                                    "toString",
                                    (Column(None, None, "device_charging"), ),
                                ),
                                Literal(None, ""),
                            ),
                            Literal(None, ""),
                            binary_condition(
                                None,
                                ConditionFunctions.IN,
                                FunctionCall(
                                    None,
                                    "toString",
                                    (Column(None, None, "device_charging"), ),
                                ),
                                literals_tuple(None, [
                                    Literal(None, "1"),
                                    Literal(None, "True")
                                ]),
                            ),
                            Literal(None, "True"),
                            Literal(None, "False"),
                        ),
                    ),
                )
            ],
        ))

    settings = HTTPRequestSettings()
    MappingColumnPromoter({
        "contexts": {
            "device.charging": "device_charging"
        }
    }).process_query(query, settings)
    EventsBooleanContextsProcessor().process_query(query, settings)

    assert (query.get_selected_columns_from_ast() ==
            expected.get_selected_columns_from_ast())
Beispiel #24
0
from snuba.query.logical import OrderBy, OrderByDirection, Query, SelectedExpression
from snuba.query.parser import parse_query
from snuba.query.parser.exceptions import AliasShadowingException, CyclicAliasException

test_cases = [
    pytest.param(
        {
            "selected_columns": ["column1"],
            "groupby": ["column2", "column3"],
            "aggregations": [["test_func", "column4", "test_func_alias"]],
        },
        Query(
            {},
            TableSource("events", ColumnSet([])),
            selected_columns=[
                SelectedExpression("column2", Column("column2", None,
                                                     "column2")),
                SelectedExpression("column3", Column("column3", None,
                                                     "column3")),
                SelectedExpression(
                    "test_func_alias",
                    FunctionCall(
                        "test_func_alias",
                        "test_func",
                        (Column("column4", None, "column4"), ),
                    ),
                ),
                SelectedExpression("column1", Column("column1", None,
                                                     "column1")),
            ],
            groupby=[
                Column("column2", None, "column2"),
Beispiel #25
0
def test_apdex_format_expressions() -> None:
    unprocessed = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "column2")),
            SelectedExpression(
                "perf",
                FunctionCall(
                    "perf", "apdex", (Column(None, None, "column1"), Literal(None, 300))
                ),
            ),
        ],
    )
    expected = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "column2")),
            SelectedExpression(
                "perf",
                divide(
                    plus(
                        FunctionCall(
                            None,
                            "countIf",
                            (
                                binary_condition(
                                    None,
                                    ConditionFunctions.LTE,
                                    Column(None, None, "column1"),
                                    Literal(None, 300),
                                ),
                            ),
                        ),
                        divide(
                            FunctionCall(
                                None,
                                "countIf",
                                (
                                    binary_condition(
                                        None,
                                        BooleanFunctions.AND,
                                        binary_condition(
                                            None,
                                            ConditionFunctions.GT,
                                            Column(None, None, "column1"),
                                            Literal(None, 300),
                                        ),
                                        binary_condition(
                                            None,
                                            ConditionFunctions.LTE,
                                            Column(None, None, "column1"),
                                            multiply(
                                                Literal(None, 300), Literal(None, 4)
                                            ),
                                        ),
                                    ),
                                ),
                            ),
                            Literal(None, 2),
                        ),
                    ),
                    FunctionCall(None, "count", (),),
                    "perf",
                ),
            ),
        ],
    )

    apdex_processor(ColumnSet([])).process_query(unprocessed, HTTPRequestSettings())
    assert (
        expected.get_selected_columns_from_ast()
        == unprocessed.get_selected_columns_from_ast()
    )

    ret = unprocessed.get_selected_columns_from_ast()[1].expression.accept(
        ClickhouseExpressionFormatter()
    )
    assert ret == (
        "(divide(plus(countIf(lessOrEquals(column1, 300)), "
        "divide(countIf(greater(column1, 300) AND "
        "lessOrEquals(column1, multiply(300, 4))), 2)), count()) AS perf)"
    )
Beispiel #26
0
def test_replace_expression():
    """
    Create a query with the new AST and replaces a function with a different function
    replaces f1(...) with tag(f1)
    """
    column1 = Column(None, "t1", "c1")
    column2 = Column(None, "t1", "c2")
    function_1 = FunctionCall("alias", "f1", (column1, column2))
    function_2 = FunctionCall("alias", "f2", (column2, ))

    condition = binary_condition(None, ConditionFunctions.EQ, function_1,
                                 Literal(None, "1"))

    orderby = OrderBy(OrderByDirection.ASC, function_2)

    query = Query(
        {},
        TableSource("my_table", ColumnSet([])),
        selected_columns=[SelectedExpression("alias", function_1)],
        array_join=None,
        condition=condition,
        groupby=[function_1],
        having=None,
        order_by=[orderby],
    )

    def replace(exp: Expression) -> Expression:
        if isinstance(exp, FunctionCall) and exp.function_name == "f1":
            return FunctionCall(exp.alias, "tag", (Literal(None, "f1"), ))
        return exp

    query.transform_expressions(replace)

    expected_query = Query(
        {},
        TableSource("my_table", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "alias", FunctionCall("alias", "tag", (Literal(None, "f1"), )))
        ],
        array_join=None,
        condition=binary_condition(
            None,
            ConditionFunctions.EQ,
            FunctionCall("alias", "tag", (Literal(None, "f1"), )),
            Literal(None, "1"),
        ),
        groupby=[FunctionCall("alias", "tag", (Literal(None, "f1"), ))],
        having=None,
        order_by=[orderby],
    )

    assert (query.get_selected_columns_from_ast() ==
            expected_query.get_selected_columns_from_ast())
    assert query.get_condition_from_ast(
    ) == expected_query.get_condition_from_ast()
    assert query.get_groupby_from_ast() == expected_query.get_groupby_from_ast(
    )
    assert query.get_having_from_ast() == expected_query.get_having_from_ast()
    assert query.get_orderby_from_ast() == expected_query.get_orderby_from_ast(
    )

    assert list(query.get_all_expressions()) == list(
        expected_query.get_all_expressions())
Beispiel #27
0
    ConditionFunctions,
    binary_condition,
)
from snuba.query.expressions import Column, FunctionCall, Literal
from snuba.query.logical import Query, SelectedExpression
from snuba.querylog.query_metadata import ClickhouseQueryProfile, FilterProfile
from snuba.state import safe_dumps

test_cases = [
    pytest.param(
        ClickhouseQuery(
            Query(
                {},
                TableSource("events", ColumnSet([])),
                selected_columns=[
                    SelectedExpression("column2",
                                       Column("column2", None, "column2")),
                    SelectedExpression(
                        "something",
                        FunctionCall(
                            "something",
                            "arrayJoin",
                            (Column(None, None, "contexts.key"), ),
                        ),
                    ),
                ],
                condition=binary_condition(
                    None,
                    BooleanFunctions.AND,
                    binary_condition(
                        None,
                        ConditionFunctions.GTE,
Beispiel #28
0
 def visit_selected_expression(
     self, node: Node, visited_children: Tuple[Expression, Any]
 ) -> SelectedExpression:
     exp, _ = visited_children
     return SelectedExpression(node.text.strip(), exp)
Beispiel #29
0
from snuba.clickhouse.columns import ColumnSet
from snuba.datasets.schemas.tables import TableSource
from snuba.query.expressions import Column, CurriedFunctionCall, FunctionCall, Literal
from snuba.query.logical import Query, SelectedExpression
from snuba.query.processors.basic_functions import BasicFunctionsProcessor
from snuba.request.request_settings import HTTPRequestSettings

test_data = [
    (
        Query(
            {},
            TableSource("events", ColumnSet([])),
            selected_columns=[
                SelectedExpression(
                    "alias",
                    FunctionCall("alias", "uniq",
                                 (Column(None, None, "column1"), )),
                ),
                SelectedExpression(
                    "alias2",
                    FunctionCall("alias2", "emptyIfNull",
                                 (Column(None, None, "column2"), )),
                ),
            ],
        ),
        Query(
            {},
            TableSource("events", ColumnSet([])),
            selected_columns=[
                SelectedExpression(
                    "alias",
Beispiel #30
0
def test_handled_processor() -> None:
    columnset = ColumnSet([])
    unprocessed = Query(
        {},
        TableSource("events", columnset),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "id")),
            SelectedExpression(
                "result",
                FunctionCall(
                    "result",
                    "isHandled",
                    tuple(),
                ),
            ),
        ],
    )

    expected = Query(
        {},
        TableSource("events", columnset),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "id")),
            SelectedExpression(
                "result",
                FunctionCall(
                    "result",
                    "arrayExists",
                    (
                        Lambda(
                            None,
                            ("x", ),
                            binary_condition(
                                None,
                                BooleanFunctions.OR,
                                FunctionCall(None, "isNull",
                                             (Argument(None, "x"), )),
                                binary_condition(
                                    None,
                                    ConditionFunctions.EQ,
                                    FunctionCall(None, "assumeNotNull",
                                                 (Argument(None, "x"), )),
                                    Literal(None, 1),
                                ),
                            ),
                        ),
                        Column(None, None,
                               "exception_stacks.mechanism_handled"),
                    ),
                ),
            ),
        ],
    )
    processor = handled_functions.HandledFunctionsProcessor(
        "exception_stacks.mechanism_handled", columnset)
    processor.process_query(unprocessed, HTTPRequestSettings())

    assert (expected.get_selected_columns_from_ast() ==
            unprocessed.get_selected_columns_from_ast())

    ret = unprocessed.get_selected_columns_from_ast()[1].expression.accept(
        ClickhouseExpressionFormatter())
    assert ret == (
        "(arrayExists((x -> (isNull(x) OR equals(assumeNotNull(x), 1))), exception_stacks.mechanism_handled) AS result)"
    )