Beispiel #1
0
    def add_conditions(
        self,
        timestamp: datetime,
        offset: Optional[int],
        query: Union[CompositeQuery[Entity], Query],
    ) -> None:
        """
        AND this subscription's filters onto ``query``.

        The filters are: equality on ``project_id``, a half-open window
        ``[timestamp - time_window_sec, timestamp)`` on the entity's required
        time column, and the conditions the entity subscription provides for
        ``offset``. A condition already present on the query is kept and
        becomes the left operand of the resulting AND.

        :param timestamp: Exclusive upper bound of the time window.
        :param offset: Passed through to the entity subscription when it
            builds its own conditions.
        :param query: Query whose condition is replaced; its FROM clause
            must be a single ``Entity``.
        :raises InvalidSubscriptionError: If the FROM clause is not an
            ``Entity`` or the entity has no required time column.
        """
        # TODO: Support composite queries with multiple entities.
        source = query.get_from_clause()
        if not isinstance(source, Entity):
            raise InvalidSubscriptionError("Only simple queries are supported")

        time_column = get_entity(source.key).required_time_column
        if time_column is None:
            raise InvalidSubscriptionError(
                "Entity must have a timestamp column for subscriptions")

        same_project = binary_condition(
            ConditionFunctions.EQ,
            Column(None, None, "project_id"),
            Literal(None, self.project_id),
        )
        window_start = timestamp - timedelta(seconds=self.time_window_sec)
        at_or_after_start = binary_condition(
            ConditionFunctions.GTE,
            Column(None, None, time_column),
            Literal(None, window_start),
        )
        before_end = binary_condition(
            ConditionFunctions.LT,
            Column(None, None, time_column),
            Literal(None, timestamp),
        )
        entity_specific = (
            self.entity_subscription
            .get_entity_subscription_conditions_for_snql(offset)
        )

        subscription_filters: Sequence[Expression] = [
            same_project,
            at_or_after_start,
            before_end,
            *entity_specific,
        ]

        combined = combine_and_conditions(subscription_filters)
        existing = query.get_condition()
        if existing:
            combined = binary_condition(
                BooleanFunctions.AND, existing, combined)

        query.set_ast_condition(combined)
def test_event_id_column_format_expressions() -> None:
    """
    Check how ``UUIDColumnProcessor`` rewrites a selected ``event_id`` column.

    The ``event_id`` column must become
    ``replaceAll(toString(event_id), '-', '')`` under its original alias,
    while the unrelated ``duration`` column must be left as it was. The
    rewritten expression is also checked after ClickHouse formatting.
    """

    def duration_selection() -> SelectedExpression:
        # Column the processor is expected to leave untouched.
        return SelectedExpression(
            "transaction.duration",
            Column("transaction.duration", None, "duration"),
        )

    query = Query(
        Table("events", ColumnSet([])),
        selected_columns=[
            duration_selection(),
            SelectedExpression(
                "the_event_id", Column("the_event_id", None, "event_id")
            ),
        ],
    )

    id_as_string = FunctionCall(
        None, "toString", (Column(None, None, "event_id"),)
    )
    id_without_dashes = FunctionCall(
        "the_event_id",
        "replaceAll",
        (id_as_string, Literal(None, "-"), Literal(None, "")),
    )
    expected = Query(
        Table("events", ColumnSet([])),
        selected_columns=[
            duration_selection(),
            SelectedExpression("the_event_id", id_without_dashes),
        ],
    )

    UUIDColumnProcessor({"event_id"}).process_query(query, HTTPQuerySettings())
    assert expected.get_selected_columns() == query.get_selected_columns()

    rendered = query.get_selected_columns()[1].expression.accept(
        ClickhouseExpressionFormatter()
    )
    assert rendered == "(replaceAll(toString(event_id), '-', '') AS the_event_id)"
Beispiel #3
0
def test_first_level_conditions() -> None:
    """
    Check the flattening done by ``get_first_level_and_conditions`` and
    ``get_first_level_or_conditions``.

    Covered shapes: nested ANDs, an AND wrapped in ``equals(..., 1)``, an OR
    whose operand is an AND (the AND must stay intact), and an OR wrapped in
    an equality against the literal 1.
    """

    def equals_test(table: str, column: str) -> FunctionCall:
        # Leaf condition: <table>.<column> = "test".
        return binary_condition(
            ConditionFunctions.EQ, Column(None, table, column), Literal(None, "test"),
        )

    c1 = equals_test("table1", "column1")
    c2 = equals_test("table2", "column2")
    c3 = equals_test("table3", "column3")

    # (c1 AND c2) AND c3 flattens to its three leaves.
    nested_and = binary_condition(
        BooleanFunctions.AND, binary_condition(BooleanFunctions.AND, c1, c2), c3,
    )
    assert get_first_level_and_conditions(nested_and) == [c1, c2, c3]

    # equals(and(c1, c2), 1) AND c3 flattens the same way.
    wrapped_and = FunctionCall(
        None, "equals", (FunctionCall(None, "and", (c1, c2)), Literal(None, 1))
    )
    and_with_wrapper = binary_condition(BooleanFunctions.AND, wrapped_and, c3)
    assert get_first_level_and_conditions(and_with_wrapper) == [c1, c2, c3]

    # (c1 AND c2) OR c3: the AND operand is returned whole.
    or_of_and = binary_condition(
        BooleanFunctions.OR, binary_condition(BooleanFunctions.AND, c1, c2), c3,
    )
    assert get_first_level_or_conditions(or_of_and) == [
        binary_condition(BooleanFunctions.AND, c1, c2),
        c3,
    ]

    # (c1 OR (c2 AND c3)) = 1: the equality wrapper is looked through.
    inner_or = binary_condition(
        BooleanFunctions.OR, c1, binary_condition(BooleanFunctions.AND, c2, c3)
    )
    wrapped_or = binary_condition(ConditionFunctions.EQ, inner_or, Literal(None, 1))
    assert get_first_level_or_conditions(wrapped_or) == [
        c1,
        binary_condition(BooleanFunctions.AND, c2, c3),
    ]
Beispiel #4
0
def test_iterate_over_query():
    """
    Build a query on the new AST and check which expressions
    ``get_all_expressions`` yields, and in which order.
    """
    col_c1 = Column(None, "t1", "c1")
    col_c2 = Column(None, "t1", "c2")
    selected_fn = FunctionCall("alias", "f1", (col_c1, col_c2))
    ordering_fn = FunctionCall("alias", "f2", (col_c2, ))

    where = binary_condition(None, ConditionFunctions.EQ, col_c1,
                             Literal(None, "1"))

    ordering = OrderBy(OrderByDirection.ASC, ordering_fn)

    query = Query(
        {},
        TableSource("my_table", ColumnSet([])),
        selected_columns=[SelectedExpression("alias", selected_fn)],
        array_join=None,
        condition=where,
        groupby=[selected_fn],
        having=None,
        order_by=[ordering],
    )

    # Within each clause, sub-expressions come before their parent.
    from_selected_columns = [col_c1, col_c2, selected_fn]
    from_condition = [col_c1, Literal(None, "1"), where]
    from_groupby = [col_c1, col_c2, selected_fn]
    from_order_by = [col_c2, ordering_fn]

    assert list(query.get_all_expressions()) == (
        from_selected_columns + from_condition + from_groupby + from_order_by
    )
Beispiel #5
0
def test_not_handled_processor() -> None:
    """
    Check how ``HandledFunctionsProcessor`` expands ``notHandled()``.

    The call must become an ``arrayExists`` over
    ``exception_stacks.mechanism_handled`` whose lambda matches elements that
    are not null and equal to 0. The alias ``result`` must be preserved, and
    the unrelated ``id`` column left alone. The expanded expression is also
    checked after ClickHouse formatting.
    """
    schema = ColumnSet([])

    def id_selection() -> SelectedExpression:
        # Column the processor is expected to leave untouched.
        return SelectedExpression(name=None, expression=Column(None, None, "id"))

    query = Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[
            id_selection(),
            SelectedExpression(
                "result", FunctionCall("result", "notHandled", tuple()),
            ),
        ],
    )

    element_is_unhandled = binary_condition(
        BooleanFunctions.AND,
        FunctionCall(None, "isNotNull", (Argument(None, "x"),)),
        binary_condition(
            ConditionFunctions.EQ,
            FunctionCall(None, "assumeNotNull", (Argument(None, "x"),)),
            Literal(None, 0),
        ),
    )
    any_unhandled = FunctionCall(
        "result",
        "arrayExists",
        (
            Lambda(None, ("x",), element_is_unhandled),
            Column(None, None, "exception_stacks.mechanism_handled"),
        ),
    )
    expected = Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[
            id_selection(),
            SelectedExpression("result", any_unhandled),
        ],
    )

    handled_functions.HandledFunctionsProcessor(
        "exception_stacks.mechanism_handled", schema
    ).process_query(query, HTTPRequestSettings())

    assert expected.get_selected_columns() == query.get_selected_columns()

    rendered = query.get_selected_columns()[1].expression.accept(
        ClickhouseExpressionFormatter()
    )
    assert rendered == (
        "(arrayExists((x -> isNotNull(x) AND equals(assumeNotNull(x), 0)), exception_stacks.mechanism_handled) AS result)"
    )
Beispiel #6
0
        def process_column(exp: Expression) -> Expression:
            """
            Rewrite a column named ``user`` as ``nullIf(user, '')``.

            The function call takes over the alias of the original column;
            the inner column carries neither alias nor table name. Every
            other expression is returned unchanged.
            """
            if not isinstance(exp, Column) or exp.column_name != "user":
                return exp

            unaliased_user = Column(None, None, "user")
            return FunctionCall(
                exp.alias, "nullIf", (unaliased_user, Literal(None, ""))
            )
def build_in(project_column: str, projects: Sequence[int]) -> Expression:
    """
    Build the expression ``in(<project_column>, tuple(<projects>...))``.

    The column is aliased ``_snuba_<project_column>``; each project id
    becomes an unaliased literal, in the order given.
    """
    aliased_column = Column(f"_snuba_{project_column}", None, project_column)
    project_literals = tuple(Literal(None, project) for project in projects)
    candidates = FunctionCall(None, "tuple", project_literals)
    return FunctionCall(None, "in", (aliased_column, candidates))
Beispiel #8
0
def build_time_condition(
    time_columns: str, from_date: datetime, to_date: datetime
) -> Expression:
    """
    Build ``from_date <= <time_columns> AND <time_columns> < to_date``.

    The lower bound is inclusive and the upper bound exclusive. Despite the
    plural name, ``time_columns`` is used as a single column name.
    """
    at_or_after_start = binary_condition(
        None,
        ConditionFunctions.GTE,
        Column(None, None, time_columns),
        Literal(None, from_date),
    )
    before_end = binary_condition(
        None,
        ConditionFunctions.LT,
        Column(None, None, time_columns),
        Literal(None, to_date),
    )
    return binary_condition(
        None, BooleanFunctions.AND, at_or_after_start, before_end
    )
Beispiel #9
0
def build_in(column: str, items: Sequence[int]) -> Expression:
    """
    Build the expression ``in(<column>, tuple(<items>...))``.

    Neither the column nor the literals carry an alias; literals keep the
    order of ``items``.
    """
    item_literals = tuple(Literal(None, item) for item in items)
    candidates = FunctionCall(None, "tuple", item_literals)
    return FunctionCall(None, "in", (Column(None, None, column), candidates))
Beispiel #10
0
 def transform(match: MatchResult, exp: Expression) -> Expression:
     """
     Wrap the matched column as ``nullIf(<column_name>, '')``.

     Only the column name is carried over: the resulting function call and
     the inner column are both built without alias or table name. ``match``
     is not used.
     """
     assert isinstance(exp, Column)  # mypy
     bare_column = Column(None, None, exp.column_name)
     empty_string = Literal(None, "")
     return FunctionCall(None, "nullIf", (bare_column, empty_string))
Beispiel #11
0
def test_failure_rate_format_expressions() -> None:
    """
    Check how ``failure_rate_processor`` expands ``failure_rate()``.

    The call must become ``countIf(<status not in 0, 1, 2>) / count()`` under
    the alias ``perf``, while the unrelated ``column2`` is left alone. The
    expanded expression is also checked after ClickHouse formatting.
    """

    def plain_selection() -> SelectedExpression:
        # Column the processor is expected to leave untouched.
        return SelectedExpression(name=None, expression=Column(None, None, "column2"))

    def status_differs_from(code: int) -> FunctionCall:
        return binary_condition(
            None,
            ConditionFunctions.NEQ,
            Column(None, None, "transaction_status"),
            Literal(None, code),
        )

    query = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            plain_selection(),
            SelectedExpression("perf", FunctionCall("perf", "failure_rate", ())),
        ],
    )

    failed_count = FunctionCall(
        None,
        "countIf",
        (
            combine_and_conditions(
                [status_differs_from(code) for code in [0, 1, 2]]
            ),
        ),
    )
    expected = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            plain_selection(),
            SelectedExpression("perf", divide(failed_count, count(), "perf")),
        ],
    )

    failure_rate_processor(ColumnSet([])).process_query(
        query, HTTPRequestSettings()
    )
    assert (
        expected.get_selected_columns_from_ast()
        == query.get_selected_columns_from_ast()
    )

    rendered = query.get_selected_columns_from_ast()[1].expression.accept(
        ClickhouseExpressionFormatter()
    )
    assert rendered == (
        "(divide(countIf(notEquals(transaction_status, 0) AND notEquals(transaction_status, 1) AND notEquals(transaction_status, 2)), count()) AS perf)"
    )
def test_events_column_format_expressions() -> None:
    """
    Check the column rewrites applied by ``EventsColumnProcessor``.

    ``group_id`` must become ``nullIf(group_id, 0)`` and ``message`` must
    become ``coalesce(search_message, message)``, each under its original
    alias; ``culprit`` must be left alone. The two rewritten expressions are
    also checked after ClickHouse formatting.
    """

    def culprit_selection() -> SelectedExpression:
        # Column the processor is expected to leave untouched.
        return SelectedExpression("dr_claw", Column("dr_claw", None, "culprit"))

    query = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            culprit_selection(),
            SelectedExpression(
                "the_group_id", Column("the_group_id", None, "group_id")
            ),
            SelectedExpression(
                "the_message", Column("the_message", None, "message")
            ),
        ],
    )

    nullable_group_id = FunctionCall(
        "the_group_id",
        "nullIf",
        (Column(None, None, "group_id"), Literal(None, 0)),
    )
    message_with_fallback = FunctionCall(
        "the_message",
        "coalesce",
        (Column(None, None, "search_message"), Column(None, None, "message")),
    )
    expected_query = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            culprit_selection(),
            SelectedExpression("the_group_id", nullable_group_id),
            SelectedExpression("the_message", message_with_fallback),
        ],
    )

    EventsColumnProcessor().process_query(query, HTTPRequestSettings())
    assert (expected_query.get_selected_columns_from_ast() ==
            query.get_selected_columns_from_ast())

    expected_sql = (
        "(nullIf(group_id, 0) AS the_group_id)",
        "(coalesce(search_message, message) AS the_message)",
    )

    # Skip the first column: it is not rewritten by the processor.
    rewritten = query.get_selected_columns_from_ast()[1:]
    for position, selected in enumerate(rewritten):
        rendered = selected.expression.accept(ClickhouseExpressionFormatter())
        assert expected_sql[position] == rendered
Beispiel #13
0
def test_mand_conditions(table: str,
                         mand_conditions: List[MandatoryCondition]) -> None:
    """
    Check that ``MandatoryConditionApplier`` adds the table's mandatory
    conditions to both representations of the query.

    The legacy condition list must end with each condition's ``legacy``
    form. The AST condition must equal what ``add_condition_to_ast``
    produces on an untouched copy when given the AND of each condition's
    ``ast`` form.
    """
    legacy_body = {"conditions": [["d", "=", "1"], ["c", "=", "3"]]}

    d_equals_one = binary_condition(
        None,
        OPERATOR_TO_FUNCTION["="],
        Column("d", None, "d"),
        Literal(None, "1"),
    )
    c_equals_three = binary_condition(
        None,
        OPERATOR_TO_FUNCTION["="],
        Column("c", None, "c"),
        Literal(None, "3"),
    )
    query = Query(
        copy.deepcopy(legacy_body),
        TableSource(table, None, mand_conditions, ["c1"]),
        None,
        None,
        binary_condition(
            None, BooleanFunctions.AND, d_equals_one, c_equals_three
        ),
    )

    # Snapshot taken before processing; used to build the expected AST.
    untouched_query = copy.deepcopy(query)

    settings = HTTPRequestSettings(consistent=True)
    MandatoryConditionApplier().process_query(query, settings)

    legacy_body["conditions"].extend(
        [mandatory.legacy for mandatory in mand_conditions])
    assert query.get_conditions() == legacy_body["conditions"]

    untouched_query.add_condition_to_ast(
        combine_and_conditions(
            [mandatory.ast for mandatory in mand_conditions]))

    assert (query.get_condition_from_ast() ==
            untouched_query.get_condition_from_ast())
def test_granularity_added(
    entity_key: EntityKey,
    column: str,
    requested_granularity: Optional[int],
    query_granularity: Optional[int],
) -> None:
    """
    Check the granularity condition added by ``GranularityProcessor``.

    :param entity_key: Entity the query is built against.
    :param column: Name of the single selected column.
    :param requested_granularity: Granularity set on the query before
        processing; may be ``None``.
    :param query_granularity: Granularity expected in the condition the
        processor adds, or ``None`` when the processor is expected to reject
        the request with ``InvalidGranularityException``.
    """
    query = Query(
        QueryEntity(entity_key, ColumnSet([])),
        selected_columns=[
            SelectedExpression(column, Column(None, None, column))
        ],
        condition=binary_condition(ConditionFunctions.EQ,
                                   Column(None, None, "metric_id"),
                                   Literal(None, 123)),
        granularity=requested_granularity,
    )

    try:
        GranularityProcessor().process_query(query, HTTPQuerySettings())
    except InvalidGranularityException:
        # A rejection is only acceptable for cases that expect no
        # granularity; this is why ``query_granularity`` is Optional.
        assert query_granularity is None
    else:
        # The granularity filter is ANDed in front of the original condition.
        assert query == Query(
            QueryEntity(entity_key, ColumnSet([])),
            selected_columns=[
                SelectedExpression(column, Column(None, None, column))
            ],
            condition=binary_condition(
                BooleanFunctions.AND,
                binary_condition(
                    ConditionFunctions.EQ,
                    Column(None, None, "granularity"),
                    Literal(None, query_granularity),
                ),
                binary_condition(
                    ConditionFunctions.EQ,
                    Column(None, None, "metric_id"),
                    Literal(None, 123),
                ),
            ),
            granularity=requested_granularity,
        )
Beispiel #15
0
def test_visit_expression():
    """
    Check the order in which ``DummyVisitor`` walks a curried function call.

    Expected order: the curried call itself, then the function passed as its
    second argument followed by that function's arguments, then the function
    in its third argument followed by that function's arguments. The visitor
    must both record and return this sequence.
    """
    param_column = Column("al", "c1", "t1")
    param_literal = Literal("al2", "test")
    param_fn = FunctionCall("al3", "f1", [param_column, param_literal])

    inner_column = Column("al4", "c2", "t1")
    inner_literal = Literal("al5", "test2")
    inner_fn = FunctionCall("al6", "f2", [inner_column, inner_literal])

    curried = CurriedFunctionCall("al7", inner_fn, [param_fn])

    visitor = DummyVisitor()
    returned = curried.accept(visitor)

    visit_order = [
        curried,
        inner_fn,
        inner_column,
        inner_literal,
        param_fn,
        param_column,
        param_literal,
    ]

    # Tests the state changes on the Visitor
    assert visitor.get_visited_nodes() == visit_order
    # Tests the return value of the visitor
    assert returned == visit_order
Beispiel #16
0
def nested_condition(
    column_name: str,
    key: str,
    operator: str,
    val: str,
) -> Expression:
    """
    Build ``<operator>(<nested expression>, <val>)``.

    The left operand is whatever ``nested_expression(column_name, key)``
    returns; the right operand is ``val`` as an unaliased literal.
    """
    left_operand = nested_expression(column_name, key)
    right_operand = Literal(None, val)
    return binary_condition(operator, left_operand, right_operand)
Beispiel #17
0
def test_tag_translation() -> None:
    """
    Check that ``SubscriptableMapper`` turns ``tags[release]`` into
    ``arrayElement(tags.value, indexOf(tags.key, 'release'))`` while keeping
    the alias of the original reference.
    """
    mapper = SubscriptableMapper(None, "tags", None, "tags")
    release_tag = SubscriptableReference(
        "tags[release]", Column(None, None, "tags"), Literal(None, "release")
    )
    translated = mapper.attempt_map(
        release_tag, SnubaClickhouseMappingTranslator(TranslationMappers()),
    )

    key_position = FunctionCall(
        None,
        "indexOf",
        (Column(None, None, "tags.key"), Literal(None, "release")),
    )
    assert translated == FunctionCall(
        "tags[release]",
        "arrayElement",
        (Column(None, None, "tags.value"), key_position),
    )
Beispiel #18
0
def build_not_in(column: str, items: Sequence[int]) -> Expression:
    """
    Build ``notIn(assumeNotNull(<column>), tuple(<items>...))``.

    No part of the expression carries an alias; literals keep the order of
    ``items``.
    """
    non_null_column = FunctionCall(
        None, "assumeNotNull", (Column(None, None, column), )
    )
    excluded = FunctionCall(
        None, "tuple", tuple(Literal(None, item) for item in items)
    )
    return FunctionCall(None, "notIn", (non_null_column, excluded))
Beispiel #19
0
def test_mand_conditions(table: str,
                         mand_conditions: List[FunctionCall]) -> None:
    """
    Check that ``MandatoryConditionApplier`` adds the table's mandatory
    conditions to the query.

    The processed condition must equal what ``add_condition_to_ast``
    produces on an untouched copy of the query when given the AND of all
    mandatory conditions.
    """
    d_equals_one = binary_condition(
        OPERATOR_TO_FUNCTION["="],
        Column("d", None, "d"),
        Literal(None, "1"),
    )
    c_equals_three = binary_condition(
        OPERATOR_TO_FUNCTION["="],
        Column("c", None, "c"),
        Literal(None, "3"),
    )
    source = Table(
        table,
        ColumnSet([]),
        final=False,
        sampling_rate=None,
        mandatory_conditions=mand_conditions,
        prewhere_candidates=["c1"],
    )
    query = Query(
        source,
        None,
        None,
        binary_condition(BooleanFunctions.AND, d_equals_one, c_equals_three),
    )

    # Snapshot taken before processing; used to build the expected AST.
    untouched_query = copy.deepcopy(query)

    settings = HTTPRequestSettings(consistent=True)
    MandatoryConditionApplier().process_query(query, settings)

    untouched_query.add_condition_to_ast(
        combine_and_conditions(mand_conditions))

    assert (query.get_condition_from_ast() ==
            untouched_query.get_condition_from_ast())
Beispiel #20
0
    def test_when_there_are_not_many_groups_to_exclude(self):
        """
        With three excluded groups and ``max_group_ids_exclude`` set to 5,
        the processor must add a NOT IN filter on the group id next to the
        project filter, in both the legacy and the AST conditions, and must
        not mark the query as final.
        """
        request_settings = HTTPRequestSettings()
        state.set_config("max_group_ids_exclude", 5)
        replacer.set_project_exclude_groups(2, [100, 101, 102])

        processor = self.extension.get_processor()
        processor.process_query(self.query, self.valid_data, request_settings)

        expected_legacy = [
            ("project_id", "IN", [2]),
            (["assumeNotNull", ["group_id"]], "NOT IN", [100, 101, 102]),
        ]
        assert self.query.get_conditions() == expected_legacy

        non_null_group_id = FunctionCall(
            None, "assumeNotNull", (Column(None, "group_id", None),)
        )
        excluded_groups = FunctionCall(
            None,
            "tuple",
            (
                Literal(None, 100),
                Literal(None, 101),
                Literal(None, 102),
            ),
        )
        group_not_excluded = FunctionCall(
            None, "notIn", (non_null_group_id, excluded_groups)
        )
        assert self.query.get_condition_from_ast() == FunctionCall(
            None,
            BooleanFunctions.AND,
            (group_not_excluded, build_in("project_id", [2])),
        )
        assert not self.query.get_final()
Beispiel #21
0
 def visit_subscriptable_reference(
     self, exp: SubscriptableReference
 ) -> SubExpression:
     """
     Move a subscriptable reference into the subquery named by its table.

     The returned reference is a copy whose column has no table name; the
     original table name becomes the subquery alias. Aliases, column name
     and key value are carried over unchanged.

     Asserts that the referenced column has a table name.
     """
     table_alias = exp.column.table_name
     assert (
         table_alias
     ), f"Invalid column expression in join: {exp}. Missing table alias"

     unqualified_reference = SubscriptableReference(
         exp.alias,
         Column(exp.column.alias, None, exp.column.column_name),
         Literal(exp.key.alias, exp.key.value),
     )
     return SubqueryExpression(
         main_expression=unqualified_reference,
         subquery_alias=table_alias,
     )
        def process_column(exp: Expression) -> Expression:
            """
            Rewrite a column named ``group_id`` as ``nullIf(group_id, 0)``.

            The function call takes over the alias of the original column;
            the inner column keeps the table name but drops the alias. Every
            other expression is returned unchanged.
            """
            if not isinstance(exp, Column) or exp.column_name != "group_id":
                return exp

            unaliased_column = Column(None, exp.table_name, exp.column_name)
            return FunctionCall(
                exp.alias, "nullIf", (unaliased_column, Literal(None, 0))
            )
Beispiel #23
0
 def attempt_map(
     self,
     expression: Column,
     children_translator: SnubaClickhouseStrictTranslator,
 ) -> Optional[FunctionCall]:
     """
     Map a column listed in ``self.columns`` to ``identity`` of a null literal.

     The result is aliased with the column's own alias when it has one,
     otherwise with the qualified column name (an absent table name is
     treated as the empty string). Returns ``None`` for columns that are not
     in ``self.columns``. ``children_translator`` is not used.
     """
     if expression.column_name not in self.columns:
         return None

     null_literal = Literal(None, None)
     result_alias = expression.alias or qualified_column(
         expression.column_name, expression.table_name or "")
     return identity(null_literal, result_alias)
Beispiel #24
0
 def process_query(
     self,
     query: Query,
     extension_data: ExtensionData,
     request_settings: RequestSettings,
 ) -> None:
     """
     Apply the extension's granularity and time range to ``query``.

     The granularity is read from ``extension_data["granularity"]``. The
     time range comes from ``self.get_time_limit`` and is added as
     ``from_date <= <timestamp column> AND <timestamp column> < to_date``.
     ``request_settings`` is not used.
     """
     from_date, to_date = self.get_time_limit(extension_data)
     query.set_granularity(extension_data["granularity"])

     at_or_after_start = binary_condition(
         ConditionFunctions.GTE,
         Column(None, None, self.__timestamp_column),
         Literal(None, from_date),
     )
     before_end = binary_condition(
         ConditionFunctions.LT,
         Column(None, None, self.__timestamp_column),
         Literal(None, to_date),
     )
     query.add_condition_to_ast(
         binary_condition(BooleanFunctions.AND, at_or_after_start, before_end)
     )
Beispiel #25
0
 def attempt_map(
     self, expression: Column, children_translator: SnubaClickhouseStrictTranslator,
 ) -> Optional[Literal]:
     """
     Map a column listed in ``self.columns`` to a literal whose value is None.

     The literal is aliased with the column's own alias when it has one,
     otherwise with the qualified column name (an absent table name is
     treated as the empty string). Returns ``None`` for columns that are not
     in ``self.columns``. ``children_translator`` is not used.
     """
     if expression.column_name not in self.columns:
         return None

     literal_alias = expression.alias or qualified_column(
         expression.column_name, expression.table_name or ""
     )
     return Literal(alias=literal_alias, value=None)
Beispiel #26
0
        def transform_expression(exp: Expression) -> Expression:
            """
            Convert the string key of a subscriptable reference to an integer.

            Expressions that are not a ``SubscriptableReference`` are
            returned unchanged. Otherwise a new reference is returned with
            the same alias and column and an unaliased integer literal key.

            Raises ``InvalidExpressionException`` when the key is not a
            string made up only of decimal digits.
            """
            if not isinstance(exp, SubscriptableReference):
                return exp

            key = exp.key
            # ``str.isdecimal`` rather than ``str.isdigit``: the latter also
            # accepts characters such as superscript digits ("²") that
            # ``int`` cannot parse, which would escape below as a bare
            # ``ValueError`` instead of the intended exception.
            if not isinstance(key.value, str) or not key.value.isdecimal():
                raise InvalidExpressionException.from_args(
                    exp,
                    "Expected a string key containing an integer in subscriptable.",
                )

            return SubscriptableReference(exp.alias, exp.column,
                                          Literal(None, int(key.value)))
 def process_query(
     self,
     query: Query,
     extension_data: ExtensionData,
     request_settings: RequestSettings,
 ) -> None:
     """
     Restrict ``query`` to the organization named in the extension data.

     Adds ``org_id = extension_data["organization"]`` to the query's
     condition; the column is aliased ``_snuba_org_id``.
     ``request_settings`` is not used.
     """
     org_column = Column("_snuba_org_id", None, "org_id")
     requested_org = Literal(None, extension_data["organization"])
     query.add_condition_to_ast(
         binary_condition(ConditionFunctions.EQ, org_column, requested_org)
     )
Beispiel #28
0
def test_nullable_nested_translation() -> None:
    """
    Check that a ``SubscriptableMapper`` created with ``nullable=True``
    translates ``measurements[lcp]`` into the expression built by
    ``_get_nullable_expr`` for that alias.
    """
    mapper = SubscriptableMapper(
        None, "measurements", None, "measurements", nullable=True
    )
    lcp_measurement = SubscriptableReference(
        "measurements[lcp]",
        Column(None, None, "measurements"),
        Literal(None, "lcp"),
    )
    translated = mapper.attempt_map(
        lcp_measurement,
        SnubaClickhouseMappingTranslator(TranslationMappers()),
    )

    assert translated == _get_nullable_expr("measurements[lcp]")
def test_organization_extension_query_processing_happy_path():
    """
    Check that the organization extension's processor adds
    ``org_id = 2`` as the condition of a query that had none, given
    validated extension data for organization 2.
    """
    extension = OrganizationExtension()
    payload = {"organization": 2}

    validated = validate_jsonschema(payload, extension.get_schema())
    query = Query({"conditions": []}, TableSource("my_table", ColumnSet([])))
    settings = HTTPRequestSettings()

    processor = extension.get_processor()
    processor.process_query(query, validated, settings)

    assert query.get_condition_from_ast() == binary_condition(
        None,
        ConditionFunctions.EQ,
        Column(None, None, "org_id"),
        Literal(None, 2),
    )
Beispiel #30
0
    def __init__(self) -> None:
        """
        Set up the entity on top of the raw and hourly sessions storages.

        The raw storage is the writable one. Queries are always planned
        against the hourly (materialized) storage, whose schema also provides
        the abstract column set. The translation mappers rewrite the
        aggregate columns into their ``...Merge`` functions.
        """
        raw_storage = get_writable_storage(StorageKey.SESSIONS_RAW)
        hourly_storage = get_storage(StorageKey.SESSIONS_HOURLY)
        hourly_schema = hourly_storage.get_schema()

        self.__time_group_columns = {"bucketed_started": "started"}
        self.__time_parse_columns = ("started", "received")

        # duration_quantiles -> quantilesIfMerge(0.5, 0.9)(duration_quantiles)
        duration_quantiles_mapper = ColumnToCurriedFunction(
            None,
            "duration_quantiles",
            FunctionCall(
                None,
                "quantilesIfMerge",
                (Literal(None, 0.5), Literal(None, 0.9)),
            ),
            (Column(None, None, "duration_quantiles"), ),
        )
        count_mappers = [
            function_rule(name, "countIfMerge")
            for name in ("sessions", "sessions_crashed", "sessions_abnormal")
        ]
        uniq_mappers = [
            function_rule(name, "uniqIfMerge")
            for name in (
                "users",
                "sessions_errored",
                "users_crashed",
                "users_abnormal",
                "users_errored",
            )
        ]

        super().__init__(
            storages=[raw_storage, hourly_storage],
            # TODO: Once we are ready to expose the raw data model and select whether to use
            # materialized storage or the raw one here, replace this with a custom storage
            # selector that decides when to use the materialized data.
            query_plan_builder=SingleStorageQueryPlanBuilder(
                storage=hourly_storage,
                mappers=TranslationMappers(columns=[
                    duration_quantiles_mapper,
                    *count_mappers,
                    *uniq_mappers,
                ]),
            ),
            abstract_column_set=hourly_schema.get_columns(),
            writable_storage=raw_storage,
        )