def add_conditions(
    self,
    timestamp: datetime,
    offset: Optional[int],
    query: Union[CompositeQuery[Entity], Query],
) -> None:
    """Attach the standard subscription conditions to the query's AST:
    the subscription's project, a half-open time window ending at
    ``timestamp``, and any entity-specific subscription conditions.

    Raises InvalidSubscriptionError when the query is not a simple
    single-entity query or the entity has no required time column.
    """
    # TODO: Support composite queries with multiple entities.
    from_clause = query.get_from_clause()
    if not isinstance(from_clause, Entity):
        raise InvalidSubscriptionError("Only simple queries are supported")

    entity = get_entity(from_clause.key)
    time_column = entity.required_time_column
    if time_column is None:
        raise InvalidSubscriptionError(
            "Entity must have a timestamp column for subscriptions"
        )

    window_start = timestamp - timedelta(seconds=self.time_window_sec)
    subscription_conditions = [
        # Restrict to this subscription's project.
        binary_condition(
            ConditionFunctions.EQ,
            Column(None, None, "project_id"),
            Literal(None, self.project_id),
        ),
        # Half-open window [timestamp - time_window, timestamp).
        binary_condition(
            ConditionFunctions.GTE,
            Column(None, None, time_column),
            Literal(None, window_start),
        ),
        binary_condition(
            ConditionFunctions.LT,
            Column(None, None, time_column),
            Literal(None, timestamp),
        ),
        *self.entity_subscription.get_entity_subscription_conditions_for_snql(
            offset
        ),
    ]

    combined = combine_and_conditions(subscription_conditions)
    existing = query.get_condition()
    if existing:
        # Preserve whatever condition the query already carries.
        combined = binary_condition(BooleanFunctions.AND, existing, combined)
    query.set_ast_condition(combined)
def test_event_id_column_format_expressions() -> None:
    """UUIDColumnProcessor rewrites event_id into
    replaceAll(toString(event_id), '-', '') while leaving other columns
    untouched, and the result formats with the original alias."""
    query = Query(
        Table("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration",
                Column("transaction.duration", None, "duration"),
            ),
            SelectedExpression(
                "the_event_id", Column("the_event_id", None, "event_id")
            ),
        ],
    )
    expected = Query(
        Table("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration",
                Column("transaction.duration", None, "duration"),
            ),
            SelectedExpression(
                "the_event_id",
                FunctionCall(
                    "the_event_id",
                    "replaceAll",
                    (
                        FunctionCall(
                            None, "toString", (Column(None, None, "event_id"),)
                        ),
                        Literal(None, "-"),
                        Literal(None, ""),
                    ),
                ),
            ),
        ],
    )

    UUIDColumnProcessor({"event_id"}).process_query(query, HTTPQuerySettings())
    assert expected.get_selected_columns() == query.get_selected_columns()

    formatted = query.get_selected_columns()[1].expression.accept(
        ClickhouseExpressionFormatter()
    )
    assert formatted == "(replaceAll(toString(event_id), '-', '') AS the_event_id)"
def test_first_level_conditions() -> None:
    """get_first_level_and/or_conditions flatten nested boolean operators,
    whether spelled via binary_condition or raw FunctionCalls."""
    c1 = binary_condition(
        ConditionFunctions.EQ,
        Column(None, "table1", "column1"),
        Literal(None, "test"),
    )
    c2 = binary_condition(
        ConditionFunctions.EQ,
        Column(None, "table2", "column2"),
        Literal(None, "test"),
    )
    c3 = binary_condition(
        ConditionFunctions.EQ,
        Column(None, "table3", "column3"),
        Literal(None, "test"),
    )

    # Nested ANDs flatten into a single list of leaves.
    nested_and = binary_condition(
        BooleanFunctions.AND,
        binary_condition(BooleanFunctions.AND, c1, c2),
        c3,
    )
    assert get_first_level_and_conditions(nested_and) == [c1, c2, c3]

    # An "and" expressed as a plain FunctionCall is unwrapped the same way.
    functional_and = binary_condition(
        BooleanFunctions.AND,
        FunctionCall(
            None, "equals", (FunctionCall(None, "and", (c1, c2)), Literal(None, 1))
        ),
        c3,
    )
    assert get_first_level_and_conditions(functional_and) == [c1, c2, c3]

    # ORs split only at the OR level; inner ANDs remain intact.
    top_level_or = binary_condition(
        BooleanFunctions.OR,
        binary_condition(BooleanFunctions.AND, c1, c2),
        c3,
    )
    assert get_first_level_or_conditions(top_level_or) == [
        binary_condition(BooleanFunctions.AND, c1, c2),
        c3,
    ]

    wrapped_or = binary_condition(
        ConditionFunctions.EQ,
        binary_condition(
            BooleanFunctions.OR, c1, binary_condition(BooleanFunctions.AND, c2, c3)
        ),
        Literal(None, 1),
    )
    assert get_first_level_or_conditions(wrapped_or) == [
        c1,
        binary_condition(BooleanFunctions.AND, c2, c3),
    ]
def test_iterate_over_query():
    """
    Builds an AST query and verifies that get_all_expressions yields every
    expression in clause order: select, condition, groupby, orderby.
    """
    c1 = Column(None, "t1", "c1")
    c2 = Column(None, "t1", "c2")
    f1 = FunctionCall("alias", "f1", (c1, c2))
    f2 = FunctionCall("alias", "f2", (c2,))
    where_clause = binary_condition(
        None, ConditionFunctions.EQ, c1, Literal(None, "1")
    )
    ordering = OrderBy(OrderByDirection.ASC, f2)

    query = Query(
        {},
        TableSource("my_table", ColumnSet([])),
        selected_columns=[SelectedExpression("alias", f1)],
        array_join=None,
        condition=where_clause,
        groupby=[f1],
        having=None,
        order_by=[ordering],
    )

    expected_expressions = [
        # selected columns
        c1,
        c2,
        f1,
        # condition
        c1,
        Literal(None, "1"),
        where_clause,
        # groupby
        c1,
        c2,
        f1,
        # order by
        c2,
        f2,
    ]

    assert list(query.get_all_expressions()) == expected_expressions
def test_not_handled_processor() -> None:
    """notHandled() expands into an arrayExists lambda over
    exception_stacks.mechanism_handled and formats with its alias."""
    columnset = ColumnSet([])
    unprocessed = Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "id")),
            SelectedExpression(
                "result",
                FunctionCall("result", "notHandled", tuple()),
            ),
        ],
    )

    # Per-element predicate: isNotNull(x) AND equals(assumeNotNull(x), 0).
    handled_predicate = Lambda(
        None,
        ("x",),
        binary_condition(
            BooleanFunctions.AND,
            FunctionCall(None, "isNotNull", (Argument(None, "x"),)),
            binary_condition(
                ConditionFunctions.EQ,
                FunctionCall(None, "assumeNotNull", (Argument(None, "x"),)),
                Literal(None, 0),
            ),
        ),
    )
    expected = Query(
        QueryEntity(EntityKey.EVENTS, ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "id")),
            SelectedExpression(
                "result",
                FunctionCall(
                    "result",
                    "arrayExists",
                    (
                        handled_predicate,
                        Column(None, None, "exception_stacks.mechanism_handled"),
                    ),
                ),
            ),
        ],
    )

    handled_functions.HandledFunctionsProcessor(
        "exception_stacks.mechanism_handled", columnset
    ).process_query(unprocessed, HTTPRequestSettings())
    assert expected.get_selected_columns() == unprocessed.get_selected_columns()

    formatted = unprocessed.get_selected_columns()[1].expression.accept(
        ClickhouseExpressionFormatter()
    )
    assert formatted == (
        "(arrayExists((x -> isNotNull(x) AND equals(assumeNotNull(x), 0)), exception_stacks.mechanism_handled) AS result)"
    )
def process_column(exp: Expression) -> Expression:
    # Rewrite references to "user" as nullIf(user, '') so empty strings
    # behave like missing users; all other expressions pass through.
    if not isinstance(exp, Column) or exp.column_name != "user":
        return exp
    return FunctionCall(
        exp.alias,
        "nullIf",
        (Column(None, None, "user"), Literal(None, "")),
    )
def build_in(project_column: str, projects: Sequence[int]) -> Expression:
    """Build an `in(<project_column>, tuple(<projects...>))` expression,
    aliasing the column with the `_snuba_` prefix."""
    project_literals = tuple(Literal(None, p) for p in projects)
    return FunctionCall(
        None,
        "in",
        (
            Column(f"_snuba_{project_column}", None, project_column),
            FunctionCall(None, "tuple", project_literals),
        ),
    )
def build_time_condition(
    time_columns: str, from_date: datetime, to_date: datetime
) -> Expression:
    """Build a half-open time window condition:
    `time_columns >= from_date AND time_columns < to_date`."""
    lower_bound = binary_condition(
        None,
        ConditionFunctions.GTE,
        Column(None, None, time_columns),
        Literal(None, from_date),
    )
    upper_bound = binary_condition(
        None,
        ConditionFunctions.LT,
        Column(None, None, time_columns),
        Literal(None, to_date),
    )
    return binary_condition(None, BooleanFunctions.AND, lower_bound, upper_bound)
def build_in(column: str, items: Sequence[int]) -> Expression:
    """Build an `in(<column>, tuple(<items...>))` expression."""
    values = FunctionCall(None, "tuple", tuple(Literal(None, i) for i in items))
    return FunctionCall(None, "in", (Column(None, None, column), values))
def transform(match: MatchResult, exp: Expression) -> Expression:
    """Replace the matched column with nullIf(<column>, '') so empty
    strings compare as NULL."""
    assert isinstance(exp, Column)  # mypy
    bare_column = Column(None, None, exp.column_name)
    return FunctionCall(None, "nullIf", (bare_column, Literal(None, "")))
def test_failure_rate_format_expressions() -> None:
    """failure_rate() expands to countIf over non-success transaction
    statuses divided by count(), keeping the "perf" alias."""
    unprocessed = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "column2")),
            SelectedExpression("perf", FunctionCall("perf", "failure_rate", ())),
        ],
    )

    # transaction_status != 0 AND != 1 AND != 2.
    status_filter = combine_and_conditions(
        [
            binary_condition(
                None,
                ConditionFunctions.NEQ,
                Column(None, None, "transaction_status"),
                Literal(None, code),
            )
            for code in [0, 1, 2]
        ]
    )
    expected = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "column2")),
            SelectedExpression(
                "perf",
                divide(
                    FunctionCall(None, "countIf", (status_filter,)),
                    count(),
                    "perf",
                ),
            ),
        ],
    )

    failure_rate_processor(ColumnSet([])).process_query(
        unprocessed, HTTPRequestSettings()
    )
    assert (
        expected.get_selected_columns_from_ast()
        == unprocessed.get_selected_columns_from_ast()
    )

    formatted = unprocessed.get_selected_columns_from_ast()[1].expression.accept(
        ClickhouseExpressionFormatter()
    )
    assert formatted == (
        "(divide(countIf(notEquals(transaction_status, 0) AND notEquals(transaction_status, 1) AND notEquals(transaction_status, 2)), count()) AS perf)"
    )
def test_events_column_format_expressions() -> None:
    """EventsColumnProcessor leaves plain columns alone while rewriting
    group_id (nullIf) and message (coalesce with search_message)."""
    unprocessed = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression("dr_claw", Column("dr_claw", None, "culprit")),
            SelectedExpression(
                "the_group_id", Column("the_group_id", None, "group_id")
            ),
            SelectedExpression("the_message", Column("the_message", None, "message")),
        ],
    )
    expected = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression("dr_claw", Column("dr_claw", None, "culprit")),
            SelectedExpression(
                "the_group_id",
                FunctionCall(
                    "the_group_id",
                    "nullIf",
                    (Column(None, None, "group_id"), Literal(None, 0)),
                ),
            ),
            SelectedExpression(
                "the_message",
                FunctionCall(
                    "the_message",
                    "coalesce",
                    (
                        Column(None, None, "search_message"),
                        Column(None, None, "message"),
                    ),
                ),
            ),
        ],
    )

    EventsColumnProcessor().process_query(unprocessed, HTTPRequestSettings())
    assert (
        expected.get_selected_columns_from_ast()
        == unprocessed.get_selected_columns_from_ast()
    )

    expected_strings = (
        "(nullIf(group_id, 0) AS the_group_id)",
        "(coalesce(search_message, message) AS the_message)",
    )
    for expected_str, column in zip(
        expected_strings, unprocessed.get_selected_columns_from_ast()[1:]
    ):
        formatted = column.expression.accept(ClickhouseExpressionFormatter())
        assert expected_str == formatted
def test_mand_conditions(table: str, mand_conditions: List[MandatoryCondition]) -> None:
    """MandatoryConditionApplier must append the storage's mandatory
    conditions to both the legacy and the AST query representations."""
    body = {"conditions": [["d", "=", "1"], ["c", "=", "3"]]}
    ast_condition = binary_condition(
        None,
        BooleanFunctions.AND,
        binary_condition(
            None,
            OPERATOR_TO_FUNCTION["="],
            Column("d", None, "d"),
            Literal(None, "1"),
        ),
        binary_condition(
            None,
            OPERATOR_TO_FUNCTION["="],
            Column("c", None, "c"),
            Literal(None, "3"),
        ),
    )
    query = Query(
        copy.deepcopy(body),
        TableSource(table, None, mand_conditions, ["c1"]),
        None,
        None,
        ast_condition,
    )
    # Untouched copy used to build the expected AST condition.
    reference = copy.deepcopy(query)

    MandatoryConditionApplier().process_query(
        query, HTTPRequestSettings(consistent=True)
    )

    body["conditions"].extend([c.legacy for c in mand_conditions])
    assert query.get_conditions() == body["conditions"]

    reference.add_condition_to_ast(
        combine_and_conditions([c.ast for c in mand_conditions])
    )
    assert query.get_condition_from_ast() == reference.get_condition_from_ast()
def test_granularity_added(
    entity_key: EntityKey,
    column: str,
    requested_granularity: Optional[int],
    query_granularity: int,
) -> None:
    """GranularityProcessor should AND a granularity condition onto the
    query, or raise InvalidGranularityException for invalid requests."""
    query = Query(
        QueryEntity(entity_key, ColumnSet([])),
        selected_columns=[SelectedExpression(column, Column(None, None, column))],
        condition=binary_condition(
            ConditionFunctions.EQ,
            Column(None, None, "metric_id"),
            Literal(None, 123),
        ),
        granularity=requested_granularity,
    )
    try:
        GranularityProcessor().process_query(query, HTTPQuerySettings())
    except InvalidGranularityException:
        # Only parameter sets with no valid granularity may raise.
        assert query_granularity is None
    else:
        assert query == Query(
            QueryEntity(entity_key, ColumnSet([])),
            selected_columns=[
                SelectedExpression(column, Column(None, None, column))
            ],
            condition=binary_condition(
                BooleanFunctions.AND,
                binary_condition(
                    ConditionFunctions.EQ,
                    Column(None, None, "granularity"),
                    Literal(None, query_granularity),
                ),
                binary_condition(
                    ConditionFunctions.EQ,
                    Column(None, None, "metric_id"),
                    Literal(None, 123),
                ),
            ),
            granularity=requested_granularity,
        )
def test_visit_expression():
    """A visitor over a curried function call sees nodes depth-first
    (outer call, inner function, then parameters) and returns that list."""
    param_col = Column("al", "c1", "t1")
    param_lit = Literal("al2", "test")
    param_fn = FunctionCall("al3", "f1", [param_col, param_lit])
    inner_col = Column("al4", "c2", "t1")
    inner_lit = Literal("al5", "test2")
    inner_fn = FunctionCall("al6", "f2", [inner_col, inner_lit])
    curried = CurriedFunctionCall("al7", inner_fn, [param_fn])

    visitor = DummyVisitor()
    ret = curried.accept(visitor)

    expected = [curried, inner_fn, inner_col, inner_lit, param_fn, param_col, param_lit]
    # Tests the state changes on the Visitor
    assert visitor.get_visited_nodes() == expected
    # Tests the return value of the visitor
    assert ret == expected
def nested_condition(
    column_name: str,
    key: str,
    operator: str,
    val: str,
) -> Expression:
    """Build `operator(column[key], val)` over a nested column access."""
    lhs = nested_expression(column_name, key)
    return binary_condition(operator, lhs, Literal(None, val))
def test_tag_translation() -> None:
    """tags[release] should translate into
    arrayElement(tags.value, indexOf(tags.key, 'release'))."""
    mapper = SubscriptableMapper(None, "tags", None, "tags")
    translated = mapper.attempt_map(
        SubscriptableReference(
            "tags[release]", Column(None, None, "tags"), Literal(None, "release")
        ),
        SnubaClickhouseMappingTranslator(TranslationMappers()),
    )

    key_index = FunctionCall(
        None,
        "indexOf",
        (Column(None, None, "tags.key"), Literal(None, "release")),
    )
    assert translated == FunctionCall(
        "tags[release]",
        "arrayElement",
        (Column(None, None, "tags.value"), key_index),
    )
def build_not_in(column: str, items: Sequence[int]) -> Expression:
    """Build `notIn(assumeNotNull(<column>), tuple(<items...>))`."""
    lhs = FunctionCall(None, "assumeNotNull", (Column(None, None, column),))
    rhs = FunctionCall(None, "tuple", tuple(Literal(None, i) for i in items))
    return FunctionCall(None, "notIn", (lhs, rhs))
def test_mand_conditions(table: str, mand_conditions: List[FunctionCall]) -> None:
    """MandatoryConditionApplier must AND the table's mandatory conditions
    onto the query's AST condition."""
    query = Query(
        Table(
            table,
            ColumnSet([]),
            final=False,
            sampling_rate=None,
            mandatory_conditions=mand_conditions,
            prewhere_candidates=["c1"],
        ),
        None,
        None,
        binary_condition(
            BooleanFunctions.AND,
            binary_condition(
                OPERATOR_TO_FUNCTION["="],
                Column("d", None, "d"),
                Literal(None, "1"),
            ),
            binary_condition(
                OPERATOR_TO_FUNCTION["="],
                Column("c", None, "c"),
                Literal(None, "3"),
            ),
        ),
    )
    # Untouched copy used to build the expected AST condition.
    reference = copy.deepcopy(query)

    MandatoryConditionApplier().process_query(
        query, HTTPRequestSettings(consistent=True)
    )

    reference.add_condition_to_ast(combine_and_conditions(mand_conditions))
    assert query.get_condition_from_ast() == reference.get_condition_from_ast()
def test_when_there_are_not_many_groups_to_exclude(self):
    """When the excluded group count is under max_group_ids_exclude, the
    processor adds a NOT IN condition instead of forcing FINAL."""
    request_settings = HTTPRequestSettings()
    state.set_config("max_group_ids_exclude", 5)
    replacer.set_project_exclude_groups(2, [100, 101, 102])

    self.extension.get_processor().process_query(
        self.query, self.valid_data, request_settings
    )

    assert self.query.get_conditions() == [
        ("project_id", "IN", [2]),
        (["assumeNotNull", ["group_id"]], "NOT IN", [100, 101, 102]),
    ]

    excluded_groups = FunctionCall(
        None,
        "notIn",
        (
            FunctionCall(None, "assumeNotNull", (Column(None, "group_id", None),)),
            FunctionCall(
                None,
                "tuple",
                (Literal(None, 100), Literal(None, 101), Literal(None, 102)),
            ),
        ),
    )
    assert self.query.get_condition_from_ast() == FunctionCall(
        None,
        BooleanFunctions.AND,
        (excluded_groups, build_in("project_id", [2])),
    )
    assert not self.query.get_final()
def visit_subscriptable_reference(
    self, exp: SubscriptableReference
) -> SubExpression:
    """Push a subscriptable reference into the subquery identified by the
    column's table alias, stripping the alias from the inner column."""
    assert (
        exp.column.table_name
    ), f"Invalid column expression in join: {exp}. Missing table alias"
    pushed_down = SubscriptableReference(
        exp.alias,
        Column(exp.column.alias, None, exp.column.column_name),
        Literal(exp.key.alias, exp.key.value),
    )
    return SubqueryExpression(
        main_expression=pushed_down, subquery_alias=exp.column.table_name
    )
def process_column(exp: Expression) -> Expression:
    # Rewrite group_id as nullIf(group_id, 0): 0 is the "no group"
    # sentinel and should read as NULL; everything else passes through.
    if not isinstance(exp, Column) or exp.column_name != "group_id":
        return exp
    return FunctionCall(
        exp.alias,
        "nullIf",
        (
            Column(None, exp.table_name, exp.column_name),
            Literal(None, 0),
        ),
    )
def attempt_map(
    self,
    expression: Column,
    children_translator: SnubaClickhouseStrictTranslator,
) -> Optional[FunctionCall]:
    """Map columns listed in self.columns to identity(NULL) aliased to the
    original (or qualified) column name; return None for other columns so
    later mappers can handle them."""
    if expression.column_name not in self.columns:
        return None
    alias = expression.alias or qualified_column(
        expression.column_name, expression.table_name or ""
    )
    return identity(Literal(None, None), alias)
def process_query(
    self,
    query: Query,
    extension_data: ExtensionData,
    request_settings: RequestSettings,
) -> None:
    """Apply the extension's granularity and a half-open time-range
    condition on the configured timestamp column."""
    from_date, to_date = self.get_time_limit(extension_data)
    query.set_granularity(extension_data["granularity"])

    lower_bound = binary_condition(
        ConditionFunctions.GTE,
        Column(None, None, self.__timestamp_column),
        Literal(None, from_date),
    )
    upper_bound = binary_condition(
        ConditionFunctions.LT,
        Column(None, None, self.__timestamp_column),
        Literal(None, to_date),
    )
    query.add_condition_to_ast(
        binary_condition(BooleanFunctions.AND, lower_bound, upper_bound)
    )
def attempt_map(
    self,
    expression: Column,
    children_translator: SnubaClickhouseStrictTranslator,
) -> Optional[Literal]:
    """Translate columns listed in self.columns into NULL literals that
    keep the original (or qualified) alias; return None otherwise."""
    if expression.column_name not in self.columns:
        return None
    alias = expression.alias or qualified_column(
        expression.column_name, expression.table_name or ""
    )
    return Literal(alias=alias, value=None)
def transform_expression(exp: Expression) -> Expression:
    """Convert a subscriptable's string key into an integer Literal.

    Raises InvalidExpressionException when the key is not a string made of
    decimal digits. Uses str.isdecimal() rather than str.isdigit():
    isdigit() accepts characters such as superscripts ('²') that int()
    cannot parse, which would leak an unhandled ValueError to the caller;
    isdecimal() accepts exactly the characters int() can parse.
    """
    if not isinstance(exp, SubscriptableReference):
        return exp
    key = exp.key
    if not isinstance(key.value, str) or not key.value.isdecimal():
        raise InvalidExpressionException.from_args(
            exp,
            "Expected a string key containing an integer in subscriptable.",
        )
    return SubscriptableReference(exp.alias, exp.column, Literal(None, int(key.value)))
def process_query(
    self,
    query: Query,
    extension_data: ExtensionData,
    request_settings: RequestSettings,
) -> None:
    """Constrain the query to the organization in the extension payload."""
    org_id = extension_data["organization"]
    org_condition = binary_condition(
        ConditionFunctions.EQ,
        Column("_snuba_org_id", None, "org_id"),
        Literal(None, org_id),
    )
    query.add_condition_to_ast(org_condition)
def test_nullable_nested_translation() -> None:
    """A nullable SubscriptableMapper should produce the null-safe
    expression built by _get_nullable_expr."""
    mapper = SubscriptableMapper(
        None, "measurements", None, "measurements", nullable=True
    )
    translated = mapper.attempt_map(
        SubscriptableReference(
            "measurements[lcp]",
            Column(None, None, "measurements"),
            Literal(None, "lcp"),
        ),
        SnubaClickhouseMappingTranslator(TranslationMappers()),
    )
    assert translated == _get_nullable_expr("measurements[lcp]")
def test_organization_extension_query_processing_happy_path():
    """Valid organization data should add an org_id = <id> condition."""
    extension = OrganizationExtension()
    valid_data = validate_jsonschema({"organization": 2}, extension.get_schema())
    query = Query({"conditions": []}, TableSource("my_table", ColumnSet([])))

    extension.get_processor().process_query(
        query, valid_data, HTTPRequestSettings()
    )

    assert query.get_condition_from_ast() == binary_condition(
        None,
        ConditionFunctions.EQ,
        Column(None, None, "org_id"),
        Literal(None, 2),
    )
def __init__(self) -> None:
    """Wire the sessions entity to its raw (writable) and hourly
    (materialized, read-optimized) storages."""
    writable_storage = get_writable_storage(StorageKey.SESSIONS_RAW)
    materialized_storage = get_storage(StorageKey.SESSIONS_HOURLY)
    read_schema = materialized_storage.get_schema()

    self.__time_group_columns = {"bucketed_started": "started"}
    self.__time_parse_columns = ("started", "received")

    # The hourly storage holds aggregate-function states, so reads must go
    # through the matching -Merge combinator functions.
    mappers = TranslationMappers(
        columns=[
            ColumnToCurriedFunction(
                None,
                "duration_quantiles",
                FunctionCall(
                    None,
                    "quantilesIfMerge",
                    (Literal(None, 0.5), Literal(None, 0.9)),
                ),
                (Column(None, None, "duration_quantiles"),),
            ),
            function_rule("sessions", "countIfMerge"),
            function_rule("sessions_crashed", "countIfMerge"),
            function_rule("sessions_abnormal", "countIfMerge"),
            function_rule("users", "uniqIfMerge"),
            function_rule("sessions_errored", "uniqIfMerge"),
            function_rule("users_crashed", "uniqIfMerge"),
            function_rule("users_abnormal", "uniqIfMerge"),
            function_rule("users_errored", "uniqIfMerge"),
        ]
    )

    super().__init__(
        storages=[writable_storage, materialized_storage],
        # TODO: Once we are ready to expose the raw data model and select
        # whether to use materialized storage or the raw one here, replace
        # this with a custom storage selector that decides when to use the
        # materialized data.
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=materialized_storage, mappers=mappers
        ),
        abstract_column_set=read_schema.get_columns(),
        writable_storage=writable_storage,
    )