def test_timeseries_column_format_expressions(
    granularity, ast_value, formatted_value
) -> None:
    """Verify that TimeSeriesColumnProcessor rewrites the bucketed time
    column into the expected expression and that it formats as expected."""
    query = Query(
        {"granularity": granularity},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            Column("transaction.duration", "duration", None),
            Column("my_start", "bucketed_start", None),
        ],
    )
    reference = Query(
        {"granularity": granularity},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            Column("transaction.duration", "duration", None),
            ast_value,
        ],
    )
    dataset = TransactionsDataset()
    # Build the processor directly from the dataset's (name-mangled)
    # time group columns attribute.
    TimeSeriesColumnProcessor(
        dataset._TimeSeriesDataset__time_group_columns
    ).process_query(query, HTTPRequestSettings())

    assert (
        reference.get_selected_columns_from_ast()
        == query.get_selected_columns_from_ast()
    )
    formatted = query.get_selected_columns_from_ast()[1].accept(
        ClickhouseExpressionFormatter()
    )
    assert formatted == formatted_value
def test_timeseries_column_format_expressions(
    granularity: int, ast_value: FunctionCall, formatted_value: str
) -> None:
    """Run the transactions dataset's TimeSeriesColumnProcessor over a
    query and verify both the resulting AST and its Clickhouse formatting."""
    query = Query(
        {"granularity": granularity},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration",
                Column("transaction.duration", None, "duration"),
            ),
            SelectedExpression("my_time", Column("my_time", None, "time")),
        ],
    )
    reference = Query(
        {"granularity": granularity},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration",
                Column("transaction.duration", None, "duration"),
            ),
            SelectedExpression(ast_value.alias, ast_value),
        ],
    )
    dataset = TransactionsDataset()
    # Only the time-series processor is under test; skip the others.
    for query_processor in dataset.get_query_processors():
        if isinstance(query_processor, TimeSeriesColumnProcessor):
            query_processor.process_query(query, HTTPRequestSettings())

    assert (
        reference.get_selected_columns_from_ast()
        == query.get_selected_columns_from_ast()
    )
    formatted = query.get_selected_columns_from_ast()[1].expression.accept(
        ClickhouseExpressionFormatter()
    )
    assert formatted == formatted_value
def test_events_column_format_expressions() -> None:
    """The events column processor should expand group_id and message
    into their underlying expressions while leaving other columns alone."""
    query = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression("dr_claw", Column("dr_claw", None, "culprit")),
            SelectedExpression(
                "the_group_id", Column("the_group_id", None, "group_id")
            ),
            SelectedExpression(
                "the_message", Column("the_message", None, "message")
            ),
        ],
    )
    reference = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression("dr_claw", Column("dr_claw", None, "culprit")),
            SelectedExpression(
                "the_group_id",
                FunctionCall(
                    "the_group_id",
                    "nullIf",
                    (Column(None, None, "group_id"), Literal(None, 0)),
                ),
            ),
            SelectedExpression(
                "the_message",
                FunctionCall(
                    "the_message",
                    "coalesce",
                    (
                        Column(None, None, "search_message"),
                        Column(None, None, "message"),
                    ),
                ),
            ),
        ],
    )
    EventsColumnProcessor().process_query(query, HTTPRequestSettings())
    assert (
        reference.get_selected_columns_from_ast()
        == query.get_selected_columns_from_ast()
    )

    expected_formats = (
        "(nullIf(group_id, 0) AS the_group_id)",
        "(coalesce(search_message, message) AS the_message)",
    )
    # The first selected column is untouched; check formatting of the rest.
    for expected_format, selected in zip(
        expected_formats, query.get_selected_columns_from_ast()[1:]
    ):
        assert expected_format == selected.expression.accept(
            ClickhouseExpressionFormatter()
        )
def test_failure_rate_format_expressions() -> None:
    """failure_rate() should expand into divide(countIf(...), count())
    over the non-successful transaction statuses."""
    query = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "column2")),
            SelectedExpression("perf", FunctionCall("perf", "failure_rate", ())),
        ],
    )
    # Statuses 0, 1 and 2 are the "ok" codes excluded from the failure count.
    status_conditions = [
        binary_condition(
            None,
            ConditionFunctions.NEQ,
            Column(None, None, "transaction_status"),
            Literal(None, code),
        )
        for code in [0, 1, 2]
    ]
    reference = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "column2")),
            SelectedExpression(
                "perf",
                divide(
                    FunctionCall(
                        None,
                        "countIf",
                        (combine_and_conditions(status_conditions),),
                    ),
                    count(),
                    "perf",
                ),
            ),
        ],
    )
    failure_rate_processor(ColumnSet([])).process_query(
        query, HTTPRequestSettings()
    )
    assert (
        reference.get_selected_columns_from_ast()
        == query.get_selected_columns_from_ast()
    )
    formatted = query.get_selected_columns_from_ast()[1].expression.accept(
        ClickhouseExpressionFormatter()
    )
    assert formatted == (
        "(divide(countIf(notEquals(transaction_status, 0) AND notEquals(transaction_status, 1) AND notEquals(transaction_status, 2)), count()) AS perf)"
    )
def test_failure_rate_format_expressions() -> None:
    """failure_rate() should expand to a notIn check against the
    successful transaction statuses, divided by the total count."""
    query = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "column2")),
            SelectedExpression("perf", FunctionCall("perf", "failure_rate", ())),
        ],
    )
    # The three "successful" status codes that must be excluded.
    ok_statuses = FunctionCall(
        None,
        "tuple",
        (
            Literal(alias=None, value=0),
            Literal(alias=None, value=1),
            Literal(alias=None, value=2),
        ),
    )
    reference = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "column2")),
            SelectedExpression(
                "perf",
                divide(
                    FunctionCall(
                        None,
                        "countIf",
                        (
                            binary_condition(
                                None,
                                ConditionFunctions.NOT_IN,
                                Column(None, None, "transaction_status"),
                                ok_statuses,
                            ),
                        ),
                    ),
                    count(),
                    "perf",
                ),
            ),
        ],
    )
    failure_rate_processor(ColumnSet([])).process_query(
        query, HTTPRequestSettings()
    )
    assert (
        reference.get_selected_columns_from_ast()
        == query.get_selected_columns_from_ast()
    )
    formatted = query.get_selected_columns_from_ast()[1].expression.accept(
        ClickhouseExpressionFormatter()
    )
    assert formatted == (
        "(divide(countIf(notIn(transaction_status, tuple(0, 1, 2))), count()) AS perf)"
    )
def test_timeseries_format_expressions(
    granularity: int,
    condition: Optional[FunctionCall],
    exp_column: FunctionCall,
    exp_condition: Optional[FunctionCall],
    formatted_column: str,
    formatted_condition: str,
) -> None:
    """TimeSeriesProcessor should rewrite the time column (and any time
    condition) and the results should format to the expected SQL."""
    query = Query(
        {},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration",
                Column("transaction.duration", None, "duration"),
            ),
            SelectedExpression("my_time", Column("my_time", None, "time")),
        ],
        condition=condition,
        granularity=granularity,
    )
    reference = Query(
        {"granularity": granularity},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration",
                Column("transaction.duration", None, "duration"),
            ),
            SelectedExpression(exp_column.alias, exp_column),
        ],
        condition=exp_condition,
    )
    # Only the entity's time-series processor is exercised here.
    for query_processor in TransactionsEntity().get_query_processors():
        if isinstance(query_processor, TimeSeriesProcessor):
            query_processor.process_query(query, HTTPRequestSettings())

    assert (
        reference.get_selected_columns_from_ast()
        == query.get_selected_columns_from_ast()
    )
    assert reference.get_condition_from_ast() == query.get_condition_from_ast()
    formatted = query.get_selected_columns_from_ast()[1].expression.accept(
        ClickhouseExpressionFormatter()
    )
    assert formatted == formatted_column
    if condition:
        assert formatted_condition == query.get_condition_from_ast().accept(
            ClickhouseExpressionFormatter()
        )
def test_full_query():
    """Every field set through the legacy body dict must be readable back
    through the corresponding Query getter."""
    body = {
        "selected_columns": ["c1", "c2", "c3"],
        "conditions": [["c1", "=", "a"]],
        "arrayjoin": "tags",
        "having": [["c4", "=", "c"]],
        "groupby": ["project_id"],
        "aggregations": [["count()", "", "count"]],
        "orderby": "event_id",
        "limitby": (100, "environment"),
        "sample": 10,
        "limit": 100,
        "offset": 50,
        "totals": True,
        "granularity": 60,
    }
    q = Query(body, TableSource("my_table", ColumnSet([])))

    assert q.get_selected_columns() == ["c1", "c2", "c3"]
    assert q.get_aggregations() == [["count()", "", "count"]]
    assert q.get_groupby() == ["project_id"]
    assert q.get_conditions() == [["c1", "=", "a"]]
    assert q.get_arrayjoin() == "tags"
    assert q.get_having() == [["c4", "=", "c"]]
    assert q.get_orderby() == "event_id"
    assert q.get_limitby() == (100, "environment")
    assert q.get_sample() == 10
    assert q.get_limit() == 100
    assert q.get_offset() == 50
    assert q.has_totals() is True
    assert q.get_granularity() == 60
    assert q.get_data_source().format_from() == "my_table"
def test_query_extension_processing(
    raw_data: dict,
    expected_conditions: Sequence[Condition],
    expected_granularity: int,
):
    """The time series extension should add time-range conditions and a
    granularity to a bare query."""
    state.set_config("max_days", 1)
    extension = TimeSeriesExtension(
        default_granularity=60,
        default_window=datetime.timedelta(days=5),
        timestamp_column="timestamp",
    )
    payload = validate_jsonschema(raw_data, extension.get_schema())
    query = Query({"conditions": []}, TableSource("my_table", ColumnSet([])))
    settings_obj = RequestSettings(turbo=False, consistent=False, debug=False)
    extension.get_processor().process_query(query, payload, settings_obj)

    assert query.get_conditions() == expected_conditions
    assert query.get_granularity() == expected_granularity
def test_prewhere(initial_table, consistent, expected_table) -> None:
    """The read-only table selector should swap the data source table
    while preserving columns, prewhere candidates and mandatory conditions."""
    state.set_config("enable_events_readonly_table", True)
    body = {
        "conditions": [
            ["d", "=", "1"],
            ["c", "=", "3"],
            ["a", "=", "1"],
            ["b", "=", "2"],
        ],
    }
    columns = ColumnSet([("col", String())])
    query = Query(
        body,
        TableSource(initial_table, columns, [["time", "=", "1"]], ["c1"]),
    )
    settings_obj = HTTPRequestSettings(consistent=consistent)
    ReadOnlyTableSelector("sentry_dist", "sentry_ro").process_query(
        query, settings_obj
    )

    source = query.get_data_source()
    assert isinstance(source, TableSource)
    assert source.format_from() == expected_table
    assert source.get_columns() == columns
    assert source.get_prewhere_candidates() == ["c1"]
    assert source.get_mandatory_conditions() == [["time", "=", "1"]]
def query() -> ClickhouseQuery:
    """Build a minimal Clickhouse query wrapping a logical query that
    filters on project_id IN (2)."""
    logical = LogicalQuery(
        {"conditions": [("project_id", "IN", [2])]},
        TableSource("my_table", ColumnSet([])),
        condition=build_in("project_id", [2]),
    )
    return ClickhouseQuery(logical)
def test_project_extension_query_adds_rate_limits():
    """Processing the project extension must append exactly one project
    rate limit carrying the default per-second/concurrent limits."""
    extension = ProjectExtension(
        processor=ProjectExtensionProcessor(project_column="project_id")
    )
    payload = validate_jsonschema({'project': [2, 3]}, extension.get_schema())
    query = Query({"conditions": []}, TableSource("my_table", ColumnSet([])))
    settings_obj = RequestSettings(turbo=False, consistent=False, debug=False)
    limits_before = len(settings_obj.get_rate_limit_params())

    extension.get_processor().process_query(query, payload, settings_obj)

    rate_limits = settings_obj.get_rate_limit_params()
    # make sure a rate limit was added by the processing
    assert len(rate_limits) == limits_before + 1
    newest = rate_limits[-1]
    assert newest.bucket == '2'
    assert newest.per_second_limit == 1000
    assert newest.concurrent_limit == 1000
def test_transaction_column_format_expressions() -> None:
    """event_id columns should be rewritten to strip dashes from the
    stringified value; other selected columns stay untouched."""
    query = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration",
                Column("transaction.duration", None, "duration"),
            ),
            SelectedExpression(
                "the_event_id", Column("the_event_id", None, "event_id")
            ),
        ],
    )
    reference = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration",
                Column("transaction.duration", None, "duration"),
            ),
            SelectedExpression(
                "the_event_id",
                FunctionCall(
                    "the_event_id",
                    "replaceAll",
                    (
                        FunctionCall(
                            None, "toString", (Column(None, None, "event_id"),)
                        ),
                        Literal(None, "-"),
                        Literal(None, ""),
                    ),
                ),
            ),
        ],
    )
    TransactionColumnProcessor().process_query(query, HTTPRequestSettings())
    assert (
        reference.get_selected_columns_from_ast()
        == query.get_selected_columns_from_ast()
    )
    formatted = query.get_selected_columns_from_ast()[1].expression.accept(
        ClickhouseExpressionFormatter()
    )
    assert formatted == "(replaceAll(toString(event_id), '-', '') AS the_event_id)"
def test_format_clickhouse_specific_query() -> None:
    """
    Adds a few of the Clickhouse specific fields to the query and checks
    that each one shows up in the generated SQL sections.
    """
    query = Query(
        {"sample": 0.1, "totals": True, "limitby": (10, "environment")},
        TableSource("my_table", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column1", Column(None, None, "column1")),
            SelectedExpression("column2", Column(None, "table1", "column2")),
        ],
        condition=binary_condition(
            None,
            "eq",
            lhs=Column(None, None, "column1"),
            rhs=Literal(None, "blabla"),
        ),
        groupby=[
            Column(None, None, "column1"),
            Column(None, "table1", "column2"),
        ],
        having=binary_condition(
            None,
            "eq",
            lhs=Column(None, None, "column1"),
            rhs=Literal(None, 123),
        ),
        order_by=[OrderBy(OrderByDirection.ASC, Column(None, None, "column1"))],
        array_join=Column(None, None, "column1"),
    )
    query.set_final(True)
    query.set_offset(50)
    query.set_limit(100)

    sql_data = AstSqlQuery(query, HTTPRequestSettings()).sql_data()
    assert sql_data == {
        "from": "FROM my_table FINAL SAMPLE 0.1",
        "group": "GROUP BY (column1, table1.column2) WITH TOTALS",
        "having": "HAVING eq(column1, 123)",
        "array_join": "ARRAY JOIN column1",
        "limit": "LIMIT 100 OFFSET 50",
        "limitby": "LIMIT 10 BY environment",
        "order": "ORDER BY column1 ASC",
        "select": "SELECT column1, table1.column2",
        "where": "WHERE eq(column1, 'blabla')",
    }
def setup_method(self, test_method):
    """Prepare a project extension, its validated payload and a bare
    query before each test."""
    super().setup_method(test_method)
    self.extension = ProjectExtension(
        processor=ProjectWithGroupsProcessor(project_column="project_id")
    )
    self.valid_data = validate_jsonschema(
        {"project": 2}, self.extension.get_schema()
    )
    self.query = Query({"conditions": []}, TableSource("my_table", ColumnSet([])))
def test_col_replacement(
    initial_query: MutableMapping[str, Any],
    old_col: str,
    new_col: str,
    expected: Mapping[str, Any],
):
    """replace_column should rewrite every reference to old_col across
    the legacy query body."""
    query = Query(initial_query, TableSource("my_table", ColumnSet([])))
    query.replace_column(old_col, new_col)
    assert query.get_body() == expected
def test_prewhere(query_body, keys, new_conditions, prewhere_conditions) -> None:
    """The prewhere processor should move eligible conditions (bounded by
    the configured maximum) from the WHERE clause into PREWHERE."""
    settings.MAX_PREWHERE_CONDITIONS = 2
    query = Query(query_body, TableSource("my_table", ColumnSet([]), None, keys))
    PrewhereProcessor().process_query(query, HTTPRequestSettings())

    assert query.get_conditions() == new_conditions
    assert query.get_prewhere() == prewhere_conditions
def test_organization_extension_query_processing_happy_path():
    """A valid organization payload should add an org_id = <id> condition
    to the query AST."""
    extension = OrganizationExtension()
    payload = validate_jsonschema({"organization": 2}, extension.get_schema())
    query = Query({"conditions": []}, TableSource("my_table", ColumnSet([])))
    extension.get_processor().process_query(query, payload, HTTPRequestSettings())

    assert query.get_condition_from_ast() == binary_condition(
        None,
        ConditionFunctions.EQ,
        Column(None, None, "org_id"),
        Literal(None, 2),
    )
def test_iterate_over_query():
    """
    Creates a query with the new AST and iterates over all expressions,
    checking that they come back in the expected traversal order.
    """
    col1 = Column(None, "t1", "c1")
    col2 = Column(None, "t1", "c2")
    fn1 = FunctionCall("alias", "f1", (col1, col2))
    fn2 = FunctionCall("alias", "f2", (col2,))
    where = binary_condition(
        None, ConditionFunctions.EQ, col1, Literal(None, "1")
    )
    pre = binary_condition(
        None, ConditionFunctions.EQ, col2, Literal(None, "2")
    )
    ordering = OrderBy(OrderByDirection.ASC, fn2)

    query = Query(
        TableSource("my_table", ColumnSet([])),
        selected_columns=[SelectedExpression("alias", fn1)],
        array_join=None,
        condition=where,
        groupby=[fn1],
        prewhere=pre,
        having=None,
        order_by=[ordering],
    )

    assert list(query.get_all_expressions()) == [
        # selected columns
        col1,
        col2,
        fn1,
        # condition
        col1,
        Literal(None, "1"),
        where,
        # groupby
        col1,
        col2,
        fn1,
        # order by
        col2,
        fn2,
        # prewhere
        col2,
        Literal(None, "2"),
        pre,
    ]
def test_project_extension_query_processing(
    raw_data: dict,
    expected_conditions: Sequence[Condition],
    expected_ast_conditions: Expression,
):
    """The project extension should add matching legacy and AST conditions."""
    extension = ProjectExtension(project_column="project_id")
    payload = validate_jsonschema(raw_data, extension.get_schema())
    query = Query({"conditions": []}, TableSource("my_table", ColumnSet([])))
    extension.get_processor().process_query(query, payload, HTTPRequestSettings())

    assert query.get_conditions() == expected_conditions
    assert query.get_condition_from_ast() == expected_ast_conditions
def test_organization_extension_query_processing_happy_path():
    """A valid organization payload should add an org_id legacy condition."""
    extension = OrganizationExtension()
    payload = validate_jsonschema({"organization": 2}, extension.get_schema())
    query = Query({"conditions": []}, TableSource("my_table", ColumnSet([])))
    settings_obj = RequestSettings(turbo=False, consistent=False, debug=False)
    extension.get_processor().process_query(query, payload, settings_obj)

    assert query.get_conditions() == [("org_id", "=", 2)]
def test_edit_query():
    """Each mutator on Query should be reflected by its matching getter."""
    query = Query(
        {
            "selected_columns": ["c1", "c2", "c3"],
            "conditions": [["c1", "=", "a"]],
            "arrayjoin": "tags",
            "having": [["c4", "=", "c"]],
            "groupby": ["project_id"],
            "aggregations": [["count()", "", "count"]],
            "orderby": "event_id",
            "limitby": (100, "environment"),
            "sample": 10,
            "limit": 100,
            "offset": 50,
            "totals": True,
        },
        TableSource("my_table", ColumnSet([])),
    )

    query.set_selected_columns(["c4"])
    assert query.get_selected_columns() == ["c4"]

    query.set_aggregations([["different_agg()", "", "something"]])
    assert query.get_aggregations() == [["different_agg()", "", "something"]]

    query.add_groupby(["more", "more2"])
    assert query.get_groupby() == ["project_id", "more", "more2"]

    # add_conditions appends to the existing list; set_conditions replaces it.
    query.add_conditions([["c5", "=", "9"]])
    assert query.get_conditions() == [["c1", "=", "a"], ["c5", "=", "9"]]
    query.set_conditions([["c6", "=", "10"]])
    assert query.get_conditions() == [["c6", "=", "10"]]

    query.set_arrayjoin("not_tags")
    assert query.get_arrayjoin() == "not_tags"

    query.set_granularity(7200)
    assert query.get_granularity() == 7200

    query.set_prewhere([["pc6", "=", "10"]])
    assert query.get_prewhere() == [["pc6", "=", "10"]]
def test_project_extension_query_processing(
    raw_data: dict, expected_conditions: Sequence[Condition]
):
    """The project extension should add the expected legacy conditions."""
    extension = ProjectExtension(
        processor=ProjectExtensionProcessor(project_column="project_id")
    )
    payload = validate_jsonschema(raw_data, extension.get_schema())
    query = Query({"conditions": []}, TableSource("my_table", ColumnSet([])))
    settings_obj = RequestSettings(turbo=False, consistent=False, debug=False)
    extension.get_processor().process_query(query, payload, settings_obj)

    assert query.get_conditions() == expected_conditions
def test_prewhere(
    query_body: MutableMapping[str, Any],
    keys: Sequence[str],
    new_ast_condition: Optional[Expression],
    new_prewhere_ast_condition: Optional[Expression],
) -> None:
    """The prewhere processor should split the AST condition between the
    WHERE and PREWHERE clauses based on the table's key columns."""
    settings.MAX_PREWHERE_CONDITIONS = 2
    events = get_dataset("events")
    query = parse_query(query_body, events)
    query.set_data_source(TableSource("my_table", ColumnSet([]), None, keys))

    PrewhereProcessor().process_query(Query(query), HTTPRequestSettings())

    assert query.get_condition_from_ast() == new_ast_condition
    assert query.get_prewhere_ast() == new_prewhere_ast_condition
def test_project_extension_project_rate_limits_are_overridden():
    """Per-project config keys must override the default rate limits."""
    extension = ProjectExtension(project_column="project_id")
    payload = validate_jsonschema({"project": [3, 4]}, extension.get_schema())
    query = Query({"conditions": []}, TableSource("my_table", ColumnSet([])))
    settings_obj = HTTPRequestSettings()
    # Override the limits for project 3 only.
    state.set_config("project_per_second_limit_3", 5)
    state.set_config("project_concurrent_limit_3", 10)

    extension.get_processor().process_query(query, payload, settings_obj)

    newest = settings_obj.get_rate_limit_params()[-1]
    assert newest.bucket == "3"
    assert newest.per_second_limit == 5
    assert newest.concurrent_limit == 10
def test_empty_query():
    """A query built from an empty body should report the documented
    default from every getter."""
    q = Query({}, TableSource("my_table", ColumnSet([])))

    assert q.get_selected_columns() is None
    assert q.get_aggregations() is None
    assert q.get_groupby() is None
    assert q.get_conditions() is None
    assert q.get_arrayjoin() is None
    assert q.get_having() == []
    assert q.get_orderby() is None
    assert q.get_limitby() is None
    assert q.get_sample() is None
    assert q.get_limit() is None
    assert q.get_offset() == 0
    assert q.has_totals() is False
    assert q.get_data_source().format_from() == "my_table"
def test_mand_conditions(table: str, mand_conditions: List[MandatoryCondition]) -> None:
    """MandatoryConditionApplier must append the table's mandatory
    conditions to both the legacy body and the AST condition."""
    body = {"conditions": [["d", "=", "1"], ["c", "=", "3"]]}
    ast_condition = binary_condition(
        None,
        BooleanFunctions.AND,
        binary_condition(
            None,
            OPERATOR_TO_FUNCTION["="],
            Column("d", None, "d"),
            Literal(None, "1"),
        ),
        binary_condition(
            None,
            OPERATOR_TO_FUNCTION["="],
            Column("c", None, "c"),
            Literal(None, "3"),
        ),
    )
    query = Query(
        copy.deepcopy(body),
        TableSource(table, None, mand_conditions, ["c1"]),
        None,
        None,
        ast_condition,
    )
    # Keep a pristine copy so the expected AST can be built independently.
    reference = copy.deepcopy(query)

    MandatoryConditionApplier().process_query(
        query, HTTPRequestSettings(consistent=True)
    )

    body["conditions"].extend([c.legacy for c in mand_conditions])
    assert query.get_conditions() == body["conditions"]

    reference.add_condition_to_ast(
        combine_and_conditions([c.ast for c in mand_conditions])
    )
    assert query.get_condition_from_ast() == reference.get_condition_from_ast()
def test_query_parameters():
    """Constructor keyword parameters should round-trip through getters."""
    q = Query(
        {},
        TableSource("my_table", ColumnSet([])),
        limitby=(100, "environment"),
        sample=10,
        limit=100,
        offset=50,
        totals=True,
        granularity=60,
    )

    assert q.get_limitby() == (100, "environment")
    assert q.get_sample() == 10
    assert q.get_limit() == 100
    assert q.get_offset() == 50
    assert q.has_totals() is True
    assert q.get_granularity() == 60
    assert q.get_from_clause().format_from() == "my_table"
def test_handled_processor_invalid() -> None:
    """isHandled called with an argument is invalid and must raise."""
    columnset = ColumnSet([])
    query = Query(
        {},
        TableSource("events", columnset),
        selected_columns=[
            SelectedExpression(
                "result",
                FunctionCall(
                    "result",
                    "isHandled",
                    (Column(None, None, "type"),),
                ),
            ),
        ],
    )
    processor = handled_functions.HandledFunctionsProcessor(
        "exception_stacks.mechanism_handled", columnset
    )
    with pytest.raises(InvalidExpressionException):
        processor.process_query(query, HTTPRequestSettings())
def test_query_extension_processing(
    raw_data: dict,
    expected_conditions: Sequence[Condition],
    expected_ast_condition: Expression,
    expected_granularity: int,
):
    """The time series extension should populate the legacy conditions,
    the AST condition and the granularity."""
    state.set_config("max_days", 1)
    extension = TimeSeriesExtension(
        default_granularity=60,
        default_window=timedelta(days=5),
        timestamp_column="timestamp",
    )
    payload = validate_jsonschema(raw_data, extension.get_schema())
    query = Query({"conditions": []}, TableSource("my_table", ColumnSet([])))
    extension.get_processor().process_query(query, payload, HTTPRequestSettings())

    assert query.get_conditions() == expected_conditions
    assert query.get_condition_from_ast() == expected_ast_condition
    assert query.get_granularity() == expected_granularity
def process_query(self, query: Query, request_settings: RequestSettings) -> None:
    """Swap the query's data source for the read-only table when the
    feature flag is enabled and the request does not require consistency."""
    if not state.get_config("enable_events_readonly_table", False):
        return
    # Consistent reads must keep hitting the primary table.
    if request_settings.get_consistent():
        return

    current_source = query.get_data_source()
    if current_source.format_from() != self.__table_to_replace:
        return

    query.set_data_source(
        TableSource(
            table_name=self.__read_only_table,
            columns=current_source.get_columns(),
            mandatory_conditions=current_source.get_mandatory_conditions(),
            prewhere_candidates=current_source.get_prewhere_candidates(),
        )
    )