def test_timeseries_column_format_expressions(
    granularity, ast_value, formatted_value
) -> None:
    unprocessed = Query(
        {"granularity": granularity},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            Column("transaction.duration", "duration", None),
            Column("my_start", "bucketed_start", None),
        ],
    )
    expected = Query(
        {"granularity": granularity},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            Column("transaction.duration", "duration", None),
            ast_value,
        ],
    )

    dataset = TransactionsDataset()
    TimeSeriesColumnProcessor(
        dataset._TimeSeriesDataset__time_group_columns
    ).process_query(unprocessed, HTTPRequestSettings())
    # The bucketed column should be rewritten into the expected time series
    # expression, while the other selected columns are left untouched.
    assert (
        expected.get_selected_columns_from_ast()
        == unprocessed.get_selected_columns_from_ast()
    )

    ret = unprocessed.get_selected_columns_from_ast()[1].accept(
        ClickhouseExpressionFormatter()
    )
    assert ret == formatted_value
def test_no_split(dataset_name: str):
    events = get_dataset(dataset_name)
    query = Query(
        {
            "selected_columns": ["event_id"],
            "conditions": [""],
            "orderby": "event_id",
            "sample": 10,
            "limit": 100,
            "offset": 50,
        },
        events.get_dataset_schemas().get_read_schema().get_data_source(),
    )

    @split_query
    def do_query(dataset: Dataset, request: Request, timer: Timer):
        assert request.query == query

    request = Request(
        query,
        RequestSettings(False, False, False),
        {},
    )

    do_query(events, request, None)
def test_prewhere(initial_table, consistent, expected_table) -> None:
    state.set_config("enable_events_readonly_table", True)
    body = {
        "conditions": [
            ["d", "=", "1"],
            ["c", "=", "3"],
            ["a", "=", "1"],
            ["b", "=", "2"],
        ],
    }
    cols = ColumnSet([("col", String())])

    query = Query(
        body,
        TableSource(initial_table, cols, [["time", "=", "1"]], ["c1"]),
    )
    request_settings = HTTPRequestSettings(consistent=consistent)
    processor = ReadOnlyTableSelector("sentry_dist", "sentry_ro")
    processor.process_query(query, request_settings)

    # The selector should route the query to the expected table while leaving
    # the rest of the data source (columns, prewhere candidates, mandatory
    # conditions) untouched.
    source = query.get_data_source()
    assert isinstance(source, TableSource)
    assert source.format_from() == expected_table
    assert source.get_columns() == cols
    assert source.get_prewhere_candidates() == ["c1"]
    assert source.get_mandatory_conditions() == [["time", "=", "1"]]
def test_order_by(self):
    """
    Order by in Snuba is represented as -COL_NAME when ordering DESC.
    Since the column reaches the column_expr call with the `-` character
    already prepended, this introduces a number of corner cases depending
    on whether the column is aliased, whether it gets processed into
    something else, and whether it is escaped.
    This test is supposed to cover those cases.
    """
    source = self.dataset.get_dataset_schemas().get_read_schema().get_data_source()
    query = Query({}, source)

    # Columns that start with a negative sign (used in orderby to signify
    # sort order) retain the '-' sign outside the escaping backticks (if any)
    assert (
        column_expr(self.dataset, "-timestamp", deepcopy(query), ParsingContext())
        == "-timestamp"
    )
    assert (
        column_expr(
            self.dataset, "-sentry:release", deepcopy(query), ParsingContext()
        )
        == "-`sentry:release`"
    )

    context = ParsingContext()
    context.add_alias("al1")
    assert (
        column_expr(self.dataset, "-timestamp", deepcopy(query), context, "al1")
        == "-al1"
    )

    assert (
        column_expr(
            self.dataset, "-timestamp", deepcopy(query), ParsingContext(), "al1"
        )
        == "-(timestamp AS al1)"
    )

    assert (
        column_expr(
            self.dataset,
            "-exception_stacks.type",
            deepcopy(query),
            ParsingContext(),
        )
        == "-(exception_stacks.type AS `exception_stacks.type`)"
    )

    context = ParsingContext()
    context.add_alias("`exception_stacks.type`")
    assert (
        column_expr(
            self.dataset,
            "-exception_stacks.type",
            deepcopy(query),
            context,
        )
        == "-`exception_stacks.type`"
    )
def test_format_clickhouse_specific_query() -> None:
    """
    Adds a few of the Clickhouse specific fields to the query.
    """
    query = Query(
        {"sample": 0.1, "totals": True, "limitby": (10, "environment")},
        TableSource("my_table", ColumnSet([])),
        selected_columns=[
            Column(None, "column1", None),
            Column(None, "column2", "table1"),
        ],
        condition=binary_condition(
            None,
            "eq",
            lhs=Column(None, "column1", None),
            rhs=Literal(None, "blabla"),
        ),
        groupby=[
            Column(None, "column1", None),
            Column(None, "column2", "table1"),
        ],
        having=binary_condition(
            None,
            "eq",
            lhs=Column(None, "column1", None),
            rhs=Literal(None, 123),
        ),
        order_by=[OrderBy(OrderByDirection.ASC, Column(None, "column1", None))],
        array_join=Column(None, "column1", None),
    )
    query.set_final(True)
    query.set_offset(50)
    query.set_limit(100)

    request_settings = HTTPRequestSettings()
    clickhouse_query = AstClickhouseQuery(query, request_settings)

    expected = (
        "SELECT column1, table1.column2 "
        "FROM my_table FINAL SAMPLE 0.1 "
        "ARRAY JOIN column1 "
        "WHERE eq(column1, 'blabla') "
        "GROUP BY (column1, table1.column2) WITH TOTALS "
        "HAVING eq(column1, 123) "
        "ORDER BY column1 ASC "
        "LIMIT 10 BY environment "
        "LIMIT 100 OFFSET 50"
    )

    assert clickhouse_query.format_sql() == expected
def test_project_extension_query_processing(
    raw_data: dict, expected_conditions: Sequence[Condition]
):
    extension = ProjectExtension(processor=ProjectExtensionProcessor())
    valid_data = validate_jsonschema(raw_data, extension.get_schema())
    query = Query({"conditions": []})
    request_settings = RequestSettings(turbo=False, consistent=False, debug=False)

    extension.get_processor().process_query(query, valid_data, request_settings)
    assert query.get_conditions() == expected_conditions