def test_aliases() -> None:
    """
    Checks how ClickhouseExpressionFormatter renders aliased expressions,
    with and without a shared ParsingContext.
    """
    # No context: each column is formatted by its own fresh formatter and
    # both render the full "(expression AS alias)" form.
    col1 = Column("al1", "column1", "table1")
    col2 = Column("al1", "column1", "table1")

    assert col1.accept(ClickhouseExpressionFormatter()) == "(table1.column1 AS al1)"
    assert col2.accept(ClickhouseExpressionFormatter()) == "(table1.column1 AS al1)"

    # With context: both formatters receive the same ParsingContext, and the
    # second occurrence of the alias renders as the bare alias.
    pc = ParsingContext()
    assert col1.accept(ClickhouseExpressionFormatter(pc)) == "(table1.column1 AS al1)"
    assert col2.accept(ClickhouseExpressionFormatter(pc)) == "al1"

    # Hierarchical expression inherits parsing context and applies aliases.
    # The parameters of each inner call must be a tuple: note the trailing
    # comma, without which "(Column(...))" is just a parenthesized Column.
    f = FunctionCall(
        None,
        "f1",
        (
            FunctionCall("tag[something]", "tag", (Column(None, "column1", "table1"),)),
            FunctionCall("tag[something]", "tag", (Column(None, "column1", "table1"),)),
            FunctionCall("tag[something]", "tag", (Column(None, "column1", "table1"),)),
        ),
    )

    expected = "f1((tag(table1.column1) AS `tag[something]`), `tag[something]`, `tag[something]`)"
    assert f.accept(ClickhouseExpressionFormatter()) == expected
def test_timeseries_format_expressions(
    granularity: int,
    condition: Optional[FunctionCall],
    exp_column: FunctionCall,
    exp_condition: Optional[FunctionCall],
    formatted_column: str,
    formatted_condition: str,
) -> None:
    """
    Applies every TimeSeriesProcessor exposed by TransactionsEntity to a
    query and checks the resulting selected columns and condition, both as
    AST and as Clickhouse-formatted strings.
    """
    duration = "transaction.duration"

    query = Query(
        {},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression(duration, Column(duration, None, "duration")),
            SelectedExpression("my_time", Column("my_time", None, "time")),
        ],
        condition=condition,
        granularity=granularity,
    )
    expected = Query(
        {"granularity": granularity},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression(duration, Column(duration, None, "duration")),
            SelectedExpression(exp_column.alias, exp_column),
        ],
        condition=exp_condition,
    )

    # Only the time series processors of the entity are exercised here.
    for candidate in TransactionsEntity().get_query_processors():
        if not isinstance(candidate, TimeSeriesProcessor):
            continue
        candidate.process_query(query, HTTPRequestSettings())

    want_columns = expected.get_selected_columns_from_ast()
    got_columns = query.get_selected_columns_from_ast()
    assert want_columns == got_columns

    want_condition = expected.get_condition_from_ast()
    got_condition = query.get_condition_from_ast()
    assert want_condition == got_condition

    rendered_column = got_columns[1].expression.accept(ClickhouseExpressionFormatter())
    assert rendered_column == formatted_column

    # The condition rendering is only checked when a condition was supplied.
    if not condition:
        return
    rendered_condition = query.get_condition_from_ast().accept(
        ClickhouseExpressionFormatter()
    )
    assert formatted_condition == rendered_condition
def test_timeseries_column_format_expressions(
    granularity: int, ast_value: FunctionCall, formatted_value: str
) -> None:
    """
    Runs the TimeSeriesColumnProcessor instances of TransactionsDataset over
    a query selecting "my_time" and checks the replaced expression, both as
    AST and as formatted string.

    NOTE(review): another function with this same name is defined further
    down in this file; if both live in one module the later one replaces
    this one. Confirm which version is meant to run.
    """

    def duration_column() -> SelectedExpression:
        # Built fresh for each query so the two queries share no objects.
        return SelectedExpression(
            "transaction.duration",
            Column("transaction.duration", None, "duration"),
        )

    query = Query(
        {"granularity": granularity},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            duration_column(),
            SelectedExpression("my_time", Column("my_time", None, "time")),
        ],
    )
    expected = Query(
        {"granularity": granularity},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            duration_column(),
            SelectedExpression(ast_value.alias, ast_value),
        ],
    )

    for candidate in TransactionsDataset().get_query_processors():
        if not isinstance(candidate, TimeSeriesColumnProcessor):
            continue
        candidate.process_query(query, HTTPRequestSettings())

    want_columns = expected.get_selected_columns_from_ast()
    got_columns = query.get_selected_columns_from_ast()
    assert want_columns == got_columns

    rendered = got_columns[1].expression.accept(ClickhouseExpressionFormatter())
    assert rendered == formatted_value
def test_timeseries_column_format_expressions(
    granularity, ast_value, formatted_value
) -> None:
    """
    Applies a TimeSeriesColumnProcessor, built from the time group columns
    of TransactionsDataset, to a query selecting "bucketed_start" and checks
    the replaced expression as AST and as formatted string.

    NOTE(review): another function with this same name is defined earlier
    in this file; if both live in one module this one replaces the earlier
    one. Confirm which version is meant to run.
    """
    query = Query(
        {"granularity": granularity},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            Column("transaction.duration", "duration", None),
            Column("my_start", "bucketed_start", None),
        ],
    )
    expected = Query(
        {"granularity": granularity},
        TableSource("transactions", ColumnSet([])),
        selected_columns=[
            Column("transaction.duration", "duration", None),
            ast_value,
        ],
    )

    # The time group columns are a name-mangled private attribute of the
    # dataset; they are read through the mangled name.
    time_group_columns = TransactionsDataset()._TimeSeriesDataset__time_group_columns
    processor = TimeSeriesColumnProcessor(time_group_columns)
    processor.process_query(query, HTTPRequestSettings())

    want_columns = expected.get_selected_columns_from_ast()
    got_columns = query.get_selected_columns_from_ast()
    assert want_columns == got_columns

    rendered = got_columns[1].accept(ClickhouseExpressionFormatter())
    assert rendered == formatted_value
def test_failure_rate_format_expressions() -> None:
    """
    Checks that failure_rate_processor replaces failure_rate() with
    divide(countIf(notIn(transaction_status, tuple(0, 1, 2))), count()).

    NOTE(review): another function with this same name is defined further
    down in this file; if both live in one module the later one replaces
    this one. Confirm which version is meant to run.
    """
    query = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "column2")),
            SelectedExpression("perf", FunctionCall("perf", "failure_rate", ())),
        ],
    )

    status_codes = FunctionCall(
        None,
        "tuple",
        tuple(Literal(alias=None, value=code) for code in (0, 1, 2)),
    )
    status_not_listed = binary_condition(
        None,
        ConditionFunctions.NOT_IN,
        Column(None, None, "transaction_status"),
        status_codes,
    )
    failure_rate = divide(
        FunctionCall(None, "countIf", (status_not_listed,)),
        count(),
        "perf",
    )
    expected = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "column2")),
            SelectedExpression("perf", failure_rate),
        ],
    )

    processor = failure_rate_processor(ColumnSet([]))
    processor.process_query(query, HTTPRequestSettings())

    want_columns = expected.get_selected_columns_from_ast()
    got_columns = query.get_selected_columns_from_ast()
    assert want_columns == got_columns

    rendered = got_columns[1].expression.accept(ClickhouseExpressionFormatter())
    assert rendered == (
        "(divide(countIf(notIn(transaction_status, tuple(0, 1, 2))), count()) AS perf)"
    )
def test_events_column_format_expressions() -> None:
    """
    Checks that EventsColumnProcessor wraps group_id in nullIf(..., 0) and
    message in coalesce(search_message, message), leaving culprit untouched.
    """
    selected = (
        ("dr_claw", "culprit"),
        ("the_group_id", "group_id"),
        ("the_message", "message"),
    )
    query = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(alias, Column(alias, None, column_name))
            for alias, column_name in selected
        ],
    )

    group_id_expression = FunctionCall(
        "the_group_id",
        "nullIf",
        (Column(None, None, "group_id"), Literal(None, 0)),
    )
    message_expression = FunctionCall(
        "the_message",
        "coalesce",
        (Column(None, None, "search_message"), Column(None, None, "message")),
    )
    expected_query = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression("dr_claw", Column("dr_claw", None, "culprit")),
            SelectedExpression("the_group_id", group_id_expression),
            SelectedExpression("the_message", message_expression),
        ],
    )

    EventsColumnProcessor().process_query(query, HTTPRequestSettings())

    want_columns = expected_query.get_selected_columns_from_ast()
    got_columns = query.get_selected_columns_from_ast()
    assert want_columns == got_columns

    # Only the two processed columns (everything after the first) are
    # rendered and compared.
    expected_sql = (
        "(nullIf(group_id, 0) AS the_group_id)",
        "(coalesce(search_message, message) AS the_message)",
    )
    processed = query.get_selected_columns_from_ast()[1:]
    for wanted, column in zip(expected_sql, processed):
        rendered = column.expression.accept(ClickhouseExpressionFormatter())
        assert wanted == rendered
def test_failure_rate_format_expressions() -> None:
    """
    Checks that failure_rate_processor replaces failure_rate() with a
    countIf over the AND of three notEquals conditions, divided by count().

    NOTE(review): another function with this same name is defined earlier
    in this file; if both live in one module this one replaces the earlier
    one. Confirm which version is meant to run.
    """
    query = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "column2")),
            SelectedExpression("perf", FunctionCall("perf", "failure_rate", ())),
        ],
    )

    # One "transaction_status != code" condition per code, AND-ed together.
    status_checks = [
        binary_condition(
            None,
            ConditionFunctions.NEQ,
            Column(None, None, "transaction_status"),
            Literal(None, code),
        )
        for code in (0, 1, 2)
    ]
    failure_rate = divide(
        FunctionCall(None, "countIf", (combine_and_conditions(status_checks),)),
        count(),
        "perf",
    )
    expected = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "column2")),
            SelectedExpression("perf", failure_rate),
        ],
    )

    processor = failure_rate_processor(ColumnSet([]))
    processor.process_query(query, HTTPRequestSettings())

    want_columns = expected.get_selected_columns_from_ast()
    got_columns = query.get_selected_columns_from_ast()
    assert want_columns == got_columns

    rendered = got_columns[1].expression.accept(ClickhouseExpressionFormatter())
    assert rendered == (
        "(divide(countIf(notEquals(transaction_status, 0) AND notEquals(transaction_status, 1) AND notEquals(transaction_status, 2)), count()) AS perf)"
    )
def test_formatting() -> None:
    """
    Checks the Clickhouse rendering of the tags arrayJoin expression, first
    over the plain zipped key/value columns and then with a key filter
    applied through arrayFilter.
    """

    def zipped_tags():
        # Fresh expression for each case so the two cases share no objects.
        return zip_columns(
            Column(None, None, "tags.key"),
            Column(None, None, "tags.value"),
        )

    unfiltered = tupleElement(
        "tags_key",
        arrayJoin("snuba_all_tags", zipped_tags()),
        Literal(None, 1),
    )
    assert unfiltered.accept(ClickhouseExpressionFormatter()) == (
        "(tupleElement((arrayJoin(arrayMap((x, y -> tuple(x, y)), "
        "tags.key, tags.value)) AS snuba_all_tags), 1) AS tags_key)"
    )

    wanted_keys = [Literal(None, "t1"), Literal(None, "t2")]
    filtered = tupleElement(
        "tags_key",
        arrayJoin("snuba_all_tags", filter_key_values(zipped_tags(), wanted_keys)),
        Literal(None, 1),
    )
    assert filtered.accept(ClickhouseExpressionFormatter()) == (
        "(tupleElement((arrayJoin(arrayFilter((pair -> in("
        "tupleElement(pair, 1), tuple('t1', 't2'))), "
        "arrayMap((x, y -> tuple(x, y)), tags.key, tags.value))) AS snuba_all_tags), 1) AS tags_key)"
    )
def test_transaction_column_format_expressions() -> None:
    """
    Checks that TransactionColumnProcessor rewrites event_id into
    replaceAll(toString(event_id), '-', '') and leaves duration untouched.
    """
    query = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(alias, Column(alias, None, column_name))
            for alias, column_name in (
                ("transaction.duration", "duration"),
                ("the_event_id", "event_id"),
            )
        ],
    )

    event_id_as_string = FunctionCall(
        None, "toString", (Column(None, None, "event_id"),),
    )
    dashless_event_id = FunctionCall(
        "the_event_id",
        "replaceAll",
        (event_id_as_string, Literal(None, "-"), Literal(None, "")),
    )
    expected = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration",
                Column("transaction.duration", None, "duration"),
            ),
            SelectedExpression("the_event_id", dashless_event_id),
        ],
    )

    TransactionColumnProcessor().process_query(query, HTTPRequestSettings())

    want_columns = expected.get_selected_columns_from_ast()
    got_columns = query.get_selected_columns_from_ast()
    assert want_columns == got_columns

    rendered = got_columns[1].expression.accept(ClickhouseExpressionFormatter())
    assert rendered == "(replaceAll(toString(event_id), '-', '') AS the_event_id)"
def test_escaping(expression: Expression, expected: str) -> None:
    """
    Formats ``expression`` with a fresh ClickhouseExpressionFormatter and
    compares the output with ``expected``.
    """
    formatter = ClickhouseExpressionFormatter()
    rendered = expression.accept(formatter)
    assert rendered == expected
def _sql_data_list(self) -> Sequence[Tuple[str, str]]:
    """
    Builds the query as an ordered list of (clause name, SQL text) pairs,
    leaving out clauses whose text is empty.

    The result is stored on the instance and returned as is on later calls.
    One formatter, and therefore one ParsingContext, is used for every
    clause, and the clauses are formatted in the order below starting with
    SELECT.
    """
    cached = self.__sql_data_list
    if cached:
        return cached

    formatter = ClickhouseExpressionFormatter(ParsingContext())

    select_clause = "SELECT " + ", ".join(
        [selected.expression.accept(formatter) for selected in self.__selected_columns]
    )

    # TODO: The visitor approach will be used for the FROM clause as well.
    from_parts = [f"FROM {self.__data_source.format_from()}"]
    if self.__final:
        from_parts.append("FINAL")

    # TODO: Sampling rate will become one step of Clickhouse query processing
    sample_rate = None
    if self.__data_source.supports_sample():
        if self.__sample:
            sample_rate = self.__sample
        elif self.__settings.get_turbo():
            sample_rate = settings.TURBO_SAMPLE_RATE
    if sample_rate:
        from_parts.append(f"SAMPLE {sample_rate}")
    from_clause = " ".join(from_parts)

    array_join_clause = (
        f"ARRAY JOIN {self.__arrayjoin.accept(formatter)}" if self.__arrayjoin else ""
    )
    prewhere_clause = (
        f"PREWHERE {self.__prewhere.accept(formatter)}" if self.__prewhere else ""
    )
    where_clause = (
        f"WHERE {self.__condition.accept(formatter)}" if self.__condition else ""
    )

    group_clause = ""
    if self.__groupby:
        # reformat to use aliases generated during the select clause formatting.
        grouped = ", ".join([e.accept(formatter) for e in self.__groupby])
        group_clause = f"GROUP BY ({grouped})"
        if self.__hastotals:
            group_clause = f"{group_clause} WITH TOTALS"

    having_clause = (
        f"HAVING {self.__having.accept(formatter)}" if self.__having else ""
    )

    order_clause = ""
    if self.__orderby:
        ordering = ", ".join(
            [
                f"{e.expression.accept(formatter)} {e.direction.value}"
                for e in self.__orderby
            ]
        )
        order_clause = f"ORDER BY {ordering}"

    limitby_clause = (
        "LIMIT {} BY {}".format(*self.__limitby)
        if self.__limitby is not None
        else ""
    )
    limit_clause = (
        f"LIMIT {self.__limit} OFFSET {self.__offset}"
        if self.__limit is not None
        else ""
    )

    clauses = (
        ("select", select_clause),
        ("from", from_clause),
        ("array_join", array_join_clause),
        ("prewhere", prewhere_clause),
        ("where", where_clause),
        ("group", group_clause),
        ("having", having_clause),
        ("order", order_clause),
        ("limitby", limitby_clause),
        ("limit", limit_clause),
    )
    # Clauses that ended up empty are dropped from the result.
    self.__sql_data_list = [(name, text) for name, text in clauses if text]
    return self.__sql_data_list
def test_handled_processor() -> None:
    """
    Checks that HandledFunctionsProcessor replaces isHandled() with an
    arrayExists over exception_stacks.mechanism_handled using the lambda
    x -> (isNull(x) OR equals(assumeNotNull(x), 1)).
    """
    columnset = ColumnSet([])
    query = Query(
        {},
        TableSource("events", columnset),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "id")),
            SelectedExpression("result", FunctionCall("result", "isHandled", ())),
        ],
    )

    is_null = FunctionCall(None, "isNull", (Argument(None, "x"),))
    equals_one = binary_condition(
        None,
        ConditionFunctions.EQ,
        FunctionCall(None, "assumeNotNull", (Argument(None, "x"),)),
        Literal(None, 1),
    )
    handled_check = Lambda(
        None,
        ("x",),
        binary_condition(None, BooleanFunctions.OR, is_null, equals_one),
    )
    any_handled = FunctionCall(
        "result",
        "arrayExists",
        (handled_check, Column(None, None, "exception_stacks.mechanism_handled")),
    )
    expected = Query(
        {},
        TableSource("events", columnset),
        selected_columns=[
            SelectedExpression(name=None, expression=Column(None, None, "id")),
            SelectedExpression("result", any_handled),
        ],
    )

    processor = handled_functions.HandledFunctionsProcessor(
        "exception_stacks.mechanism_handled", columnset
    )
    processor.process_query(query, HTTPRequestSettings())

    want_columns = expected.get_selected_columns_from_ast()
    got_columns = query.get_selected_columns_from_ast()
    assert want_columns == got_columns

    rendered = got_columns[1].expression.accept(ClickhouseExpressionFormatter())
    assert rendered == (
        "(arrayExists((x -> (isNull(x) OR equals(assumeNotNull(x), 1))), exception_stacks.mechanism_handled) AS result)"
    )
def test_impact_format_expressions() -> None:
    """
    Checks that ImpactProcessor expands impact(column1, 300, user) into the
    expected arithmetic expression, both as AST and as the string produced
    by ClickhouseExpressionFormatter.
    """
    unprocessed = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            Column(None, "column2", None),
            FunctionCall(
                "perf",
                "impact",
                (
                    Column(None, "column1", None),
                    Literal(None, 300),
                    Column(None, "user", None),
                ),
            ),
        ],
    )
    expected = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            Column(None, "column2", None),
            plus(
                # First term: 1 - (countIf(column1 <= 300)
                #   + countIf(300 < column1 <= 300 * 4) / 2) / count()
                minus(
                    Literal(None, 1),
                    div(
                        plus(
                            countIf(
                                binary_condition(
                                    None,
                                    ConditionFunctions.LTE,
                                    Column(None, "column1", None),
                                    Literal(None, 300),
                                ),
                            ),
                            div(
                                countIf(
                                    binary_condition(
                                        None,
                                        BooleanFunctions.AND,
                                        binary_condition(
                                            None,
                                            ConditionFunctions.GT,
                                            Column(None, "column1", None),
                                            Literal(None, 300),
                                        ),
                                        binary_condition(
                                            None,
                                            ConditionFunctions.LTE,
                                            Column(None, "column1", None),
                                            multiply(
                                                Literal(None, 300), Literal(None, 4)
                                            ),
                                        ),
                                    ),
                                ),
                                Literal(None, 2),
                            ),
                        ),
                        count(),
                    ),
                ),
                # Second term: (1 - 1 / sqrt(...)) * 3
                multiply(
                    minus(
                        Literal(None, 1),
                        div(
                            Literal(None, 1),
                            # NOTE(review): the third argument of "sqrt" is a
                            # parenthesized FunctionCall, not a one-element
                            # tuple (no trailing comma), and "uniq" receives a
                            # bare Column as its parameters. The string
                            # asserted at the end of this test contains
                            # "sqrt(user, uniq(user))", so it depends on this
                            # shape. Presumably this mirrors what
                            # ImpactProcessor emits; confirm against the
                            # processor before changing either side.
                            FunctionCall(
                                None,
                                "sqrt",
                                (
                                    FunctionCall(
                                        None,
                                        "uniq",
                                        Column(
                                            alias=None,
                                            column_name="user",
                                            table_name=None,
                                        ),
                                    )
                                ),
                            ),
                        ),
                    ),
                    Literal(None, 3),
                ),
            ),
        ],
    )

    ImpactProcessor().process_query(unprocessed, HTTPRequestSettings())
    # The processor works on the query it is given, so the processed query is
    # compared against the hand-built expectation.
    assert (
        expected.get_selected_columns_from_ast()
        == unprocessed.get_selected_columns_from_ast()
    )

    # Render the processed impact expression (second selected column).
    ret = unprocessed.get_selected_columns_from_ast()[1].accept(
        ClickhouseExpressionFormatter()
    )
    assert ret == (
        "plus(minus(1, div(plus(countIf(lessOrEquals(column1, 300)), "
        "div(countIf(and(greater(column1, 300), lessOrEquals(column1, "
        "multiply(300, 4)))), 2)), count())), "
        "multiply(minus(1, div(1, sqrt(user, uniq(user)))), 3))"
    )
def test_apdex_format_expressions() -> None:
    """
    Checks that ApdexProcessor expands apdex(column1, 300) into
    (countIf(column1 <= 300) + countIf(300 < column1 <= 300 * 4) / 2) / count(),
    both as AST and as formatted string.
    """

    def column1() -> Column:
        # A fresh Column for every use, as each occurrence is its own node.
        return Column(None, "column1", None)

    query = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            Column(None, "column2", None),
            FunctionCall("perf", "apdex", (column1(), Literal(None, 300))),
        ],
    )

    within_threshold = FunctionCall(
        None,
        "countIf",
        (
            binary_condition(
                None, ConditionFunctions.LTE, column1(), Literal(None, 300),
            ),
        ),
    )
    above_threshold = binary_condition(
        None, ConditionFunctions.GT, column1(), Literal(None, 300),
    )
    within_four_times_threshold = binary_condition(
        None,
        ConditionFunctions.LTE,
        column1(),
        multiply(Literal(None, 300), Literal(None, 4)),
    )
    between_thresholds = FunctionCall(
        None,
        "countIf",
        (
            binary_condition(
                None,
                BooleanFunctions.AND,
                above_threshold,
                within_four_times_threshold,
            ),
        ),
    )
    apdex = div(
        plus(within_threshold, div(between_thresholds, Literal(None, 2))),
        FunctionCall(None, "count", ()),
    )
    expected = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[Column(None, "column2", None), apdex],
    )

    ApdexProcessor().process_query(query, HTTPRequestSettings())

    want_columns = expected.get_selected_columns_from_ast()
    got_columns = query.get_selected_columns_from_ast()
    assert want_columns == got_columns

    rendered = got_columns[1].accept(ClickhouseExpressionFormatter())
    assert rendered == (
        "div(plus(countIf(lessOrEquals(column1, 300)), "
        "div(countIf(and(greater(column1, 300), "
        "lessOrEquals(column1, multiply(300, 4)))), 2)), count())"
    )