def __get_filter_tags(self, query: Query) -> List[str]:
    """
    Returns the tag names the arrayFilter optimization can be applied to.

    The optimization is only considered when the tags_key column shows up
    in the select clause; the names come from the top level conditions on
    tags_key found in the WHERE and HAVING clauses.

    An empty list is returned when the "ast_tag_processor_enabled" runtime
    config is off, when tags_key is not selected, or when an OR condition
    is found while iterating either the condition or the having clause.
    """
    if not state.get_config("ast_tag_processor_enabled", 0):
        return []

    selected = query.get_selected_columns_from_ast() or []
    selects_tags_key = any(
        node.column_name == "tags_key"
        for selected_expression in selected
        for node in selected_expression
        if isinstance(node, Column)
    )
    if not selects_tags_key:
        return []

    def extract_tags_from_condition(
        clause: Optional[Expression],
    ) -> Optional[List[str]]:
        # A missing clause contributes no tags; None is reserved for
        # "an OR was found, do not optimize".
        if not clause:
            return []
        for node in clause:
            if is_binary_condition(node, BooleanFunctions.OR):
                return None
        return self.__extract_top_level_tag_conditions(clause)

    where_tags = extract_tags_from_condition(query.get_condition_from_ast())
    if where_tags is None:
        # An OR was found. We conservatively bail out, even though some of
        # these conditions could still be optimized.
        return []

    having_tags = extract_tags_from_condition(query.get_having_from_ast())
    if having_tags is None:
        # Same reasoning as for the WHERE clause.
        return []

    return where_tags + having_tags
def __init__(
    self, query: Query, settings: RequestSettings,
) -> None:
    """
    Captures the clauses of the given Snuba query, together with the
    request settings, as private attributes of this object.

    :param query: The Snuba query whose AST clauses, data source and
        paging/sampling options are read through its getters.
    :param settings: The request settings; the turbo flag is read from
        here and the settings object itself is kept as well.
    :raises AssertionError: If the query has a HAVING clause but no
        GROUP BY clause (only when assertions are enabled).
    """
    # Snuba query structure
    # Referencing them here directly since it makes it easier
    # to process this query independently from the Snuba Query
    # and there is no risk in doing so since they are immutable.
    self.__selected_columns = query.get_selected_columns_from_ast()
    self.__condition = query.get_condition_from_ast()
    self.__groupby = query.get_groupby_from_ast()
    self.__having = query.get_having_from_ast()
    self.__orderby = query.get_orderby_from_ast()
    self.__data_source = query.get_data_source()
    self.__arrayjoin = query.get_arrayjoin_from_ast()
    self.__granularity = query.get_granularity()
    self.__limit = query.get_limit()
    self.__limitby = query.get_limitby()
    self.__offset = query.get_offset()

    # Sanity check: a HAVING clause is only accepted together with a
    # GROUP BY clause.
    if self.__having:
        assert self.__groupby, "found HAVING clause with no GROUP BY"

    # Clickhouse specific fields. Some are still in the Snuba
    # query and have to be moved.
    self.__turbo = settings.get_turbo()
    self.__final = query.get_final()
    self.__sample = query.get_sample()
    self.__hastotals = query.has_totals()
    # TODO: Pre where processing will become a step in Clickhouse Query processing
    # instead of being pulled from the Snuba Query
    self.__prewhere = query.get_prewhere_ast()

    self.__settings = settings
    # Starts out unset. NOTE(review): presumably filled in lazily the first
    # time the query is formatted -- confirm against the rest of the class.
    self.__formatted_query: Optional[str] = None
def test_apdex_format_expressions() -> None:
    """
    ApdexProcessor must replace the apdex(column1, 300) call in the select
    clause with the equivalent countIf/count arithmetic, and the resulting
    expression must format to the expected Clickhouse string.
    """
    apdex_call = FunctionCall(
        "perf", "apdex", (Column(None, "column1", None), Literal(None, 300))
    )
    unprocessed = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[Column(None, "column2", None), apdex_call],
    )

    # countIf(column1 <= 300)
    within_threshold = FunctionCall(
        None,
        "countIf",
        (
            binary_condition(
                None,
                ConditionFunctions.LTE,
                Column(None, "column1", None),
                Literal(None, 300),
            ),
        ),
    )

    # countIf(column1 > 300 AND column1 <= 300 * 4)
    above_threshold = binary_condition(
        None,
        ConditionFunctions.GT,
        Column(None, "column1", None),
        Literal(None, 300),
    )
    below_four_times_threshold = binary_condition(
        None,
        ConditionFunctions.LTE,
        Column(None, "column1", None),
        multiply(Literal(None, 300), Literal(None, 4)),
    )
    within_tolerance = FunctionCall(
        None,
        "countIf",
        (
            binary_condition(
                None,
                BooleanFunctions.AND,
                above_threshold,
                below_four_times_threshold,
            ),
        ),
    )

    # (within_threshold + within_tolerance / 2) / count()
    expanded_apdex = div(
        plus(within_threshold, div(within_tolerance, Literal(None, 2))),
        FunctionCall(None, "count", ()),
    )
    expected = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[Column(None, "column2", None), expanded_apdex],
    )

    ApdexProcessor().process_query(unprocessed, HTTPRequestSettings())

    expected_columns = expected.get_selected_columns_from_ast()
    processed_columns = unprocessed.get_selected_columns_from_ast()
    assert expected_columns == processed_columns

    formatted = processed_columns[1].accept(ClickhouseExpressionFormatter())
    assert formatted == (
        "div(plus(countIf(lessOrEquals(column1, 300)), "
        "div(countIf(and(greater(column1, 300), "
        "lessOrEquals(column1, multiply(300, 4)))), 2)), count())"
    )
def test_impact_format_expressions() -> None:
    """
    ImpactProcessor must replace the impact(column1, 300, user) call in the
    select clause with the expanded arithmetic expression, and the resulting
    expression must format to the expected Clickhouse string.
    """
    impact_call = FunctionCall(
        "perf",
        "impact",
        (
            Column(None, "column1", None),
            Literal(None, 300),
            Column(None, "user", None),
        ),
    )
    unprocessed = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[Column(None, "column2", None), impact_call],
    )

    # countIf(column1 <= 300)
    within_threshold = countIf(
        binary_condition(
            None,
            ConditionFunctions.LTE,
            Column(None, "column1", None),
            Literal(None, 300),
        ),
    )

    # countIf(column1 > 300 AND column1 <= 300 * 4)
    above_threshold = binary_condition(
        None,
        ConditionFunctions.GT,
        Column(None, "column1", None),
        Literal(None, 300),
    )
    below_four_times_threshold = binary_condition(
        None,
        ConditionFunctions.LTE,
        Column(None, "column1", None),
        multiply(Literal(None, 300), Literal(None, 4)),
    )
    within_tolerance = countIf(
        binary_condition(
            None,
            BooleanFunctions.AND,
            above_threshold,
            below_four_times_threshold,
        ),
    )

    # 1 - (within_threshold + within_tolerance / 2) / count()
    first_term = minus(
        Literal(None, 1),
        div(
            plus(within_threshold, div(within_tolerance, Literal(None, 2))),
            count(),
        ),
    )

    # NOTE(review): the parameters of both "uniq" and "sqrt" are passed as
    # bare expressions rather than as one-element tuples. The expected
    # formatted string below ("sqrt(user, uniq(user))") depends on this, so
    # it is preserved as is. It looks unintentional -- confirm against
    # ImpactProcessor before changing it.
    uniq_users = FunctionCall(
        None,
        "uniq",
        Column(alias=None, column_name="user", table_name=None),
    )
    sqrt_uniq_users = FunctionCall(None, "sqrt", uniq_users)

    # (1 - 1 / sqrt(...)) * 3
    second_term = multiply(
        minus(Literal(None, 1), div(Literal(None, 1), sqrt_uniq_users)),
        Literal(None, 3),
    )

    expected = Query(
        {},
        TableSource("events", ColumnSet([])),
        selected_columns=[
            Column(None, "column2", None),
            plus(first_term, second_term),
        ],
    )

    ImpactProcessor().process_query(unprocessed, HTTPRequestSettings())

    expected_columns = expected.get_selected_columns_from_ast()
    processed_columns = unprocessed.get_selected_columns_from_ast()
    assert expected_columns == processed_columns

    formatted = processed_columns[1].accept(ClickhouseExpressionFormatter())
    assert formatted == (
        "plus(minus(1, div(plus(countIf(lessOrEquals(column1, 300)), "
        "div(countIf(and(greater(column1, 300), lessOrEquals(column1, "
        "multiply(300, 4)))), 2)), count())), "
        "multiply(minus(1, div(1, sqrt(user, uniq(user)))), 3))"
    )
def test_replace_expression():
    """
    Builds a query on the new AST and rewrites it with
    transform_expressions: every call to f1(...) becomes tag('f1'),
    keeping the original alias. All other expressions must be untouched.
    """
    col_c1 = Column(None, "c1", "t1")
    col_c2 = Column(None, "c2", "t1")
    f1_call = FunctionCall("alias", "f1", (col_c1, col_c2))
    f2_call = FunctionCall("alias", "f2", (col_c2,))

    orderby = OrderBy(OrderByDirection.ASC, f2_call)
    query = Query(
        {},
        TableSource("my_table", ColumnSet([])),
        selected_columns=[f1_call],
        array_join=None,
        condition=binary_condition(
            None, ConditionFunctions.EQ, f1_call, Literal(None, "1")
        ),
        groupby=[f1_call],
        having=None,
        order_by=[orderby],
    )

    def replace(exp: Expression) -> Expression:
        # Anything that is not a call to f1 passes through unchanged.
        if not isinstance(exp, FunctionCall) or exp.function_name != "f1":
            return exp
        return FunctionCall(exp.alias, "tag", (Literal(None, "f1"),))

    query.transform_expressions(replace)

    def expected_tag_call() -> FunctionCall:
        # What every f1(...) call is expected to have been replaced with.
        return FunctionCall("alias", "tag", (Literal(None, "f1"),))

    expected_query = Query(
        {},
        TableSource("my_table", ColumnSet([])),
        selected_columns=[expected_tag_call()],
        array_join=None,
        condition=binary_condition(
            None, ConditionFunctions.EQ, expected_tag_call(), Literal(None, "1"),
        ),
        groupby=[expected_tag_call()],
        having=None,
        order_by=[orderby],
    )

    assert (
        query.get_selected_columns_from_ast()
        == expected_query.get_selected_columns_from_ast()
    )
    assert query.get_condition_from_ast() == expected_query.get_condition_from_ast()
    assert query.get_groupby_from_ast() == expected_query.get_groupby_from_ast()
    assert query.get_having_from_ast() == expected_query.get_having_from_ast()
    assert query.get_orderby_from_ast() == expected_query.get_orderby_from_ast()

    actual_expressions = list(query.get_all_expressions())
    expected_expressions = list(expected_query.get_all_expressions())
    assert actual_expressions == expected_expressions