def process_query(self, query: Query, request_settings: RequestSettings) -> None:
    """
    Swap subscript accesses on mapping columns for the column listed in
    this processor's specs, whenever the accessed key has an entry there.

    Expressions that are not subscriptable references, or whose
    column/key pair has no entry in the specs, are left untouched.
    """

    def transform_nested_column(exp: Expression) -> Expression:
        reference = match_subscriptable_reference(exp)
        if reference is None or reference.column_name not in self.__specs:
            return exp

        promoted_name = self.__specs[reference.column_name].get(reference.key)
        if promoted_name is None:
            return exp

        promoted_type = query.get_from_clause().get_columns().get(promoted_name, None)
        type_name = str(promoted_type) if promoted_type else None

        # Mapping columns only hold strings and clients rely on that, so a
        # promoted column whose type is not a plain String (FixedString is
        # treated as non-plain) is wrapped in toString when casting is on.
        needs_cast = self.__cast_to_string and not (
            type_name and "String" in type_name and "FixedString" not in type_name
        )
        if needs_cast:
            return FunctionCall(
                exp.alias,
                "toString",
                (Column(None, reference.table_name, promoted_name),),
            )
        return Column(exp.alias, reference.table_name, promoted_name)

    query.transform_expressions(transform_nested_column)
def process_query(self, query: Query, request_settings: RequestSettings) -> None:
    """
    Wrap references to the ``group_id`` and ``message`` columns.

    ``group_id`` becomes ``nullIf(group_id, 0)`` and ``message`` becomes
    ``coalesce(search_message, message)``. The alias of the original
    column is moved onto the wrapping function.
    """

    def process_column(exp: Expression) -> Expression:
        if not isinstance(exp, Column):
            return exp

        name = exp.column_name
        if name == "group_id":
            arguments = (
                Column(None, exp.table_name, name),
                Literal(None, 0),
            )
            return FunctionCall(exp.alias, "nullIf", arguments)

        if name == "message":
            # message was renamed to search_message without a backfill, so a
            # record carries one field or the other.
            # TODO this can be removed once all data has search_message filled in.
            arguments = (
                Column(None, exp.table_name, "search_message"),
                Column(None, exp.table_name, name),
            )
            return FunctionCall(exp.alias, "coalesce", arguments)

        return exp

    query.transform_expressions(process_column)
def process_query(self, query: Query, query_settings: QuerySettings) -> None:
    """
    Rewrite the arrayJoin expressions matched by this processor's pattern
    into their optimized equivalents.

    Two situations are optimized:

    * every column in ``self.all_columns`` is array-joined in the query:
      each matching expression is replaced by an element of a shared
      tuple (filtered when filters on the arrays were found);
    * exactly one array join is present and it is on one of
      ``self.key_columns``: the expression is replaced by a filtered
      version when a filter on that column was found.

    Any other matching expression is returned unchanged.
    """
    array_joins_in_query = self.__get_array_joins_in_query(query)
    tuple_alias = self.__get_unused_alias(query)
    single_filtered, multiple_filtered = self.get_filtered_arrays(
        query, self.key_columns
    )

    def replace_expression(expr: Expression) -> Expression:
        match = self.__array_join_pattern.match(expr)

        # The arrayJoins we are looking for are not present, so skip this entirely
        if match is None:
            return expr

        # All of the possible array joins are present
        if array_joins_in_query == set(self.all_columns):
            tuple_index = self.__find_tuple_index(match.string("col"))

            if single_filtered or multiple_filtered:
                single_index_filtered = {
                    self.__find_tuple_index(column_name): filtered
                    for column_name, filtered in single_filtered.items()
                }
                multiple_indices_filtered = {
                    tuple(
                        self.__find_tuple_index(column) for column in column_names
                    ): filtered
                    for column_names, filtered in multiple_filtered.items()
                }
                return filtered_mapping_tuples(
                    expr.alias,
                    tuple_alias,
                    tuple_index,
                    self.all_columns,
                    single_index_filtered,
                    multiple_indices_filtered,
                )

            return unfiltered_mapping_tuples(
                expr.alias, tuple_alias, tuple_index, self.all_columns
            )

        # Only array join present is one of the key columns
        elif len(array_joins_in_query) == 1 and any(
            column in array_joins_in_query for column in self.key_columns
        ):
            # Read the single element WITHOUT removing it: this closure runs
            # once per expression in the query, and emptying the shared set
            # on the first match would leave every later occurrence of the
            # same arrayJoin unoptimized.
            column_name = next(iter(array_joins_in_query))
            if column_name in single_filtered:
                return filtered_mapping_keys(
                    expr.alias, column_name, single_filtered[column_name]
                )

        # No viable optimization
        return expr

    query.transform_expressions(replace_expression)
def process_query(self, query: Query, query_settings: QuerySettings) -> None:
    """
    Replace every expression matched by the processor's array "has"
    pattern with the sub-expression that the pattern captured under the
    ``has`` name.
    """

    def replace_expression(expr: Expression) -> Expression:
        found = self.__array_has_pattern.match(expr)
        # Nothing captured means this is not the condition we look for.
        return expr if found is None else found.expression("has")

    query.transform_expressions(replace_expression)
def process_query(self, query: Query, request_settings: RequestSettings) -> None:
    """
    Apply the generic expression transformation everywhere except the
    condition, then process the condition separately.

    The condition is first run through the optimizable-condition
    transformer; if that leaves it unchanged, the generic transformation
    is applied to it instead.
    """
    query.transform_expressions(
        self._process_expressions, skip_transform_condition=True
    )

    condition = query.get_condition()
    if condition is None:
        return

    rewritten = condition.transform(self.__process_optimizable_condition)
    if rewritten == condition:
        # The optimized path changed nothing: fall back to the generic one.
        rewritten = condition.transform(self._process_expressions)

    query.set_ast_condition(rewritten)
def process_query(self, query: Query, request_settings: RequestSettings) -> None:
    """
    Normalize nested accesses to the ``device.simulator``,
    ``device.online`` and ``device.charging`` context keys.

    A matching ``arrayElement(contexts.value, indexOf(contexts.key, <key>))``
    expression is wrapped as ``if(<expr> IN ("1", "True"), "True", "False")``.
    The alias of the original expression is moved onto the ``if`` call.
    """
    context_keys = Or(
        [
            String("device.simulator"),
            String("device.online"),
            String("device.charging"),
        ]
    )
    index_lookup = FunctionCall(
        String("indexOf"),
        (Column(None, String("contexts.key")), Literal(context_keys)),
    )
    matcher = FunctionCall(
        String("arrayElement"),
        (Column(None, String("contexts.value")), index_lookup),
    )

    def process_column(exp: Expression) -> Expression:
        if not matcher.match(exp):
            return exp

        # The alias belongs to the outer function, not to the nested access.
        inner = replace(exp, alias=None)
        is_true = binary_condition(
            ConditionFunctions.IN,
            inner,
            literals_tuple(
                None, [LiteralExpr(None, "1"), LiteralExpr(None, "True")]
            ),
        )
        return FunctionCallExpr(
            exp.alias,
            "if",
            (is_true, LiteralExpr(None, "True"), LiteralExpr(None, "False")),
        )

    query.transform_expressions(process_column)
def process_query(self, query: Query, query_settings: QuerySettings) -> None:
    """
    Replace every reference to the ``user`` column with
    ``nullIf(user, "")``, keeping the original alias on the function call.
    """

    def process_column(exp: Expression) -> Expression:
        is_user_column = isinstance(exp, Column) and exp.column_name == "user"
        if not is_user_column:
            return exp

        arguments = (Column(None, None, "user"), Literal(None, ""))
        return FunctionCall(exp.alias, "nullIf", arguments)

    query.transform_expressions(process_column)
def process_query(self, query: Query, query_settings: QuerySettings) -> None:
    """
    Replace every ``notEquals(type, "transaction")`` expression in the
    query with the literal ``1``.

    The replacement literal carries no alias.
    """
    # Built once per query instead of once per visited expression: the
    # pattern does not depend on the expression being inspected.
    matcher = FunctionCall(
        String("notEquals"),
        (Column(None, String("type")), Literal(String("transaction"))),
    )

    def replace_exp(exp: Expression) -> Expression:
        if matcher.match(exp):
            return LiteralExpr(None, 1)
        return exp

    query.transform_expressions(replace_exp)
def process_query(self, query: Query, query_settings: QuerySettings) -> None:
    """
    Apply the generic expression transformation everywhere except the
    condition, then process the condition separately.

    A condition flagged as unoptimizable goes straight through the
    generic transformation. Otherwise the optimizable-condition
    transformer is tried first, and the generic transformation is applied
    only if that attempt left the condition unchanged.
    """
    query.transform_expressions(
        self._process_expressions, skip_transform_condition=True
    )

    condition = query.get_condition()
    if condition is None:
        return

    if self.__contains_unoptimizable_condition(condition):
        new_condition = condition.transform(self._process_expressions)
    else:
        new_condition = condition.transform(self.__process_optimizable_condition)
        if condition == new_condition:
            # Nothing was optimized: fall back to the generic transformation.
            new_condition = new_condition.transform(self._process_expressions)

    query.set_ast_condition(new_condition)
def process_query(self, query: Query, request_settings: RequestSettings) -> None:
    """
    Replace every reference to the ``group_id`` column with
    ``nullIf(group_id, 0)``, keeping the table name on the inner column
    and the original alias on the function call.
    """

    def process_column(exp: Expression) -> Expression:
        is_group_id = isinstance(exp, Column) and exp.column_name == "group_id"
        if not is_group_id:
            return exp

        arguments = (
            Column(None, exp.table_name, exp.column_name),
            Literal(None, 0),
        )
        return FunctionCall(exp.alias, "nullIf", arguments)

    query.transform_expressions(process_column)
def process_query(self, query: Query, request_settings: RequestSettings) -> None:
    """
    Normalize ``toString(...)`` calls on the ``device_simulator``,
    ``device_online`` and ``device_charging`` columns.

    A matching expression is wrapped in a ``multiIf`` that yields ``""``
    when the value equals ``""``, ``"True"`` when it is one of ``"1"`` or
    ``"True"``, and ``"False"`` otherwise. The original alias is moved
    onto the ``multiIf`` call.
    """
    # We care only of promoted contexts, so we do not need to match
    # the original nested expression.
    promoted_columns = Or(
        [
            String("device_simulator"),
            String("device_online"),
            String("device_charging"),
        ]
    )
    matcher = FunctionCall(String("toString"), (Column(None, promoted_columns),))

    def replace_exp(exp: Expression) -> Expression:
        if matcher.match(exp) is None:
            return exp

        # The alias belongs to the outer multiIf, not to the inner call.
        inner = replace(exp, alias=None)
        is_empty = binary_condition(
            None, ConditionFunctions.EQ, inner, Literal(None, "")
        )
        is_true = binary_condition(
            None,
            ConditionFunctions.IN,
            inner,
            literals_tuple(None, [Literal(None, "1"), Literal(None, "True")]),
        )
        return FunctionCallExpr(
            exp.alias,
            "multiIf",
            (
                is_empty,
                Literal(None, ""),
                is_true,
                Literal(None, "True"),
                Literal(None, "False"),
            ),
        )

    query.transform_expressions(replace_exp)
def process_query(self, query: Query, request_settings: RequestSettings) -> None:
    """
    Replace every reference to the ``event_id`` column with
    ``replaceAll(toString(event_id), "-", "")``, keeping the original
    alias on the outer function call.
    """

    def process_column(exp: Expression) -> Expression:
        is_event_id = isinstance(exp, Column) and exp.column_name == "event_id"
        if not is_event_id:
            return exp

        as_string = FunctionCall(
            None,
            "toString",
            (Column(None, None, "event_id"),),
        )
        return FunctionCall(
            exp.alias,
            "replaceAll",
            (as_string, Literal(None, "-"), Literal(None, "")),
        )

    query.transform_expressions(process_column)
def process_query(self, query: Query, query_settings: QuerySettings) -> None:
    """
    Inside aggregation function calls, cast every column listed in
    ``self.mismatched_null_columns`` to its nullable type.

    Only function calls whose name is in ``AGGREGATION_FUNCTIONS`` are
    rewritten; all other expressions are returned unchanged.
    """

    def cast_column_to_nullable(exp: Expression) -> Expression:
        if not isinstance(exp, Column):
            return exp
        if exp.column_name not in self.mismatched_null_columns:
            return exp

        # depending on the order of the storage, this dictionary will contain
        # either the nullable or non-nullable version of the column. No matter
        # which one is in there, due to the mismatch on the merge table it
        # needs to be cast as nullable anyways
        mismatched_column = self.mismatched_null_columns[exp.column_name]
        already_nullable = _col_is_nullable(mismatched_column)
        schema_type = mismatched_column.type.for_schema()
        if already_nullable:
            cast_str = schema_type
        else:
            cast_str = f"Nullable({schema_type})"

        # move the alias up to the cast function
        bare_column = Column(
            None,
            table_name=exp.table_name,
            column_name=exp.column_name,
        )
        return FunctionCall(exp.alias, "cast", (bare_column, Literal(None, cast_str)))

    def transform_aggregate_functions_with_mismatched_nullable_parameters(
        exp: Expression,
    ) -> Expression:
        is_aggregation = (
            isinstance(exp, FunctionCall)
            and exp.function_name in AGGREGATION_FUNCTIONS
        )
        return exp.transform(cast_column_to_nullable) if is_aggregation else exp

    query.transform_expressions(
        transform_aggregate_functions_with_mismatched_nullable_parameters
    )
def process_query(self, query: Query, request_settings: RequestSettings) -> None:
    """
    Optimize arrayJoin expressions over the key and value columns of the
    mapping column this processor is configured for.

    When both the key and the value column are array-joined, each
    arrayJoin is replaced by an element of a shared key-value pair
    expression. When only one is present and filtered keys were found,
    the arrayJoin is replaced by its filtered version. Otherwise the
    expression is left as is.
    """
    keys_name = key_column(self.__column_name)
    values_name = val_column(self.__column_name)

    arrayjoin_pattern = FunctionCall(
        String("arrayJoin"),
        (
            Column(
                column_name=Param(
                    "col",
                    Or([String(keys_name), String(values_name)]),
                ),
            ),
        ),
    )

    # Names of the columns that are actually array-joined in this query.
    candidate_matches = (
        arrayjoin_pattern.match(e) for e in query.get_all_expressions()
    )
    arrayjoins_in_query = {
        found.string("col") for found in candidate_matches if found is not None
    }

    filtered_keys = [
        LiteralExpr(None, key)
        for key in get_filtered_mapping_keys(query, self.__column_name)
    ]

    # Ensures the alias we apply to the arrayJoin is not already taken.
    used_aliases = {exp.alias for exp in query.get_all_expressions()}
    pair_alias_root = f"snuba_all_{self.__column_name}"
    pair_alias = pair_alias_root
    suffix = 0
    while pair_alias in used_aliases:
        suffix += 1
        pair_alias = f"{pair_alias_root}_{suffix}"

    both_columns = {keys_name, values_name}

    def replace_expression(expr: Expression) -> Expression:
        """
        Applies the appropriate optimization on a single arrayJoin expression.
        """
        found = arrayjoin_pattern.match(expr)
        if found is None:
            return expr

        if arrayjoins_in_query == both_columns:
            # Both arrayJoin(col.key) and arrayJoin(col.value) are present in
            # the query: do one arrayJoin on key-value pairs instead of two
            # independent ones, then pick element 1 (key) or 2 (value).
            position = 1 if found.string("col") == keys_name else 2
            array_index = LiteralExpr(None, position)
            if filtered_keys:
                return _filtered_mapping_pairs(
                    expr.alias,
                    self.__column_name,
                    pair_alias,
                    filtered_keys,
                    array_index,
                )
            return _unfiltered_mapping_pairs(
                expr.alias, self.__column_name, pair_alias, array_index
            )

        if filtered_keys:
            # Only one between arrayJoin(col.key) and arrayJoin(col.value)
            # is present, and it is arrayJoin(col.key) since we found
            # filtered keys.
            return _filtered_mapping_keys(
                expr.alias, self.__column_name, filtered_keys
            )

        # No viable optimization
        return expr

    query.transform_expressions(replace_expression)
def process_query(self, query: Query, query_settings: QuerySettings) -> None:
    """
    Run this processor's ``_process_expressions`` transformer over the
    expressions of the query.
    """
    transformer = self._process_expressions
    query.transform_expressions(transformer)
def test_replace_expression() -> None:
    """
    Builds a query with the new AST, swaps every f1(...) call for
    tag("f1") through transform_expressions, and checks that the swap
    shows up in each part of the query that referenced f1.
    """

    def tagged() -> FunctionCall:
        # What every f1(...) call is expected to become.
        return FunctionCall("alias", "tag", (Literal(None, "f1"),))

    def swap_f1(exp: Expression) -> Expression:
        if isinstance(exp, FunctionCall) and exp.function_name == "f1":
            return FunctionCall(exp.alias, "tag", (Literal(None, "f1"),))
        return exp

    column1 = Column(None, "t1", "c1")
    column2 = Column(None, "t1", "c2")
    function_1 = FunctionCall("alias", "f1", (column1, column2))
    function_2 = FunctionCall("alias", "f2", (column2,))
    orderby = OrderBy(OrderByDirection.ASC, function_2)

    query = Query(
        Table("my_table", ColumnSet([])),
        selected_columns=[SelectedExpression("alias", function_1)],
        array_join=None,
        condition=binary_condition(
            ConditionFunctions.EQ, function_1, Literal(None, "1")
        ),
        groupby=[function_1],
        having=None,
        prewhere=binary_condition(
            ConditionFunctions.EQ, function_1, Literal(None, "2")
        ),
        order_by=[orderby],
    )

    query.transform_expressions(swap_f1)

    expected_query = Query(
        Table("my_table", ColumnSet([])),
        selected_columns=[SelectedExpression("alias", tagged())],
        array_join=None,
        condition=binary_condition(
            ConditionFunctions.EQ, tagged(), Literal(None, "1")
        ),
        groupby=[tagged()],
        prewhere=binary_condition(
            ConditionFunctions.EQ, tagged(), Literal(None, "2")
        ),
        having=None,
        order_by=[orderby],
    )

    assert query.get_selected_columns() == expected_query.get_selected_columns()
    assert query.get_condition() == expected_query.get_condition()
    assert query.get_groupby() == expected_query.get_groupby()
    assert query.get_having() == expected_query.get_having()
    assert query.get_orderby() == expected_query.get_orderby()

    actual_expressions = list(query.get_all_expressions())
    expected_expressions = list(expected_query.get_all_expressions())
    assert actual_expressions == expected_expressions
def process_query(self, query: Query, request_settings: RequestSettings) -> None:
    """
    Run this processor's ``_process_expressions`` transformer over the
    expressions of the query.
    """
    transformer = self._process_expressions
    query.transform_expressions(transformer)