class DefaultIfNullCurriedFunctionMapper(CurriedFunctionCallMapper):
    """
    Collapse a curried function call into NULL when it is applied to a
    column that doesn't exist or to NULL.

    After translation, such arguments show up as ``identity(<literal>)``
    calls. As soon as one translated parameter has that shape, the whole
    expression is replaced by ``identity(NULL)`` carrying the original alias.
    Returns None (no mapping) otherwise.
    """

    function_match = FunctionCallMatch(StringMatch("identity"), (LiteralMatch(),))

    def attempt_map(
        self,
        expression: CurriedFunctionCall,
        children_translator: SnubaClickhouseStrictTranslator,
    ) -> Optional[Union[CurriedFunctionCall, FunctionCall]]:
        # The inner function is translated even though only its type is
        # checked here; the translation itself is still performed.
        translated_inner = expression.internal_function.accept(children_translator)
        assert isinstance(translated_inner, FunctionCall)  # mypy

        translated_params = tuple(
            param.accept(children_translator) for param in expression.parameters
        )

        # All impossible columns that have been converted to NULL will be the
        # identity function, so a single identity parameter is enough to
        # collapse the entire expression.
        hit_identity = any(
            self.function_match.match(candidate) is not None
            for candidate in translated_params
        )
        if not hit_identity:
            return None

        return identity(Literal(None, None), expression.alias)
class DefaultIfNullFunctionMapper(FunctionCallMapper):
    """
    Collapse a function call into NULL when it is applied to a column that
    doesn't exist or to NULL.

    After translation, such arguments show up as ``identity(NULL)`` calls.
    As soon as one translated parameter has that shape, the whole call is
    replaced by ``identity(NULL)`` carrying the original alias. Logical
    functions (``and``, ``or``, ``xor``) are never mapped. Returns None
    (no mapping) otherwise.
    """

    function_match = FunctionCallMatch(
        StringMatch("identity"), (LiteralMatch(value=Any(type(None))),)
    )

    def attempt_map(
        self,
        expression: FunctionCall,
        children_translator: SnubaClickhouseStrictTranslator,
    ) -> Optional[FunctionCall]:
        # HACK: Quick fix to avoid this function dropping important conditions
        # from the query. This check happens before any parameter is translated.
        if expression.function_name in {"and", "or", "xor"}:
            return None

        translated_params = tuple(
            param.accept(children_translator) for param in expression.parameters
        )

        # All impossible columns will have been converted to the identity
        # function, so a single identity parameter is enough to collapse the
        # entire expression.
        hit_identity = any(
            self.function_match.match(candidate) is not None
            for candidate in translated_params
        )
        if not hit_identity:
            return None

        return identity(Literal(None, None), expression.alias)
def test_pattern_replacer_format_expressions(
    unprocessed: Query, expected: Query
) -> None:
    """
    Run a PatternReplacer that targets columns named "column1" over
    `unprocessed` and check its selected columns against `expected`.
    """

    def wrap_in_null_if(match: MatchResult, exp: Expression) -> Expression:
        # Replacement callback: rebuild the column without alias/table and
        # wrap it as nullIf(<column>, "").
        assert isinstance(exp, Column)  # mypy
        bare_column = Column(None, None, exp.column_name)
        empty_string = Literal(None, "")
        return FunctionCall(None, "nullIf", (bare_column, empty_string,))

    column1_pattern = Param("column", ColumnMatch(None, StringMatch("column1")))
    processor = PatternReplacer(column1_pattern, wrap_in_null_if)
    # The query is processed in place; nothing is returned to inspect.
    processor.process_query(unprocessed, HTTPRequestSettings())

    assert expected.get_selected_columns() == unprocessed.get_selected_columns()
def build_match(
    col: str, ops: Sequence[str], param_type: Any
) -> Or[Expression]:
    """
    Build a matcher for a condition on column `col`.

    The result matches either a call to one of `ops` comparing the column
    with a literal of `param_type`, or an IN condition whose right-hand side
    is an ``array``/``tuple`` call where all parameters are literals of
    `param_type`. `alias_match` comes from the enclosing scope.
    """
    column_match = ColumnMatch(alias_match, StringMatch(col))

    operator_names = Or([StringMatch(op) for op in ops])
    direct_comparison = FunctionCallMatch(
        operator_names,
        (column_match, LiteralMatch(AnyMatch(param_type))),
    )

    # The IN condition has to be checked separately since each parameter
    # has to be checked individually.
    literal_collection = FunctionCallMatch(
        Or([StringMatch("array"), StringMatch("tuple")]),
        all_parameters=LiteralMatch(AnyMatch(param_type)),
    )
    in_comparison = FunctionCallMatch(
        StringMatch(ConditionFunctions.IN),
        (column_match, literal_collection),
    )

    return Or([direct_comparison, in_comparison])
class DefaultIfNullCurriedFunctionMapper(CurriedFunctionCallMapper):
    """
    Turn a curried function call into its NULL-returning form when it is
    applied to a column that doesn't exist or to NULL.

    The mapping applies only when there is at least one parameter and every
    translated parameter is either a NULL literal or an ``ifNull`` call over
    two literals. In that case the internal function is renamed to
    ``<name>OrNull`` and every parameter is replaced by a NULL literal.
    Returns None (no mapping) otherwise.
    """

    function_match = FunctionCallMatch(
        StringMatch("ifNull"), (LiteralMatch(), LiteralMatch())
    )

    def attempt_map(
        self,
        expression: CurriedFunctionCall,
        children_translator: SnubaClickhouseStrictTranslator,
    ) -> Optional[CurriedFunctionCall]:
        internal_function = expression.internal_function.accept(children_translator)
        assert isinstance(internal_function, FunctionCall)  # mypy

        translated_params = tuple(
            param.accept(children_translator) for param in expression.parameters
        )
        if not translated_params:
            return None

        for candidate in translated_params:
            # Handle wrapped functions that have been converted to
            # ifNull(NULL, NULL).
            if self.function_match.match(candidate) is not None:
                continue
            if isinstance(candidate, Literal) and candidate.value is None:
                continue
            # Anything else is a real value: leave the expression untouched.
            return None

        # Currently curried function mappers require returning other curried
        # functions. So return this to keep the mapper happy.
        or_null_function = FunctionCall(
            None,
            f"{internal_function.function_name}OrNull",
            internal_function.parameters,
        )
        return CurriedFunctionCall(
            alias=expression.alias,
            internal_function=or_null_function,
            parameters=tuple(Literal(None, None) for _ in translated_params),
        )
class DefaultIfNullFunctionMapper(FunctionCallMapper):
    """
    Turn a function call into ``ifNull(NULL, NULL)`` when it is applied to a
    column that doesn't exist or to NULL.

    The mapping applies only when there is at least one parameter and every
    translated parameter is either a NULL literal or an ``ifNull`` call over
    two literals. Returns None (no mapping) otherwise.
    """

    function_match = FunctionCallMatch(
        StringMatch("ifNull"), (LiteralMatch(), LiteralMatch())
    )

    def attempt_map(
        self,
        expression: FunctionCall,
        children_translator: SnubaClickhouseStrictTranslator,
    ) -> Optional[FunctionCall]:
        translated_params = tuple(
            param.accept(children_translator) for param in expression.parameters
        )
        if not translated_params:
            return None

        for candidate in translated_params:
            # Handle wrapped functions that have been converted to
            # ifNull(NULL, NULL).
            if self.function_match.match(candidate) is not None:
                continue
            if isinstance(candidate, Literal) and candidate.value is None:
                continue
            # Anything else is a real value: leave the expression untouched.
            return None

        # Currently function mappers require returning other functions. So
        # return this to keep the mapper happy.
        null_pair = (Literal(None, None), Literal(None, None))
        return FunctionCall(expression.alias, "ifNull", null_pair)
# {"a": f(x), "x": g(k)} -> {"a": f(g(k)), "x": g(k)} aliased_expressions = { exp.alias: exp for exp in query.get_all_expressions() if exp.alias is not None } fully_resolved_aliases = { alias: exp.accept( AliasExpanderVisitor(aliased_expressions, [], expand_nested=True)) for alias, exp in aliased_expressions.items() } visitor = AliasExpanderVisitor(fully_resolved_aliases, []) query.transform(visitor) ARRAYJOIN_FUNCTION_MATCH = FunctionCallMatch(StringMatch("arrayJoin"), None) def _validate_arrayjoin(query: Query) -> None: # TODO: Actually validate arrayjoin. For now log how it is used. body_arrayjoin = "" arrayjoin = query.get_arrayjoin_from_ast() if arrayjoin is not None: if isinstance(arrayjoin, Column): body_arrayjoin = arrayjoin.column_name array_joins = set() if body_arrayjoin: array_joins.add(body_arrayjoin) for exp in query.get_all_expressions(): match = ARRAYJOIN_FUNCTION_MATCH.match(exp)
# since SnQL will require an entity to always be specified by the user. def select_entity(self, query: Query) -> EntityKey: selected_entity = match_query_to_entity(query, EVENTS_COLUMNS, TRANSACTIONS_COLUMNS) track_bad_query(query, selected_entity, EVENTS_COLUMNS, TRANSACTIONS_COLUMNS) return selected_entity metrics = MetricsWrapper(environment.metrics, "api.discover") logger = logging.getLogger(__name__) EVENT_CONDITION = FunctionCallMatch( Param("function", Or([StringMatch(op) for op in BINARY_OPERATORS])), ( Or([ColumnMatch(None, StringMatch("type")), LiteralMatch(None)]), Param("event_type", Or([ColumnMatch(), LiteralMatch()])), ), ) TRANSACTION_FUNCTIONS = FunctionCallMatch( Or([StringMatch("apdex"), StringMatch("failure_rate")]), None) EVENT_FUNCTIONS = FunctionCallMatch( Or([StringMatch("isHandled"), StringMatch("notHandled")]), None)
from snuba.query.subscripts import subscript_key_column_name
from snuba.query.timeseries_extension import TimeSeriesExtension
from snuba.request.request_settings import RequestSettings
from snuba.util import parse_datetime, qualified_column
from snuba.utils.metrics.wrapper import MetricsWrapper

# Short names for the two entity keys, plus a string marker for "both".
EVENTS = EntityKey.EVENTS
TRANSACTIONS = EntityKey.TRANSACTIONS
EVENTS_AND_TRANSACTIONS = "events_and_transactions"

metrics = MetricsWrapper(environment.metrics, "api.discover")
logger = logging.getLogger(__name__)

# Matcher for a two-argument call to any function listed in BINARY_OPERATORS.
# The first argument is either a column named "type" or a literal; the second
# is any column or literal. The Param wrappers label the function name as
# "function" and the second argument as "event_type".
# NOTE(review): the meaning of the leading None arguments, and of
# LiteralMatch(StringMatch("type"), None), depends on the matcher signatures
# in use here; confirm against their definitions.
EVENT_CONDITION = FunctionCallMatch(
    None,
    Param("function", Or([StringMatch(op) for op in BINARY_OPERATORS])),
    (
        Or(
            [
                ColumnMatch(None, None, StringMatch("type")),
                LiteralMatch(StringMatch("type"), None),
            ]
        ),
        Param("event_type", Or([ColumnMatch(), LiteralMatch()])),
    ),
)


# NOTE(review): only the signature is visible in this excerpt; the body
# follows in the file.
def match_query_to_table(
    query: Query, events_only_columns: ColumnSet, transactions_only_columns: ColumnSet
) -> Union[EntityKey, str]:
def __post_init__(self) -> None:
    """Build `function_match`: a matcher for calls to any of `function_names`."""
    name_matchers = [StringMatch(name) for name in self.function_names]
    self.function_match = FunctionCallMatch(Or(name_matchers))
def validate_required_conditions(
    self, query: Query, alias: Optional[str] = None
) -> bool:
    """
    Check that the top-level AND conditions of `query` cover the required
    filter columns and the required time column.

    Returns True when nothing is required. Returns False when the query has
    no top-level conditions, when a required filter column has no matching
    condition against int literals, or when the time column has neither a
    matching condition against datetime literals nor both a lower and an
    upper bound. When the time column is covered by a lower/upper range, the
    time conditions of `query` are replaced before returning True.

    `alias`, when given, restricts column matching to that alias.
    """
    if not (self._required_filter_columns or self._required_time_column):
        return True

    condition = query.get_condition_from_ast()
    if condition:
        top_level = get_first_level_and_conditions(condition)
    else:
        top_level = []
    if not top_level:
        return False

    alias_match = AnyOptionalString() if alias is None else StringMatch(alias)

    def build_match(
        col: str, ops: Sequence[str], param_type: Any
    ) -> Or[Expression]:
        column_match = ColumnMatch(alias_match, StringMatch(col))
        direct_comparison = FunctionCallMatch(
            Or([StringMatch(op) for op in ops]),
            (column_match, LiteralMatch(AnyMatch(param_type))),
        )
        # The IN condition has to be checked separately since each parameter
        # has to be checked individually.
        in_comparison = FunctionCallMatch(
            StringMatch(ConditionFunctions.IN),
            (
                column_match,
                FunctionCallMatch(
                    Or([StringMatch("array"), StringMatch("tuple")]),
                    all_parameters=LiteralMatch(AnyMatch(param_type)),
                ),
            ),
        )
        return Or([direct_comparison, in_comparison])

    def has_top_level_match(matcher: Or[Expression]) -> bool:
        # True when at least one top-level condition satisfies the matcher.
        return any(matcher.match(cond) for cond in top_level)

    for col in self._required_filter_columns or ():
        if not has_top_level_match(build_match(col, [ConditionFunctions.EQ], int)):
            return False

    time_column = self._required_time_column
    if not time_column:
        return True

    # An exact match on the time column is sufficient on its own.
    if has_top_level_match(
        build_match(time_column, [ConditionFunctions.EQ], datetime)
    ):
        return True

    lower, upper = get_time_range_expressions(top_level, time_column, alias)
    if not (lower and upper):
        return False

    # At this point we have valid conditions. However we need to align them and
    # make sure they don't exceed the max_days. Replace the conditions.
    self._replace_time_condition(query, *lower, *upper)
    return True
from snuba.query.project_extension import ProjectExtension
from snuba.query.subscripts import subscript_key_column_name
from snuba.query.timeseries_extension import TimeSeriesExtension
from snuba.request.request_settings import RequestSettings
from snuba.util import qualified_column
from snuba.utils.metrics.wrapper import MetricsWrapper

# Short names for the two entity keys, plus a string marker for "both".
EVENTS = EntityKey.EVENTS
TRANSACTIONS = EntityKey.TRANSACTIONS
EVENTS_AND_TRANSACTIONS = "events_and_transactions"

metrics = MetricsWrapper(environment.metrics, "api.discover")
logger = logging.getLogger(__name__)

# Matcher for a two-argument call to any function listed in BINARY_OPERATORS.
# The first argument is either a column named "type" or a literal; the second
# is any column or literal. The Param wrappers label the function name as
# "function" and the second argument as "event_type".
# NOTE(review): the None arguments passed to ColumnMatch and LiteralMatch
# presumably mean "unconstrained"; confirm against the matcher definitions.
EVENT_CONDITION = FunctionCallMatch(
    Param("function", Or([StringMatch(op) for op in BINARY_OPERATORS])),
    (
        Or([ColumnMatch(None, StringMatch("type")), LiteralMatch(None)]),
        Param("event_type", Or([ColumnMatch(), LiteralMatch()])),
    ),
)


# NOTE(review): only the first statements of this function are visible in
# this excerpt; the body continues past the trailing `if condition:`.
def match_query_to_table(
        query: Query, events_only_columns: ColumnSet,
        transactions_only_columns: ColumnSet) -> Union[EntityKey, str]:
    # First check for a top level condition on the event type
    condition = query.get_condition_from_ast()
    event_types = set()
    if condition:
if not isinstance(exp, Column): return exp parts = exp.column_name.split(".", 1) if len(parts) != 2 or parts[0] not in aliases: raise ParsingException( f"column {exp.column_name} must be qualified in a join query" ) return Column(exp.alias, parts[0], parts[1]) query.transform_expressions(transform) DATETIME_MATCH = FunctionCallMatch( StringMatch("toDateTime"), (Param("date_string", LiteralMatch(AnyMatch(str))),) ) def _parse_datetime_literals( query: Union[CompositeQuery[QueryEntity], LogicalQuery] ) -> None: def parse(exp: Expression) -> Expression: result = DATETIME_MATCH.match(exp) if result is not None: date_string = result.expression("date_string") assert isinstance(date_string, Literal) # mypy assert isinstance(date_string.value, str) # mypy return Literal(exp.alias, parse_datetime(date_string.value)) return exp
# NOTE(review): `transform` and the call that follows it belong to an
# enclosing function that is not visible in this excerpt; `aliases` and
# `query` come from that scope.
def transform(exp: Expression) -> Expression:
    """
    Split a column named "<prefix>.<rest>" into a Column built from the
    original alias, the prefix, and the rest.

    Non-Column expressions are returned unchanged. Raises ParsingException
    when the column name has no "." or its prefix is not in `aliases`.
    """
    if not isinstance(exp, Column):
        return exp

    # Split on the first "." only, so the remainder may itself contain dots.
    parts = exp.column_name.split(".", 1)
    if len(parts) != 2 or parts[0] not in aliases:
        raise ParsingException(
            f"column {exp.column_name} must be qualified in a join query")
    return Column(exp.alias, parts[0], parts[1])

query.transform_expressions(transform)


# Matcher for toDateTime(<string literal>); the literal argument is labelled
# "date_string" so it can be read back from the match result.
DATETIME_MATCH = FunctionCallMatch(
    StringMatch("toDateTime"),
    (Param("date_string", LiteralMatch(AnyMatch(str))), ))


# NOTE(review): the visible part of this function only defines `parse`; the
# statement that applies it to `query` is presumably past the end of this
# excerpt. Confirm against the full file.
def _parse_datetime_literals(
        query: Union[CompositeQuery[QueryEntity], LogicalQuery]) -> None:
    def parse(exp: Expression) -> Expression:
        # Replace a matched toDateTime(<string>) call with a Literal holding
        # the parsed value, keeping the alias of the original expression.
        result = DATETIME_MATCH.match(exp)
        if result is not None:
            date_string = result.expression("date_string")
            assert isinstance(date_string, Literal)  # mypy
            assert isinstance(date_string.value, str)  # mypy
            return Literal(exp.alias, parse_datetime(date_string.value))
        # Anything that does not match is passed through untouched.
        return exp