def extract_granularity_from_query(query: Query, column: str) -> Optional[int]:
    """
    Extract the `granularity` implied by the `groupby` clause of the query.

    The matchers here are the reverse of `TimeSeriesProcessor.__group_time_function`:
    either a bucketing function (toStartOfHour / toStartOfMinute / toStartOfDay /
    toDate) whose width is looked up in GRANULARITY_MAPPING, or an explicit
    `toDateTime(multiply(intDiv(toUInt32(col), g), g), tz)` expression whose
    granularity literal `g` is returned directly. Returns None when no such
    expression appears in the groupby.
    """
    column_matcher = ColumnMatch(None, String(column))

    # Matches `toStartOfX(col)`-style bucketing of the time column.
    bucket_fn_matcher = FunctionCallMatch(
        Param(
            "time_fn",
            Or(
                [
                    String("toStartOfHour"),
                    String("toStartOfMinute"),
                    String("toStartOfDay"),
                    String("toDate"),
                ]
            ),
        ),
        (column_matcher,),
        with_optionals=True,
    )

    # Matches `toDateTime(multiply(intDiv(toUInt32(col), g), g), tz)`,
    # capturing the granularity literal `g`.
    explicit_matcher = FunctionCallMatch(
        String("toDateTime"),
        (
            FunctionCallMatch(
                String("multiply"),
                (
                    FunctionCallMatch(
                        String("intDiv"),
                        (
                            FunctionCallMatch(String("toUInt32"), (column_matcher,)),
                            LiteralMatch(Param("granularity", Any(int))),
                        ),
                    ),
                    LiteralMatch(Param("granularity", Any(int))),
                ),
            ),
            LiteralMatch(Any(str)),
        ),
    )

    for grouped in query.get_groupby():
        for expr in grouped:
            bucket_result = bucket_fn_matcher.match(expr)
            if bucket_result is not None:
                return GRANULARITY_MAPPING[bucket_result.string("time_fn")]

            explicit_result = explicit_matcher.match(expr)
            if explicit_result is not None:
                return explicit_result.integer("granularity")

    return None
def test_pattern_replacer_format_expressions(
    unprocessed: Query, expected: Query
) -> None:
    """
    Check that PatternReplacer rewrites matched `column1` references into
    `nullIf(column1, '')` calls in the selected columns of the query.
    """

    def transform(match: MatchResult, exp: Expression) -> Expression:
        # The matcher only ever yields Column expressions; assert for mypy.
        assert isinstance(exp, Column)
        return FunctionCall(
            None,
            "nullIf",
            (Column(None, None, exp.column_name), Literal(None, "")),
        )

    replacer = PatternReplacer(
        Param("column", ColumnMatch(None, StringMatch("column1"))),
        transform,
    )
    replacer.process_query(unprocessed, HTTPRequestSettings())

    assert unprocessed.get_selected_columns() == expected.get_selected_columns()
def formatted_uuid_pattern(self, suffix: str = "") -> FunctionCallMatch:
    """
    Build a matcher for `replaceAll(toString(<uuid column>), ...)` expressions.

    The matched column is captured under the name "formatted_uuid_column"
    plus the given suffix, so several UUID columns can be captured
    independently within a single composite pattern.
    """
    captured_uuid_column = Param(
        "formatted_uuid_column" + suffix,
        ColumnMatch(None, self.__uuid_column_match),
    )
    return FunctionCallMatch(
        String("replaceAll"),
        (FunctionCallMatch(String("toString"), (captured_uuid_column,)),),
        with_optionals=True,
    )
def __init__(self, columns: Set[str]):
    """
    Pre-build condition matchers for the given set of column names.

    Two matchers are constructed:
      * ``__condition_matcher`` — binary comparisons (plus ``has``) between a
        tracked column and a literal, in either operand order. IN / NOT IN
        are excluded here since their parameters must be checked individually.
      * ``__in_condition_matcher`` — IN / NOT IN conditions whose right-hand
        side is a ``tuple(...)`` call made up entirely of literals.
    """
    self.columns = columns

    tracked_column = Param(
        "col", ColumnMatch(None, Or([String(name) for name in columns]))
    )
    literal = Param("literal", LiteralMatch(AnyMatch(str)))

    # Every known operator except IN/NOT IN (handled separately below).
    binary_operator = Param(
        "operator",
        Or(
            [
                String(op)
                for op in FUNCTION_TO_OPERATOR
                if op not in (ConditionFunctions.IN, ConditionFunctions.NOT_IN)
            ]
        ),
    )
    membership_operator = Param(
        "operator",
        Or((String(ConditionFunctions.IN), String(ConditionFunctions.NOT_IN))),
    )

    self.__condition_matcher = Or(
        [
            FunctionCallMatch(binary_operator, (literal, tracked_column)),
            FunctionCallMatch(binary_operator, (tracked_column, literal)),
            FunctionCallMatch(
                Param("operator", String("has")), (tracked_column, literal)
            ),
        ]
    )

    self.__in_condition_matcher = FunctionCallMatch(
        membership_operator,
        (
            tracked_column,
            Param(
                "tuple",
                FunctionCallMatch(String("tuple"), all_parameters=LiteralMatch()),
            ),
        ),
    )
def build_match(
    col: str, ops: Sequence[str], param_type: Any
) -> Or[Expression]:
    """
    Build a matcher for conditions of the form ``col <op> literal`` or
    ``col IN array/tuple(literal, ...)``.

    The IN condition is matched separately from the binary operators because
    each element of its right-hand container has to be checked individually.
    """
    column_matcher = ColumnMatch(alias_match, StringMatch(col))

    binary_condition = FunctionCallMatch(
        Or([StringMatch(op) for op in ops]),
        (column_matcher, LiteralMatch(AnyMatch(param_type))),
    )
    in_condition = FunctionCallMatch(
        StringMatch(ConditionFunctions.IN),
        (
            column_matcher,
            FunctionCallMatch(
                Or([StringMatch("array"), StringMatch("tuple")]),
                all_parameters=LiteralMatch(AnyMatch(param_type)),
            ),
        ),
    )
    return Or([binary_condition, in_condition])
# NOTE(review): the three statements below are the tail of an enclosing
# function whose signature is not visible in this chunk; they are indented to
# reflect that.
    selected_entity = match_query_to_entity(query, EVENTS_COLUMNS, TRANSACTIONS_COLUMNS)
    # Emit metrics/logging for queries that could not be matched cleanly.
    track_bad_query(query, selected_entity, EVENTS_COLUMNS, TRANSACTIONS_COLUMNS)
    return selected_entity


metrics = MetricsWrapper(environment.metrics, "api.discover")
logger = logging.getLogger(__name__)

# Matches a binary comparison against the `type` column (in either operand
# position), capturing the compared event type under "event_type".
EVENT_CONDITION = FunctionCallMatch(
    Param("function", Or([StringMatch(op) for op in BINARY_OPERATORS])),
    (
        Or([ColumnMatch(None, StringMatch("type")), LiteralMatch(None)]),
        Param("event_type", Or([ColumnMatch(), LiteralMatch()])),
    ),
)

# Function calls that only make sense against the transactions entity.
TRANSACTION_FUNCTIONS = FunctionCallMatch(
    Or([StringMatch("apdex"), StringMatch("failure_rate")]), None
)

# Function calls that only make sense against the events entity.
EVENT_FUNCTIONS = FunctionCallMatch(
    Or([StringMatch("isHandled"), StringMatch("notHandled")]), None
)


# NOTE(review): definition truncated at the end of this chunk.
def match_query_to_entity(
def __init__(
    self,
    time_group_columns: Mapping[str, str],
    time_parse_columns: Sequence[str],
) -> None:
    """
    Build the condition matcher for time-based columns.

    ``time_group_columns`` maps column names that should be replaced with
    different columns. ``time_parse_columns`` lists columns that, when used
    in a condition, should be compared against datetimes; these may overlap
    with the replaced columns, so the matcher must also recognize the
    already-transformed forms of the column.
    """
    # Column names that should be mapped to different columns.
    self.__time_replace_columns = time_group_columns

    raw_column = ColumnMatch(
        None,
        Param(
            "column_name",
            Or([String(name) for name in time_parse_columns]),
        ),
    )

    # The column wrapped in a `toStartOfX(...)` bucketing function.
    bucketed_column = FunctionCallMatch(
        Or(
            [
                String("toStartOfHour"),
                String("toStartOfMinute"),
                String("toStartOfDay"),
                String("toDate"),
            ]
        ),
        (raw_column,),
        with_optionals=True,
    )

    # The column rounded explicitly:
    # `toDateTime(multiply(intDiv(toUInt32(col), g), g), tz)`.
    rounded_column = FunctionCallMatch(
        String("toDateTime"),
        (
            FunctionCallMatch(
                String("multiply"),
                (
                    FunctionCallMatch(
                        String("intDiv"),
                        (
                            FunctionCallMatch(String("toUInt32"), (raw_column,)),
                            LiteralMatch(Any(int)),
                        ),
                    ),
                    LiteralMatch(Any(int)),
                ),
            ),
            LiteralMatch(Any(str)),
        ),
    )

    comparison_functions = Or(
        [
            String(ConditionFunctions.GT),
            String(ConditionFunctions.GTE),
            String(ConditionFunctions.LT),
            String(ConditionFunctions.LTE),
            String(ConditionFunctions.EQ),
            String(ConditionFunctions.NEQ),
        ]
    )

    self.condition_match = FunctionCallMatch(
        comparison_functions,
        (
            Or([raw_column, bucketed_column, rounded_column]),
            Param("literal", LiteralMatch(Any(str))),
        ),
    )
def __init__(self, columns: Set[str], optimize_ordering: bool = False):
    """
    Pre-build condition matchers for the given column set.

    When ``optimize_ordering`` is True the ordering comparisons
    (GT/GTE/LT/LTE) are treated as optimizable; otherwise they are grouped
    with the unoptimizable operators (LIKE/NOT LIKE).

    Three matchers are constructed:
      * ``__condition_matcher`` — optimizable binary comparisons (plus
        ``has``) between a tracked column and a literal, either operand order.
      * ``__in_condition_matcher`` — IN / NOT IN with an all-literal tuple.
      * ``__unoptimizable_condition_matcher`` — comparisons that must not be
        optimized.
    """
    self.columns = columns
    self.optimize_ordering = optimize_ordering

    tracked_column = Param(
        "col", ColumnMatch(None, Or([String(name) for name in columns]))
    )
    literal = Param("literal", LiteralMatch(AnyMatch(str)))

    ordering_ops = (
        ConditionFunctions.GT,
        ConditionFunctions.GTE,
        ConditionFunctions.LT,
        ConditionFunctions.LTE,
    )
    # Ordering comparisons switch groups based on the optimize_ordering flag.
    optimizable_ops = (
        ConditionFunctions.EQ,
        ConditionFunctions.NEQ,
        ConditionFunctions.IS_NULL,
        ConditionFunctions.IS_NOT_NULL,
        *(ordering_ops if self.optimize_ordering else ()),
    )
    unoptimizable_ops = (
        ConditionFunctions.LIKE,
        ConditionFunctions.NOT_LIKE,
        *(() if self.optimize_ordering else ordering_ops),
    )

    operator = Param("operator", Or([String(op) for op in optimizable_ops]))
    unoptimizable_operator = Param(
        "operator", Or([String(op) for op in unoptimizable_ops])
    )
    membership_operator = Param(
        "operator",
        Or((String(ConditionFunctions.IN), String(ConditionFunctions.NOT_IN))),
    )

    self.__condition_matcher = Or(
        [
            FunctionCallMatch(operator, (literal, tracked_column)),
            FunctionCallMatch(operator, (tracked_column, literal)),
            FunctionCallMatch(
                Param("operator", String("has")), (tracked_column, literal)
            ),
        ]
    )

    self.__in_condition_matcher = FunctionCallMatch(
        membership_operator,
        (
            tracked_column,
            Param(
                "tuple",
                FunctionCallMatch(String("tuple"), all_parameters=LiteralMatch()),
            ),
        ),
    )

    self.__unoptimizable_condition_matcher = Or(
        [
            FunctionCallMatch(unoptimizable_operator, (literal, tracked_column)),
            FunctionCallMatch(unoptimizable_operator, (tracked_column, literal)),
        ]
    )
# NOTE(review): the fragment below is the tail of an inner `parse` closure
# belonging to an enclosing function whose signature is not visible in this
# chunk; it is indented to reflect that.
        result = DATETIME_MATCH.match(exp)
        if result is not None:
            date_string = result.expression("date_string")
            assert isinstance(date_string, Literal)  # mypy
            assert isinstance(date_string.value, str)  # mypy
            # Replace the matched string literal with a parsed datetime,
            # preserving the original alias.
            return Literal(exp.alias, parse_datetime(date_string.value))
        return exp

    query.transform_expressions(parse)


# Matches `arrayExists(col, op, value)` / `arrayAll(col, op, value)` calls,
# capturing the function name, the array column, the operator literal and the
# comparison value.
ARRAY_JOIN_MATCH = FunctionCallMatch(
    Param("function_name", Or([StringMatch("arrayExists"), StringMatch("arrayAll")])),
    (
        Param("column", ColumnMatch(AnyOptionalString(), AnyMatch(str))),
        Param("op", Or([LiteralMatch(StringMatch(op)) for op in OPERATOR_TO_FUNCTION])),
        Param("value", AnyExpression()),
    ),
)


# NOTE(review): definition truncated at the end of this chunk.
def _array_join_transformation(
    query: Union[CompositeQuery[QueryEntity], LogicalQuery]
) -> None:
    def parse(exp: Expression) -> Expression:
        result = ARRAY_JOIN_MATCH.match(exp)
        if result:
            function_name = result.string("function_name")
            column = result.expression("column")
            assert isinstance(column, Column)  # mypy