def __init__(self, query: Query, settings: RequestSettings,) -> None:
    """
    Capture everything needed from the query and the request settings.

    The individual parts of the Clickhouse query are copied onto this
    object instead of keeping a reference to the Query itself: that lets
    this object be processed independently from the Clickhouse Query, and
    it is safe to do because those parts are immutable.
    """
    # AST components of the query.
    self.__selected_columns = query.get_selected_columns_from_ast()
    self.__condition = query.get_condition_from_ast()
    self.__groupby = query.get_groupby_from_ast()
    self.__having = query.get_having_from_ast()
    self.__orderby = query.get_orderby_from_ast()
    self.__data_source = query.get_data_source()
    self.__arrayjoin = query.get_arrayjoin_from_ast()

    # Scalar query parameters.
    self.__granularity = query.get_granularity()
    self.__limit = query.get_limit()
    self.__limitby = query.get_limitby()
    self.__offset = query.get_offset()

    # A HAVING clause is only accepted together with a GROUP BY.
    assert (
        not self.__having or self.__groupby
    ), "found HAVING clause with no GROUP BY"

    self.__turbo = settings.get_turbo()
    self.__final = query.get_final()
    self.__sample = query.get_sample()
    self.__hastotals = query.has_totals()
    self.__prewhere = query.get_prewhere_ast()

    self.__settings = settings

    # Start out unset; presumably filled in once the query is formatted
    # (the code doing that is outside this method).
    self.__sql_data_list: Optional[Sequence[Tuple[str, str]]] = None
    self.__formatted_query: Optional[str] = None
    self.__sql_data: Optional[Mapping[str, str]] = None
def _list_array_join(query: Query) -> Columnset:
    """
    Collect the columns involved in array joins for the given query.

    Two sources are inspected: the query's ARRAY JOIN clause (when there is
    one) and every ``arrayJoin`` function call found among the query's
    expressions. The columns of each are obtained through
    ``_get_columns_from_expression`` and merged into a single set.
    """
    columns = set()

    arrayjoin_clause = query.get_arrayjoin_from_ast()
    if arrayjoin_clause is not None:
        columns |= _get_columns_from_expression(arrayjoin_clause)

    # Lazily pick the arrayJoin(...) calls out of all the query expressions.
    array_join_calls = (
        expression
        for expression in query.get_all_expressions()
        if isinstance(expression, FunctionCallExpr)
        and expression.function_name == "arrayJoin"
    )
    for call in array_join_calls:
        columns |= _get_columns_from_expression(call)

    return columns
def test_translation(mappers: TranslationMappers, query: SnubaQuery, expected: ClickhouseQuery) -> None:
    """
    Translate ``query`` with the given mappers and verify that every AST
    component of the result matches the corresponding one in ``expected``.
    """
    translated = QueryTranslator(mappers).translate(query)

    # TODO: consider providing an __eq__ method to the Query class. Or turn it into
    # a dataclass.
    # Until then, compare the queries one AST component at a time.
    ast_getters = (
        "get_selected_columns_from_ast",
        "get_groupby_from_ast",
        "get_condition_from_ast",
        "get_arrayjoin_from_ast",
        "get_having_from_ast",
        "get_orderby_from_ast",
    )
    for getter_name in ast_getters:
        assert getattr(expected, getter_name)() == getattr(translated, getter_name)()
def process_query(self, query: Query, request_settings: RequestSettings) -> None:
    """
    Replace optimizable conditions on the nested column with LIKE / NOT LIKE
    conditions on the flattened column.

    The query is left untouched when:
    - it has no conditions;
    - a beginning of time is configured and no condition bounds one of the
      timestamp columns (with ">=" or ">") to a date later than it, or such
      a date cannot be parsed;
    - tags are found in the ARRAY JOIN, GROUP BY, HAVING or ORDER BY parts
      of the query.

    Otherwise the conditions of the query are replaced in place through
    ``query.set_conditions``. ``request_settings`` is not used here.
    """
    conditions = query.get_conditions()
    if not conditions:
        return

    # Enable the processor only if we have enough data in the flattened
    # columns, which have been deployed at BEGINNING_OF_TIME. If the query
    # starts earlier than that we do not apply the optimization.
    if self.__beginning_of_time:
        starts_after_deploy = False
        for candidate in conditions:
            is_start_bound = (
                is_condition(candidate)
                and isinstance(candidate[0], str)
                and candidate[0] in self.__timestamp_cols
                and candidate[1] in (">=", ">")
                and isinstance(candidate[2], str)
            )
            if not is_start_bound:
                continue
            try:
                start_ts = parse_datetime(candidate[2])
                if (start_ts - self.__beginning_of_time).total_seconds() > 0:
                    starts_after_deploy = True
            except Exception:
                # We should not get here: it means the from timestamp is
                # malformed. Bailing out is just for safety.
                logger.error(
                    "Cannot parse start date for NestedFieldOptimizer: %r",
                    candidate,
                )
                return
        if not starts_after_deploy:
            return

    # Do not use flattened tags if tags are being unpacked anyway. In that
    # case using flattened tags only implies loading an additional column,
    # thus making the query heavier and slower.
    if self.__has_tags(query.get_arrayjoin_from_ast()):
        return
    if query.get_groupby_from_ast() and any(
        self.__has_tags(group_expression)
        for group_expression in query.get_groupby_from_ast()
    ):
        return
    if self.__has_tags(query.get_having_from_ast()):
        return
    if query.get_orderby_from_ast() and any(
        self.__has_tags(order_clause.expression)
        for order_clause in query.get_orderby_from_ast()
    ):
        return

    # Split the conditions: the ones that cannot be optimized are kept as
    # they are, the others become "key=value" fragments grouped by operand.
    rewritten = []
    equal_fragments: List[str] = []
    not_equal_fragments: List[str] = []
    for original in conditions:
        keyvalue = self.__is_optimizable(original, self.__nested_col)
        if keyvalue:
            escaped_key = escape_field(keyvalue.nested_col_key)
            escaped_value = escape_field(keyvalue.value)
            fragment = f"{escaped_key}={escaped_value}"
            if keyvalue.operand == Operand.EQ:
                equal_fragments.append(fragment)
            else:
                not_equal_fragments.append(fragment)
        else:
            rewritten.append(original)

    if equal_fragments:
        # Positive conditions "=" are all merged together, in sorted order,
        # into one single LIKE expression.
        equal_fragments.sort()
        merged_fragments = "|%|".join(equal_fragments)
        rewritten.append([self.__flattened_col, "LIKE", f"%|{merged_fragments}|%"])

    # Negative conditions "!=" cannot be merged together. We can still turn
    # them into NOT LIKE statements, but each one has to be its own statement.
    rewritten.extend(
        [self.__flattened_col, "NOT LIKE", f"%|{fragment}|%"]
        for fragment in not_equal_fragments
    )

    query.set_conditions(rewritten)