def __init__(self, query: Query, settings: RequestSettings) -> None:
    """
    Capture every clause of ``query`` together with the request ``settings``.

    The pieces of the Clickhouse query are referenced directly (rather than
    going through the Query object later on) because that makes it easier to
    process this query independently from the Clickhouse Query, and doing so
    carries no risk since those pieces are immutable.

    Fails an assertion when the query has a HAVING clause but no GROUP BY.
    """
    # --- Clickhouse query structure -------------------------------------
    self.__selected_columns = query.get_selected_columns_from_ast()
    self.__condition = query.get_condition_from_ast()

    groupby = query.get_groupby_from_ast()
    having = query.get_having_from_ast()
    self.__groupby = groupby
    self.__having = having

    self.__orderby = query.get_orderby_from_ast()
    self.__data_source = query.get_data_source()
    self.__arrayjoin = query.get_arrayjoin_from_ast()
    self.__granularity = query.get_granularity()
    self.__limit = query.get_limit()
    self.__limitby = query.get_limitby()
    self.__offset = query.get_offset()

    # A HAVING clause is only accepted alongside a GROUP BY.
    if having:
        assert groupby, "found HAVING clause with no GROUP BY"

    # --- Execution modifiers ---------------------------------------------
    # Turbo comes from the request settings; everything else from the query.
    self.__turbo = settings.get_turbo()
    self.__final = query.get_final()
    self.__sample = query.get_sample()
    self.__hastotals = query.has_totals()
    self.__prewhere = query.get_prewhere_ast()

    self.__settings = settings

    # --- Formatting results ----------------------------------------------
    # All start out as None; presumably populated when the query is
    # formatted (the code doing that is outside this block).
    self.__sql_data_list: Optional[Sequence[Tuple[str, str]]] = None
    self.__formatted_query: Optional[str] = None
    self.__sql_data: Optional[Mapping[str, str]] = None
def test_not_many_groups_to_exclude(query: ClickhouseQuery) -> None:
    """
    With ``max_group_ids_exclude`` set to 5 and three groups excluded for
    project 2, the processed query must carry a ``notIn`` filter on
    ``group_id`` ANDed with the project condition, and must not be FINAL.
    """
    excluded_group_ids = [100, 101, 102]
    state.set_config("max_group_ids_exclude", 5)
    set_project_exclude_groups(2, excluded_group_ids, ReplacerState.EVENTS)

    enforcer = PostReplacementConsistencyEnforcer("project_id", ReplacerState.EVENTS)
    enforcer.process_query(query, HTTPRequestSettings())

    # Expected AST: notIn(assumeNotNull(group_id), tuple(100, 101, 102))
    group_id_not_null = FunctionCall(
        None, "assumeNotNull", (Column(None, None, "group_id"),)
    )
    excluded_tuple = FunctionCall(
        None,
        "tuple",
        tuple(Literal(None, group_id) for group_id in excluded_group_ids),
    )
    groups_filter = FunctionCall(None, "notIn", (group_id_not_null, excluded_tuple))
    expected_condition = FunctionCall(
        None,
        BooleanFunctions.AND,
        (groups_filter, build_in("project_id", [2])),
    )

    assert query.get_condition_from_ast() == expected_condition
    assert not query.get_final()
def test_without_turbo_without_projects_needing_final(
    query: ClickhouseQuery,
) -> None:
    """
    With no replacer state and no project flagged beforehand, processing
    must leave the project condition as is and must not turn FINAL on.
    """
    enforcer = PostReplacementConsistencyEnforcer("project_id", None)
    enforcer.process_query(query, HTTPRequestSettings())

    expected_condition = build_in("project_id", [2])
    assert query.get_condition_from_ast() == expected_condition
    assert not query.get_final()
def test_too_many_groups_to_exclude(query: ClickhouseQuery) -> None:
    """
    With ``max_group_ids_exclude`` set to 2 and three groups excluded for
    project 2, the processed query must keep only the project condition
    and must be marked FINAL.
    """
    state.set_config("max_group_ids_exclude", 2)
    set_project_exclude_groups(2, [100, 101, 102], ReplacerState.EVENTS)

    enforcer = PostReplacementConsistencyEnforcer("project_id", ReplacerState.EVENTS)
    enforcer.process_query(query, HTTPRequestSettings())

    expected_condition = build_in("project_id", [2])
    assert query.get_condition_from_ast() == expected_condition
    assert query.get_final()
def test_without_turbo_with_projects_needing_final(
    query: ClickhouseQuery,
) -> None:
    """
    Once project 2 has been flagged as needing FINAL, processing must leave
    both the legacy and the AST project conditions untouched and must mark
    the query FINAL.
    """
    set_project_needs_final(2, ReplacerState.EVENTS)

    enforcer = PostReplacementConsistencyEnforcer("project_id", ReplacerState.EVENTS)
    enforcer.process_query(query, HTTPRequestSettings())

    # Legacy (list-based) representation of the conditions.
    expected_legacy_conditions = [("project_id", "IN", [2])]
    assert query.get_conditions() == expected_legacy_conditions

    # AST representation of the same condition.
    expected_ast_condition = build_in("project_id", [2])
    assert query.get_condition_from_ast() == expected_ast_condition

    assert query.get_final()
def _format_storage_query_and_run(
    timer: Timer,
    query_metadata: SnubaQueryMetadata,
    from_date: datetime,
    to_date: datetime,
    referrer: str,
    clickhouse_query: Query,
    request_settings: RequestSettings,
    reader: Reader[SqlQuery],
) -> QueryResult:
    """
    Turn the storage query into its SQL form and delegate execution to
    ``raw_query``.

    Along the way this opens two tracing spans (one around building the
    query, one around running it), increments the "execute" metric, marks
    "prepare_query" on the timer and assembles the stats dictionary that is
    handed to ``raw_query``. ``from_date`` and ``to_date`` are only used to
    compute the "num_days" stat.
    """
    # TODO: This function (well, it will be a wrapper of this function)
    # is where we will transform the result according to the
    # SelectedExpression object in the query to ensure the fields in the
    # QueryResult have the same name the user expects.
    table_name = clickhouse_query.get_data_source().format_from()

    with sentry_sdk.start_span(description="create_query", op="db") as build_span:
        sql_query = AstSqlQuery(clickhouse_query, request_settings)
        build_span.set_data("query", sql_query.sql_data())
        metrics.increment("execute")

    timer.mark("prepare_query")

    stats = {
        "clickhouse_table": table_name,
        "final": clickhouse_query.get_final(),
        "referrer": referrer,
        "num_days": (to_date - from_date).days,
        "sample": clickhouse_query.get_sample(),
    }

    # The formatted SQL doubles as the description of the execution span.
    sql_text = sql_query.format_sql()
    with sentry_sdk.start_span(description=sql_text, op="db") as run_span:
        run_span.set_tag("table", table_name)
        return raw_query(
            clickhouse_query,
            request_settings,
            sql_query,
            reader,
            timer,
            query_metadata,
            stats,
            run_span.trace_id,
        )
def _format_storage_query_and_run(
    timer: Timer,
    query_metadata: SnubaQueryMetadata,
    from_date: datetime,
    to_date: datetime,
    referrer: str,
    clickhouse_query: Query,
    request_settings: RequestSettings,
    reader: Reader[SqlQuery],
) -> QueryResult:
    """
    Format the storage query as SQL and pass it to ``raw_query`` to be run.

    The table name comes from the query's FROM clause. Query construction
    and execution each run inside their own tracing span; the "execute"
    metric is incremented and "prepare_query" is marked on the timer before
    execution. ``from_date`` and ``to_date`` only feed the "num_days" stat.
    """
    from_clause = clickhouse_query.get_from_clause()
    table = from_clause.format_from()

    with sentry_sdk.start_span(description="create_query", op="db") as creation_span:
        ast_sql_query = AstSqlQuery(clickhouse_query, request_settings)
        creation_span.set_data("query", ast_sql_query.sql_data())
        metrics.increment("execute")

    timer.mark("prepare_query")

    # Execution statistics forwarded to raw_query.
    stats = {
        "clickhouse_table": table,
        "final": clickhouse_query.get_final(),
        "referrer": referrer,
        "num_days": (to_date - from_date).days,
        "sample": clickhouse_query.get_sample(),
    }

    execution_span_cm = sentry_sdk.start_span(
        description=ast_sql_query.format_sql(), op="db"
    )
    with execution_span_cm as execution_span:
        execution_span.set_tag("table", table)
        result = raw_query(
            clickhouse_query,
            request_settings,
            ast_sql_query,
            reader,
            timer,
            query_metadata,
            stats,
            execution_span.trace_id,
        )
        return result