def __init__(self, query: Query, settings: RequestSettings,) -> None:
    """Snapshot the pieces of a Clickhouse query needed for formatting.

    The individual clauses are pulled out of the query up front: this
    makes it easier to process the query independently from the
    Clickhouse Query object, and is safe because the clauses are
    immutable.
    """
    # Core clauses of the Clickhouse query structure.
    self.__selected_columns = query.get_selected_columns_from_ast()
    self.__condition = query.get_condition_from_ast()
    self.__prewhere = query.get_prewhere_ast()
    self.__groupby = query.get_groupby_from_ast()
    self.__having = query.get_having_from_ast()
    # A HAVING clause is only meaningful with an aggregation.
    if self.__having:
        assert self.__groupby, "found HAVING clause with no GROUP BY"
    self.__orderby = query.get_orderby_from_ast()
    self.__data_source = query.get_data_source()
    self.__arrayjoin = query.get_arrayjoin_from_ast()

    # Pagination / bucketing parameters.
    self.__granularity = query.get_granularity()
    self.__limit = query.get_limit()
    self.__limitby = query.get_limitby()
    self.__offset = query.get_offset()

    # Execution flags and modifiers.
    self.__turbo = settings.get_turbo()
    self.__final = query.get_final()
    self.__sample = query.get_sample()
    self.__hastotals = query.has_totals()
    self.__settings = settings

    # Lazily-populated formatting caches.
    self.__sql_data_list: Optional[Sequence[Tuple[str, str]]] = None
    self.__formatted_query: Optional[str] = None
    self.__sql_data: Optional[Mapping[str, str]] = None
def execute_query(
    # TODO: Passing the whole clickhouse query here is needed as long
    # as the execute method depends on it. Otherwise we can make this
    # file rely either entirely on clickhouse query or entirely on
    # the formatter.
    clickhouse_query: Query,
    request_settings: RequestSettings,
    formatted_query: SqlQuery,
    reader: Reader[SqlQuery],
    timer: Timer,
    stats: MutableMapping[str, Any],
    query_settings: MutableMapping[str, Any],
) -> Result:
    """
    Execute a query and return a result.
    """
    # Experiment: when the query touches more than X columns worth of
    # data, disable ClickHouse's uncompressed cache.
    max_cols = state.get_config("uncompressed_cache_max_cols", 5)
    referenced_columns = {
        # Skip aliases when counting columns
        (col.table_name, col.column_name)
        for col in clickhouse_query.get_all_ast_referenced_columns()
    }
    if len(referenced_columns) > max_cols:
        query_settings["use_uncompressed_cache"] = 0

    # Force the query onto the first shard replica, which should have
    # synchronously received any cluster writes performed before this
    # query runs.
    consistent = request_settings.get_consistent()
    stats["consistent"] = consistent
    if consistent:
        query_settings["load_balancing"] = "in_order"
        query_settings["max_threads"] = 1

    result = reader.execute(
        formatted_query,
        query_settings,
        with_totals=clickhouse_query.has_totals(),
    )

    timer.mark("execute")
    stats["result_rows"] = len(result["data"])
    stats["result_cols"] = len(result["meta"])
    return result
def test_query_parameters() -> None:
    """The pagination/bucketing parameters round-trip through Query."""
    query = Query(
        Table("my_table", ColumnSet([])),
        limitby=(100, "environment"),
        limit=100,
        offset=50,
        totals=True,
        granularity=60,
    )

    assert query.get_from_clause().table_name == "my_table"
    assert query.get_granularity() == 60
    assert query.has_totals() is True
    assert query.get_offset() == 50
    assert query.get_limit() == 100
    assert query.get_limitby() == (100, "environment")