예제 #1
0
    def __init__(
        self,
        query: Query,
        settings: RequestSettings,
    ) -> None:
        """
        Capture everything needed from the Clickhouse query and the request
        settings.

        The AST clauses and the query-level options (granularity, limit,
        limit by, offset, final, sample, totals, prewhere) are read from
        ``query``. The turbo flag is read from ``settings``, which is also
        stored as a whole.

        Raises AssertionError if the query carries a HAVING clause but no
        GROUP BY clause.
        """
        # Structure of the Clickhouse query. These are referenced directly
        # rather than through the query object: it makes it easier to
        # process this query independently from the Clickhouse Query, and
        # it is risk free because they are immutable.
        self.__selected_columns = query.get_selected_columns_from_ast()
        self.__condition = query.get_condition_from_ast()
        self.__groupby = query.get_groupby_from_ast()
        self.__having = query.get_having_from_ast()
        self.__orderby = query.get_orderby_from_ast()
        self.__data_source = query.get_data_source()
        self.__arrayjoin = query.get_arrayjoin_from_ast()
        self.__granularity = query.get_granularity()
        self.__limit = query.get_limit()
        self.__limitby = query.get_limitby()
        self.__offset = query.get_offset()

        # HAVING is only accepted together with a GROUP BY.
        assert (
            not self.__having or self.__groupby
        ), "found HAVING clause with no GROUP BY"

        # Request-level settings: the full object plus the turbo flag.
        self.__settings = settings
        self.__turbo = settings.get_turbo()

        # Remaining query-level options.
        self.__final = query.get_final()
        self.__sample = query.get_sample()
        self.__hastotals = query.has_totals()
        self.__prewhere = query.get_prewhere_ast()

        # Nothing is formatted at construction time; these start out unset.
        # NOTE(review): presumably filled in later by other methods of the
        # class, which are not visible here -- confirm.
        self.__sql_data_list: Optional[Sequence[Tuple[str, str]]] = None
        self.__formatted_query: Optional[str] = None
        self.__sql_data: Optional[Mapping[str, str]] = None
예제 #2
0
def execute_query(
    # TODO: Passing the whole clickhouse query here is only needed for as
    # long as the execute method depends on it. Once it does not, this file
    # can rely either entirely on the clickhouse query or entirely on the
    # formatter.
    clickhouse_query: Query,
    request_settings: RequestSettings,
    formatted_query: SqlQuery,
    reader: Reader[SqlQuery],
    timer: Timer,
    stats: MutableMapping[str, Any],
    query_settings: MutableMapping[str, Any],
) -> Result:
    """
    Run ``formatted_query`` through ``reader`` and return its result.

    Both ``query_settings`` and ``stats`` are modified in place:

    - ``query_settings["use_uncompressed_cache"]`` is set to 0 when the
      query references more distinct columns than the
      ``uncompressed_cache_max_cols`` config value (5 when not configured).
    - ``query_settings["load_balancing"]`` and
      ``query_settings["max_threads"]`` are set to ``"in_order"`` and 1
      when the request settings ask for a consistent read.
    - ``stats`` receives ``consistent``, ``result_rows`` and
      ``result_cols``.

    ``timer`` is marked with ``"execute"`` once the reader has returned.
    """
    # Experiment, if we are going to grab more than X columns worth of data,
    # don't use uncompressed_cache in ClickHouse.
    max_cached_columns = state.get_config("uncompressed_cache_max_cols", 5)
    # Aliases are skipped when counting: a column is identified only by its
    # (table name, column name) pair, so duplicates collapse in the set.
    distinct_columns = {
        (column.table_name, column.column_name)
        for column in clickhouse_query.get_all_ast_referenced_columns()
    }
    if len(distinct_columns) > max_cached_columns:
        query_settings["use_uncompressed_cache"] = 0

    # A consistent read forces the query onto the first shard replica,
    # which should have synchronously received any cluster writes before
    # this query is run.
    is_consistent = request_settings.get_consistent()
    stats["consistent"] = is_consistent
    if is_consistent:
        query_settings["load_balancing"] = "in_order"
        query_settings["max_threads"] = 1

    with_totals = clickhouse_query.has_totals()
    result = reader.execute(
        formatted_query, query_settings, with_totals=with_totals
    )
    timer.mark("execute")

    # Record the size of what came back: row count and column count.
    stats.update(
        result_rows=len(result["data"]),
        result_cols=len(result["meta"]),
    )
    return result
예제 #3
0
def test_query_parameters() -> None:
    """
    Check that the options given to the Query constructor come back
    unchanged from the corresponding accessors, and that the table passed
    as first argument is what get_from_clause() exposes.
    """
    source = Table("my_table", ColumnSet([]))
    query = Query(
        source,
        limitby=(100, "environment"),
        limit=100,
        offset=50,
        totals=True,
        granularity=60,
    )

    # The data source handed to the constructor.
    assert query.get_from_clause().table_name == "my_table"

    # Paging and limiting options.
    assert query.get_limit() == 100
    assert query.get_offset() == 50
    assert query.get_limitby() == (100, "environment")

    # Remaining options; totals must be the boolean True, not just truthy.
    assert query.get_granularity() == 60
    assert query.has_totals() is True