Code example #1
    def process_query(self, query: Query, query_settings: QuerySettings) -> None:
        # If the settings don't already have an object rate limit, add one
        if self._is_already_applied(query_settings):
            return
        per_second_name = self.get_per_second_name(query, query_settings)
        concurrent_name = self.get_concurrent_name(query, query_settings)
        object_rate_limit, object_concurrent_limit = get_configs(
            [
                (per_second_name, self.default_limit),
                (concurrent_name, self.default_limit),
            ]
        )
        obj_id = self.get_object_id(query, query_settings)
        if obj_id is None:
            return
        # Specific objects can have their rate limits overridden
        (per_second, concurr) = get_configs(
            [
                (f"{per_second_name}_{obj_id}", object_rate_limit),
                (f"{concurrent_name}_{obj_id}", object_concurrent_limit),
            ]
        )

        rate_limit = RateLimitParameters(
            rate_limit_name=self.rate_limit_name,
            bucket=str(obj_id),
            per_second_limit=per_second,
            concurrent_limit=concurr,
        )

        query_settings.add_rate_limit(rate_limit)
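The two-stage get_configs call above means a per-object config key silently overrides the shared default. A minimal self-contained sketch of that fallback chain, with a plain dict standing in for snuba's runtime config store and illustrative key names:

from typing import Any, Mapping, Optional, Sequence, Tuple

# Hypothetical config store; only "project_per_second_limit_42" is overridden.
_CONFIG: Mapping[str, Any] = {"project_per_second_limit_42": 10}

def get_configs(pairs: Sequence[Tuple[str, Optional[Any]]]) -> Sequence[Optional[Any]]:
    # Each (key, default) pair falls back to its default when the key is unset.
    return [_CONFIG.get(key, default) for key, default in pairs]

# Shared defaults first ...
per_second_default, concurrent_default = get_configs(
    [("project_per_second_limit", 1000), ("project_concurrent_limit", 1000)]
)
# ... then per-object keys, defaulting to the shared values, for object 42.
obj_id = 42
per_second, concurrent = get_configs(
    [
        (f"project_per_second_limit_{obj_id}", per_second_default),
        (f"project_concurrent_limit_{obj_id}", concurrent_default),
    ]
)
assert (per_second, concurrent) == (10, 1000)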
Code example #2
File: db_query.py Project: getsentry/snuba
def execute_query_with_rate_limits(
    clickhouse_query: Union[Query, CompositeQuery[Table]],
    query_settings: QuerySettings,
    formatted_query: FormattedQuery,
    reader: Reader,
    timer: Timer,
    stats: MutableMapping[str, Any],
    clickhouse_query_settings: MutableMapping[str, Any],
    robust: bool,
) -> Result:
    # Global rate limiter is added at the end of the chain to be
    # the last for evaluation.
    # This allows us not to borrow capacity from the global quota
    # during the evaluation if one of the more specific limiters
    # (like the project rate limiter) rejects the query first.
    query_settings.add_rate_limit(get_global_rate_limit_params())
    # XXX: We should consider moving this so that it applies to the logical
    # query, not the physical query.
    with RateLimitAggregator(
        query_settings.get_rate_limit_params()
    ) as rate_limit_stats_container:
        stats.update(rate_limit_stats_container.to_dict())
        timer.mark("rate_limit")

        project_rate_limit_stats = rate_limit_stats_container.get_stats(
            PROJECT_RATE_LIMIT_NAME
        )

        thread_quota = query_settings.get_resource_quota()
        if (
            ("max_threads" in clickhouse_query_settings or thread_quota is not None)
            and project_rate_limit_stats is not None
            and project_rate_limit_stats.concurrent > 1
        ):
            maxt = (
                clickhouse_query_settings["max_threads"]
                if thread_quota is None
                else thread_quota.max_threads
            )
            clickhouse_query_settings["max_threads"] = max(
                1, maxt - project_rate_limit_stats.concurrent + 1
            )

        _record_rate_limit_metrics(rate_limit_stats_container, reader, stats)

        return execute_query(
            clickhouse_query,
            query_settings,
            formatted_query,
            reader,
            timer,
            stats,
            clickhouse_query_settings,
            robust=robust,
        )
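The max_threads adjustment above hands each additional concurrent query for a project one fewer thread, never dropping below one. A worked sketch of just that arithmetic (the numbers are illustrative, not snuba defaults):

def throttled_max_threads(max_threads: int, concurrent_queries: int) -> int:
    # Mirrors max(1, maxt - concurrent + 1): every extra concurrent query
    # surrenders one thread, but a query always keeps at least one.
    return max(1, max_threads - concurrent_queries + 1)

for concurrent in (1, 2, 4, 8, 16):
    print(concurrent, throttled_max_threads(8, concurrent))
# 1 -> 8, 2 -> 7, 4 -> 5, 8 -> 1, 16 -> 1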
Code example #3
File: table_rate_limit.py Project: getsentry/snuba
    def process_query(self, query: Query, query_settings: QuerySettings) -> None:
        table_name = query.get_from_clause().table_name
        (per_second, concurr) = get_configs(
            [
                (f"table_per_second_limit_{table_name}{self.__suffix}", 5000),
                (f"table_concurrent_limit_{table_name}{self.__suffix}", 1000),
            ]
        )

        rate_limit = RateLimitParameters(
            rate_limit_name=TABLE_RATE_LIMIT_NAME,
            bucket=table_name,
            per_second_limit=per_second,
            concurrent_limit=concurr,
        )

        query_settings.add_rate_limit(rate_limit)
Code example #4
    def process_query(self, query: Query, query_settings: QuerySettings) -> None:
        enabled = get_config(ENABLED_CONFIG, 1)
        if not enabled:
            return

        project_ids = get_object_ids_in_query_ast(query, self.__project_field)
        if not project_ids:
            return

        # TODO: As with the rate limiter, add logic to handle multiple IDs
        project_id = str(project_ids.pop())
        thread_quota = get_config(
            f"{REFERRER_PROJECT_CONFIG}_{query_settings.referrer}_{project_id}"
        )

        if not thread_quota:
            return

        assert isinstance(thread_quota, int)
        query_settings.set_resource_quota(ResourceQuota(max_threads=thread_quota))
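The quota set here is consumed later by execute_query_with_rate_limits (code example #2), where thread_quota.max_threads takes precedence over the value in clickhouse_query_settings. A toy sketch of that hand-off, with stand-in classes that mirror only the getter/setter interface used above:

from dataclasses import dataclass
from typing import Optional

@dataclass
class ResourceQuota:
    max_threads: int

class QuerySettings:
    def __init__(self, referrer: str = "") -> None:
        self.referrer = referrer
        self._quota: Optional[ResourceQuota] = None

    def set_resource_quota(self, quota: ResourceQuota) -> None:
        self._quota = quota

    def get_resource_quota(self) -> Optional[ResourceQuota]:
        return self._quota

settings = QuerySettings(referrer="api.trends")  # referrer name is made up
settings.set_resource_quota(ResourceQuota(max_threads=4))
quota = settings.get_resource_quota()
assert quota is not None and quota.max_threads == 4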
Code example #5
File: db_query.py Project: getsentry/snuba
def execute_query(
    # TODO: Passing the whole clickhouse query here is needed as long
    # as the execute method depends on it. Otherwise we can make this
    # file rely either entirely on clickhouse query or entirely on
    # the formatter.
    clickhouse_query: Union[Query, CompositeQuery[Table]],
    query_settings: QuerySettings,
    formatted_query: FormattedQuery,
    reader: Reader,
    timer: Timer,
    stats: MutableMapping[str, Any],
    clickhouse_query_settings: MutableMapping[str, Any],
    robust: bool,
) -> Result:
    """
    Execute a query and return a result.
    """
    # Experiment, if we are going to grab more than X columns worth of data,
    # don't use uncompressed_cache in ClickHouse.
    uc_max = state.get_config("uncompressed_cache_max_cols", 5)
    assert isinstance(uc_max, int)
    column_counter = ReferencedColumnsCounter()
    column_counter.visit(clickhouse_query.get_from_clause())
    if column_counter.count_columns() > uc_max:
        clickhouse_query_settings["use_uncompressed_cache"] = 0

    # Force query to use the first shard replica, which
    # should have synchronously received any cluster writes
    # before this query is run.
    consistent = query_settings.get_consistent()
    stats["consistent"] = consistent
    if consistent:
        clickhouse_query_settings["load_balancing"] = "in_order"
        clickhouse_query_settings["max_threads"] = 1

    result = reader.execute(
        formatted_query,
        clickhouse_query_settings,
        with_totals=clickhouse_query.has_totals(),
        robust=robust,
    )

    timer.mark("execute")
    stats.update(
        {"result_rows": len(result["data"]), "result_cols": len(result["meta"])}
    )

    return result
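The uncompressed-cache gate above is a plain threshold check over the number of referenced columns. A standalone sketch of the same decision, with a column list standing in for the AST walk done by ReferencedColumnsCounter:

from typing import Any, MutableMapping, Sequence

def apply_uncompressed_cache_gate(
    referenced_columns: Sequence[str],
    clickhouse_query_settings: MutableMapping[str, Any],
    uc_max: int = 5,  # stand-in for the "uncompressed_cache_max_cols" config
) -> None:
    # Wide reads would churn the uncompressed cache, so disable it past the cap.
    if len(referenced_columns) > uc_max:
        clickhouse_query_settings["use_uncompressed_cache"] = 0

settings: MutableMapping[str, Any] = {}
apply_uncompressed_cache_gate(["a", "b", "c", "d", "e", "f"], settings)
assert settings == {"use_uncompressed_cache": 0}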
Code example #6
def _apply_turbo_sampling_if_needed(
    clickhouse_query: Union[Query, CompositeQuery[Table]],
    query_settings: QuerySettings,
) -> None:
    """
    TODO: Remove this method entirely and move the sampling logic
    into a query processor.
    """
    if isinstance(clickhouse_query, Query):
        if (
            query_settings.get_turbo()
            and not clickhouse_query.get_from_clause().sampling_rate
        ):
            clickhouse_query.set_from_clause(
                replace(
                    clickhouse_query.get_from_clause(),
                    sampling_rate=snuba_settings.TURBO_SAMPLE_RATE,
                )
            )
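set_from_clause receives a copy of the immutable from-clause built with dataclasses.replace, changing only the sampling rate. A toy version of that pattern with a simplified frozen table node (field names are illustrative, not snuba's actual Table definition):

from dataclasses import dataclass, replace
from typing import Optional

@dataclass(frozen=True)
class Table:
    table_name: str
    sampling_rate: Optional[float] = None

TURBO_SAMPLE_RATE = 0.1  # stand-in for snuba_settings.TURBO_SAMPLE_RATE

node = Table("events")
if node.sampling_rate is None:
    # replace() returns a copy of the frozen node with one field changed.
    node = replace(node, sampling_rate=TURBO_SAMPLE_RATE)
assert node == Table("events", sampling_rate=0.1)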
Code example #7
    def process_query(self, query: Query, query_settings: QuerySettings) -> None:
        if query_settings.get_turbo():
            return

        project_ids = get_object_ids_in_query_ast(query, self.__project_column)

        if project_ids is None:
            self._set_query_final(query, False)
            return

        flags: ProjectsQueryFlags = ProjectsQueryFlags.load_from_redis(
            list(project_ids), self.__replacer_state_name
        )

        query_overlaps_replacement = self._query_overlaps_replacements(
            query, flags.latest_replacement_time
        )

        if not query_overlaps_replacement:
            self._set_query_final(query, False)
            return

        tags = self._initialize_tags(query_settings, flags)
        set_final = False

        if flags.needs_final:
            tags["cause"] = "final_flag"
            metrics.increment(
                name=FINAL_METRIC,
                tags=tags,
            )
            set_final = True
        elif flags.group_ids_to_exclude:
            # If the number of groups to exclude exceeds our limit, the query
            # should just use final instead of the exclusion set.
            max_group_ids_exclude = get_config(
                "max_group_ids_exclude",
                settings.REPLACER_MAX_GROUP_IDS_TO_EXCLUDE,
            )
            assert isinstance(max_group_ids_exclude, int)
            groups_to_exclude = self._groups_to_exclude(
                query, flags.group_ids_to_exclude
            )
            if len(groups_to_exclude) > max_group_ids_exclude:
                tags["cause"] = "max_groups"
                metrics.increment(
                    name=FINAL_METRIC,
                    tags=tags,
                )
                set_final = True
            elif groups_to_exclude:
                query.add_condition_to_ast(
                    not_in_condition(
                        FunctionCall(
                            None,
                            "assumeNotNull",
                            (Column(None, None, self.__groups_column),),
                        ),
                        [Literal(None, p) for p in groups_to_exclude],
                    )
                )

        self._set_query_final(query, set_final)
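Condensed, the FINAL decision above has three outcomes: FINAL because a replacement flagged it, FINAL because excluding the groups would be too expensive, or a NOT IN filter over the excluded groups. A self-contained sketch of just that branching (thresholds and flags are illustrative stand-ins for ProjectsQueryFlags and runtime config):

from typing import List, Sequence, Tuple

def decide_final(
    needs_final: bool,
    group_ids_to_exclude: Sequence[int],
    max_group_ids_exclude: int,
) -> Tuple[bool, List[int]]:
    # Returns (use FINAL, group ids to filter with a NOT IN condition).
    if needs_final:
        return True, []
    if len(group_ids_to_exclude) > max_group_ids_exclude:
        # Excluding too many groups costs more than deduplicating with FINAL.
        return True, []
    return False, list(group_ids_to_exclude)

assert decide_final(True, [1], 3) == (True, [])
assert decide_final(False, [1, 2, 3, 4], 3) == (True, [])
assert decide_final(False, [1, 2], 3) == (False, [1, 2])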
Code example #8
    def _is_already_applied(self, query_settings: QuerySettings) -> bool:
        existing = query_settings.get_rate_limit_params()
        for ex in existing:
            if ex.rate_limit_name == self.rate_limit_name:
                return True
        return False
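This check is what makes process_query in code example #1 idempotent: a second pass over the same settings returns early instead of stacking a duplicate limit. A toy demonstration with stand-ins that mirror only the interface the check relies on:

from dataclasses import dataclass
from typing import List

@dataclass
class RateLimitParameters:
    rate_limit_name: str
    bucket: str

class QuerySettings:
    def __init__(self) -> None:
        self._rate_limits: List[RateLimitParameters] = []

    def add_rate_limit(self, params: RateLimitParameters) -> None:
        self._rate_limits.append(params)

    def get_rate_limit_params(self) -> List[RateLimitParameters]:
        return self._rate_limits

def apply_once(settings: QuerySettings, name: str) -> None:
    # Same shape as _is_already_applied: return early if this limiter ran.
    if any(ex.rate_limit_name == name for ex in settings.get_rate_limit_params()):
        return
    settings.add_rate_limit(RateLimitParameters(rate_limit_name=name, bucket="x"))

settings = QuerySettings()
apply_once(settings, "project")
apply_once(settings, "project")  # no-op the second time
assert len(settings.get_rate_limit_params()) == 1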