Ejemplo n.º 1
0
def test_project_extension_query_adds_rate_limits():
    """Check that processing a project query appends one rate limit.

    A request for projects 2 and 3 is run through the project extension
    processor; afterwards the request settings must hold exactly one more
    rate limit than before, bucketed under '2' with both limits at 1000.
    """
    processor = ProjectExtensionProcessor(project_column="project_id")
    extension = ProjectExtension(processor=processor)
    payload = validate_jsonschema({'project': [2, 3]}, extension.get_schema())
    query = Query({"conditions": []}, TableSource("my_table", ColumnSet([])))
    settings = RequestSettings(turbo=False, consistent=False, debug=False)

    # Snapshot the count first so the test does not depend on whatever
    # rate limits the settings object may already carry.
    count_before = len(settings.get_rate_limit_params())

    extension.get_processor().process_query(query, payload, settings)

    limits_after = settings.get_rate_limit_params()
    # make sure a rate limit was added by the processing
    assert len(limits_after) == count_before + 1

    # The processor is expected to have appended its limit last.
    newest = limits_after[-1]
    assert newest.bucket == '2'
    assert newest.per_second_limit == 1000
    assert newest.concurrent_limit == 1000
Ejemplo n.º 2
0
    def process_query(self, query: Query,
                      request_settings: RequestSettings) -> None:
        """Attach a per-project rate limit to ``request_settings``.

        Nothing is added when the settings already contain a rate limit
        named ``PROJECT_RATE_LIMIT_NAME`` or when no project ids are found
        in ``query`` for ``self.project_column``.

        The limits come from the ``project_per_second_limit`` and
        ``project_concurrent_limit`` configs (falling back to 1000), and can
        be overridden per project through the same keys suffixed with
        ``_<project_id>``.
        """
        # If the settings already have a project rate limit, leave them alone.
        already_limited = any(
            params.rate_limit_name == PROJECT_RATE_LIMIT_NAME
            for params in request_settings.get_rate_limit_params()
        )
        if already_limited:
            return

        candidates = get_project_ids_in_query_ast(query, self.project_column)
        if not candidates:
            return

        # TODO: Use all the projects, not just one
        chosen_project = candidates.pop()

        # Global limits, used as the fallback for the per-project lookup.
        default_per_second, default_concurrent = get_configs([
            ("project_per_second_limit", 1000),
            ("project_concurrent_limit", 1000),
        ])

        # Specific projects can have their rate limits overridden
        override_keys = [
            ("project_per_second_limit_{}".format(chosen_project),
             default_per_second),
            ("project_concurrent_limit_{}".format(chosen_project),
             default_concurrent),
        ]
        per_second, concurrent = get_configs(override_keys)

        request_settings.add_rate_limit(
            RateLimitParameters(
                rate_limit_name=PROJECT_RATE_LIMIT_NAME,
                bucket=str(chosen_project),
                per_second_limit=per_second,
                concurrent_limit=concurrent,
            )
        )
Ejemplo n.º 3
0
def execute_query_with_rate_limits(
    clickhouse_query: Union[Query, CompositeQuery[Table]],
    request_settings: RequestSettings,
    formatted_query: FormattedQuery,
    reader: Reader,
    timer: Timer,
    stats: MutableMapping[str, Any],
    query_settings: MutableMapping[str, Any],
) -> Result:
    """Run ``execute_query`` inside the request's rate-limit context.

    The rate limits held by ``request_settings`` are entered through a
    ``RateLimitAggregator``; its stats are merged into ``stats`` and the
    timer is marked with ``"rate_limit"``.

    When ``query_settings`` carries ``"max_threads"`` and the project rate
    limit reports more than one concurrent query, ``"max_threads"`` is
    lowered by ``concurrent - 1`` (never below 1) before the query runs.
    Both ``stats`` and ``query_settings`` are mutated in place.

    Returns whatever ``execute_query`` returns.
    """
    # XXX: We should consider moving this so that it applies to the logical
    # query, not the physical query.
    rate_limits = request_settings.get_rate_limit_params()
    with RateLimitAggregator(rate_limits) as limiter_stats:
        stats.update(limiter_stats.to_dict())
        timer.mark("rate_limit")

        project_stats = limiter_stats.get_stats(PROJECT_RATE_LIMIT_NAME)

        if "max_threads" in query_settings and project_stats is not None:
            if project_stats.concurrent > 1:
                # Reduce the thread budget by the number of other queries
                # counted against the project, keeping at least one thread.
                current_threads = query_settings["max_threads"]
                reduced = current_threads - project_stats.concurrent + 1
                query_settings["max_threads"] = max(1, reduced)

        return execute_query(
            clickhouse_query,
            request_settings,
            formatted_query,
            reader,
            timer,
            stats,
            query_settings,
        )
Ejemplo n.º 4
0
def test_project_extension_project_rate_limits_are_overridden():
    """Check that per-project config overrides drive the added rate limit.

    Overrides are stored for project 2 before a request for projects 2 and 3
    is processed; the last rate limit on the request settings must then be
    bucketed under '2' and carry the overridden values.
    """
    extension = ProjectExtension(processor=ProjectExtensionProcessor())
    payload = validate_jsonschema({'project': [2, 3]}, extension.get_schema())
    query = Query({'conditions': []})
    settings = RequestSettings(turbo=False, consistent=False, debug=False)

    # Per-project overrides for project 2, set before processing.
    overrides = (
        ('project_per_second_limit_2', 5),
        ('project_concurrent_limit_2', 10),
    )
    for config_key, config_value in overrides:
        state.set_config(config_key, config_value)

    extension.get_processor().process_query(query, payload, settings)

    # The processor is expected to have appended its limit last.
    newest = settings.get_rate_limit_params()[-1]

    assert newest.bucket == '2'
    assert newest.per_second_limit == 5
    assert newest.concurrent_limit == 10