Exemple #1
0
    def process_query(self, query: Query,
                      request_settings: RequestSettings) -> None:
        """
        Attach a project-level rate limit to the request settings.

        Nothing is added when the settings already carry a rate limit named
        PROJECT_RATE_LIMIT_NAME, or when no project id is found in the query
        for ``self.project_column``.
        """
        already_limited = any(
            params.rate_limit_name == PROJECT_RATE_LIMIT_NAME
            for params in request_settings.get_rate_limit_params()
        )
        if already_limited:
            return

        found_projects = get_object_ids_in_query_ast(query, self.project_column)
        if not found_projects:
            return

        # TODO: Use all the projects, not just one
        project = found_projects.pop()

        default_per_second, default_concurrent = get_configs([
            ("project_per_second_limit", 1000),
            ("project_concurrent_limit", 1000),
        ])

        # Project-specific config keys take precedence; the global values
        # read above are only used as their defaults.
        override_keys = [
            (f"project_per_second_limit_{project}", default_per_second),
            (f"project_concurrent_limit_{project}", default_concurrent),
        ]
        per_second_limit, concurrent_limit = get_configs(override_keys)

        request_settings.add_rate_limit(
            RateLimitParameters(
                rate_limit_name=PROJECT_RATE_LIMIT_NAME,
                bucket=str(project),
                per_second_limit=per_second_limit,
                concurrent_limit=concurrent_limit,
            )
        )
    def get_object_id(
        self, query: Query, query_settings: QuerySettings
    ) -> Optional[str]:
        """
        Return, as a string, one of the ids the query references for
        ``self.object_column``, or None when no id is found.
        """
        candidates = get_object_ids_in_query_ast(query, self.object_column)

        # TODO: Add logic for multiple IDs
        return str(candidates.pop()) if candidates else None
Exemple #3
0
def test_find_projects(
    query_body: MutableMapping[str, Any], expected_projects: Optional[Set[int]]
) -> None:
    """
    Check the project ids extracted from a query built from ``query_body``.

    When ``expected_projects`` is None, building the query is expected to
    raise ParsingException instead.
    """
    events = get_dataset("events")

    def _build_translated_query():
        # JSON body -> SnQL text -> parsed query -> identity translation.
        snql_query = json_to_snql(query_body, "events")
        parsed, _ = parse_snql_query(str(snql_query), events)
        return identity_translate(parsed)

    if expected_projects is None:
        with pytest.raises(ParsingException):
            _build_translated_query()
        return

    translated = _build_translated_query()
    found_projects = get_object_ids_in_query_ast(translated, "project_id")
    assert found_projects == expected_projects
    def get_object_id(
        self, query: Query, query_settings: QuerySettings
    ) -> Optional[str]:
        """
        Return, as a string, one of the ids the query references for
        ``self.object_column``, or None when no id is found.

        When ``self.query_settings_field`` is set and the query settings
        expose a non-None value for that attribute, the value is appended
        to the id as ``"<id>_<value>"``.
        """
        candidates = get_object_ids_in_query_ast(query, self.object_column)
        if not candidates:
            return None

        # TODO: Add logic for multiple IDs
        object_id = str(candidates.pop())

        field_name = self.query_settings_field
        if field_name is None:
            return object_id

        field_value = getattr(query_settings, field_name, None)
        if field_value is None:
            return object_id

        return f"{object_id}_{field_value}"
Exemple #5
0
def is_in_experiment(query: LogicalQuery, referrer: str) -> bool:
    """
    Tell whether a query belongs to the experiment.

    Only queries with referrer "tagstore.__get_tag_keys" qualify, and only
    when every project id found in the query is listed in the
    "snuplicator-sampling-projects" runtime config.
    """
    if referrer != "tagstore.__get_tag_keys":
        return False

    queried_projects = get_object_ids_in_query_ast(query, "project_id")
    if not queried_projects:
        return False

    raw_config = state.get_config("snuplicator-sampling-projects", "")

    if isinstance(raw_config, str) and raw_config != "":
        # should be in the form [1,2,3]: drop the first and last character
        # (the brackets) and parse the comma separated ids in between.
        inner = raw_config[1:-1]
        sampled_projects = {int(token) for token in inner.split(",") if token}
    elif isinstance(raw_config, (int, float)):
        # A single numeric config value is treated as one project id.
        sampled_projects = {int(raw_config)}
    else:
        sampled_projects = set()

    return queried_projects.issubset(sampled_projects)
Exemple #6
0
    def process_query(self, query: Query, query_settings: QuerySettings) -> None:
        """
        Apply a per-referrer, per-project thread quota to the query settings.

        Returns without changes when the processor is disabled through
        ENABLED_CONFIG, when no project id is found in the query, or when no
        quota is configured for the referrer/project pair.
        """
        if not get_config(ENABLED_CONFIG, 1):
            return

        found_projects = get_object_ids_in_query_ast(query, self.__project_field)
        if not found_projects:
            return

        # TODO: Like for the rate limiter Add logic for multiple IDs
        project = str(found_projects.pop())

        quota_key = f"{REFERRER_PROJECT_CONFIG}_{query_settings.referrer}_{project}"
        max_threads = get_config(quota_key)
        if not max_threads:
            return

        assert isinstance(max_threads, int)
        quota = ResourceQuota(max_threads=max_threads)
        query_settings.set_resource_quota(quota)
Exemple #7
0
    def _groups_to_exclude(
        self, query: Query, group_ids_to_exclude: Set[int]
    ) -> Set[int]:
        """
        Narrow the group ids to exclude down to those the query can match.

        ``group_ids_to_exclude`` holds the group ids the replacements require
        excluding for the projects this query touches. When the query itself
        names specific group ids, only the ones present in both sets need to
        be excluded; otherwise the full set is returned unchanged.

        Eg.
        - The query specifically looks for group ids: {1, 2}
        - The replacements on the projects require exclusion of groups: {1, 3, 4}
        - The query only needs to exclude group id 1
        """
        queried_groups = get_object_ids_in_query_ast(query, self.__groups_column)

        if not queried_groups:
            return group_ids_to_exclude

        return group_ids_to_exclude.intersection(queried_groups)
Exemple #8
0
    def process_query(self, query: Query,
                      request_settings: RequestSettings) -> None:
        """
        Set the ``final`` flag on the query's from clause, or add a group
        exclusion condition instead, based on the replacement flags of the
        projects found in the query.

        Turbo requests are left untouched. When no project id is found the
        from clause is rewritten with ``final=False``.
        """
        if request_settings.get_turbo():
            return

        project_ids = get_object_ids_in_query_ast(query, self.__project_column)

        use_final = False
        if project_ids:
            final, exclude_group_ids = get_projects_query_flags(
                list(project_ids),
                self.__replacer_state_name,
            )

            if final:
                metrics.increment("final", tags={"cause": "final_flag"})

            if final or not exclude_group_ids:
                # Either final was requested outright, or there is nothing
                # to exclude: just carry the flag over.
                use_final = final
            else:
                # If the number of groups to exclude exceeds our limit, the
                # query should just use final instead of the exclusion set.
                group_limit = get_config(
                    "max_group_ids_exclude",
                    settings.REPLACER_MAX_GROUP_IDS_TO_EXCLUDE)
                assert isinstance(group_limit, int)

                if len(exclude_group_ids) > group_limit:
                    metrics.increment("final", tags={"cause": "max_groups"})
                    use_final = True
                else:
                    group_column = FunctionCall(
                        None, "assumeNotNull",
                        (Column(None, None, "group_id"), ))
                    excluded = [
                        Literal(None, group_id)
                        for group_id in exclude_group_ids
                    ]
                    query.add_condition_to_ast(
                        not_in_condition(group_column, excluded))

        from_clause = replace(query.get_from_clause(), final=use_final)
        query.set_from_clause(from_clause)
Exemple #9
0
    def process_query(self, query: Query, query_settings: QuerySettings) -> None:
        """
        Decide whether the query is marked final or gets a group exclusion
        condition, based on the replacement flags loaded for its projects.

        Turbo queries are left untouched. In every other case the method
        ends by calling ``self._set_query_final`` with the chosen value.
        """
        if query_settings.get_turbo():
            return

        project_ids = get_object_ids_in_query_ast(query, self.__project_column)
        if project_ids is None:
            self._set_query_final(query, False)
            return

        flags: ProjectsQueryFlags = ProjectsQueryFlags.load_from_redis(
            list(project_ids), self.__replacer_state_name
        )

        overlaps_replacement = self._query_overlaps_replacements(
            query, flags.latest_replacement_time
        )
        if not overlaps_replacement:
            self._set_query_final(query, False)
            return

        tags = self._initialize_tags(query_settings, flags)

        if flags.needs_final:
            tags["cause"] = "final_flag"
            metrics.increment(name=FINAL_METRIC, tags=tags)
            self._set_query_final(query, True)
            return

        if not flags.group_ids_to_exclude:
            self._set_query_final(query, False)
            return

        # If the number of groups to exclude exceeds our limit, the query
        # should just use final instead of the exclusion set.
        exclusion_limit = get_config(
            "max_group_ids_exclude",
            settings.REPLACER_MAX_GROUP_IDS_TO_EXCLUDE,
        )
        assert isinstance(exclusion_limit, int)
        excluded_groups = self._groups_to_exclude(query, flags.group_ids_to_exclude)

        if len(excluded_groups) > exclusion_limit:
            tags["cause"] = "max_groups"
            metrics.increment(name=FINAL_METRIC, tags=tags)
            self._set_query_final(query, True)
            return

        if excluded_groups:
            group_column = FunctionCall(
                None,
                "assumeNotNull",
                (Column(None, None, self.__groups_column),),
            )
            query.add_condition_to_ast(
                not_in_condition(
                    group_column,
                    [Literal(None, group_id) for group_id in excluded_groups],
                )
            )

        self._set_query_final(query, False)
Exemple #10
0
def build_request(
    body: MutableMapping[str, Any],
    parser: Parser,
    settings_class: Union[Type[HTTPQuerySettings], Type[SubscriptionQuerySettings]],
    schema: RequestSchema,
    dataset: Dataset,
    timer: Timer,
    referrer: str,
    custom_processing: Optional[CustomProcessors] = None,
) -> Request:
    """
    Validate a raw request body and assemble the Request object from it.

    The body is validated with ``schema``, a settings object of
    ``settings_class`` is created, and ``parser`` is called to produce the
    query and its anonymized SnQL form. When the query references exactly one
    project id (or org id), it is set as a Sentry tag. The parsed query and
    the raw body are also attached to the Sentry span and a breadcrumb.

    :param body: The raw request body; stored on the Request as
        ``original_body``.
    :param parser: Callable invoked with the validated request parts, the
        settings object, the dataset and ``custom_processing``.
    :param settings_class: Either HTTPQuerySettings or
        SubscriptionQuerySettings.
    :param schema: Schema used to validate ``body``.
    :param dataset: Dataset passed through to ``parser``.
    :param timer: Timer marked with "validate_schema" on success and handed
        to the error recording helpers on failure.
    :param referrer: Referrer stored in the query settings (HTTP case) and
        in the attribution info.
    :param custom_processing: Optional processors passed through to
        ``parser``.
    :return: The assembled Request.
    :raises: Any exception raised while building is re-raised after being
        recorded (invalid request vs. generic error).
    """
    with sentry_sdk.start_span(description="build_request", op="validate") as span:
        try:
            request_parts = schema.validate(body)
            if settings_class == HTTPQuerySettings:
                # Start from the settings found in the request and replace
                # "consistent" with the result of _consistent_override.
                query_settings: MutableMapping[str, bool | str] = {
                    **request_parts.query_settings,
                    "consistent": _consistent_override(
                        request_parts.query_settings.get("consistent", False), referrer
                    ),
                }
                query_settings["referrer"] = referrer
                # TODO: referrer probably doesn't need to be passed in, it should be from the body
                settings_obj: Union[
                    HTTPQuerySettings, SubscriptionQuerySettings
                ] = settings_class(
                    **query_settings,
                )
            elif settings_class == SubscriptionQuerySettings:
                settings_obj = settings_class(
                    consistent=_consistent_override(True, referrer),
                )
            # NOTE(review): there is no else branch; for any other
            # settings_class, settings_obj is unbound when used below.
            query, snql_anonymized = parser(
                request_parts, settings_obj, dataset, custom_processing
            )

            # Tag the Sentry scope only when the query targets exactly one
            # project / organization.
            project_ids = get_object_ids_in_query_ast(query, "project_id")
            if project_ids is not None and len(project_ids) == 1:
                sentry_sdk.set_tag("snuba_project_id", project_ids.pop())

            org_ids = get_object_ids_in_query_ast(query, "org_id")
            if org_ids is not None and len(org_ids) == 1:
                sentry_sdk.set_tag("snuba_org_id", org_ids.pop())
            # Copy the attribution info so app_id and referrer can be
            # overwritten before building AttributionInfo.
            attribution_info = dict(request_parts.attribution_info)
            # TODO: clean this up
            attribution_info["app_id"] = get_app_id(
                request_parts.attribution_info["app_id"]
            )
            attribution_info["referrer"] = referrer

            request_id = uuid.uuid4().hex
            request = Request(
                id=request_id,
                # TODO: Replace this with the actual query raw body.
                # this can have an impact on subscriptions so we need
                # to be careful with the change.
                original_body=body,
                query=query,
                attribution_info=AttributionInfo(**attribution_info),
                query_settings=settings_obj,
                snql_anonymized=snql_anonymized,
            )
        except (InvalidJsonRequestException, InvalidQueryException) as exception:
            # Invalid input is recorded separately from other failures.
            record_invalid_request(timer, referrer)
            raise exception
        except Exception as exception:
            record_error_building_request(timer, referrer)
            raise exception

        # Attach the parsed query (one entry per line of its repr) and the
        # raw body (wrapped at 100 characters) to the span.
        span.set_data(
            "snuba_query_parsed",
            repr(query).split("\n"),
        )
        span.set_data(
            "snuba_query_raw",
            textwrap.wrap(repr(request.original_body), 100, break_long_words=False),
        )
        sentry_sdk.add_breadcrumb(
            category="query_info",
            level="info",
            message="snuba_query_raw",
            data={
                "query": textwrap.wrap(
                    repr(request.original_body), 100, break_long_words=False
                )
            },
        )

        timer.mark("validate_schema")
        return request
Exemple #11
0
def build_request(
    body: MutableMapping[str, Any],
    parser: Parser,
    settings_class: Union[Type[HTTPRequestSettings],
                          Type[SubscriptionRequestSettings]],
    schema: RequestSchema,
    dataset: Dataset,
    timer: Timer,
    referrer: str,
) -> Request:
    """
    Validate a raw request body and assemble the Request object from it.

    The body is validated with ``schema``, a settings object of
    ``settings_class`` is created, and ``parser`` is called to produce the
    query. When the query references exactly one project id (or org id), it
    is set as a Sentry tag.

    :param body: The raw request body to validate.
    :param parser: Callable invoked with the validated request parts, the
        settings object and the dataset.
    :param settings_class: Either HTTPRequestSettings or
        SubscriptionRequestSettings.
    :param schema: Schema used to validate ``body``.
    :param dataset: Dataset passed through to ``parser``.
    :param timer: Timer marked with "validate_schema" on success and handed
        to the error recording helpers on failure.
    :param referrer: Referrer passed to the Request and to
        ``_consistent_override``.
    :return: The assembled Request.
    :raises: Any exception raised while building is re-raised after being
        recorded (invalid request vs. generic error).
    """
    with sentry_sdk.start_span(description="build_request",
                               op="validate") as span:
        try:
            request_parts = schema.validate(body)
            if settings_class == HTTPRequestSettings:
                # Start from the settings found in the request and replace
                # "consistent" with the result of _consistent_override.
                settings = {
                    **request_parts.settings,
                    "consistent":
                    _consistent_override(
                        request_parts.settings.get("consistent", False),
                        referrer),
                }
                settings_obj: Union[
                    HTTPRequestSettings,
                    SubscriptionRequestSettings] = settings_class(**settings)
            elif settings_class == SubscriptionRequestSettings:
                settings_obj = settings_class(
                    consistent=_consistent_override(True, referrer))
            # NOTE(review): there is no else branch; for any other
            # settings_class, settings_obj is unbound when used below.

            query = parser(request_parts, settings_obj, dataset)

            # Tag the Sentry scope only when the query targets exactly one
            # project / organization.
            project_ids = get_object_ids_in_query_ast(query, "project_id")
            if project_ids is not None and len(project_ids) == 1:
                sentry_sdk.set_tag("snuba_project_id", project_ids.pop())

            org_ids = get_object_ids_in_query_ast(query, "org_id")
            if org_ids is not None and len(org_ids) == 1:
                sentry_sdk.set_tag("snuba_org_id", org_ids.pop())

            request_id = uuid.uuid4().hex
            request = Request(
                request_id,
                # TODO: Replace this with the actual query raw body.
                # this can have an impact on subscriptions so we need
                # to be careful with the change.
                ChainMap(request_parts.query,
                         *request_parts.extensions.values()),
                query,
                settings_obj,
                referrer,
            )
        except (InvalidJsonRequestException,
                InvalidQueryException) as exception:
            # Invalid input is recorded separately from other failures.
            record_invalid_request(timer, referrer)
            raise exception
        except Exception as exception:
            record_error_building_request(timer, referrer)
            raise exception

        span.set_data("snuba_query", request.body)

        timer.mark("validate_schema")
        return request
Exemple #12
0
 def _visit_simple_query(self,
                         data_source: ProcessableQuery[Entity]) -> Set[int]:
     """
     Return the project ids found in a simple query, or an empty set when
     none are found.
     """
     found_projects = get_object_ids_in_query_ast(data_source, "project_id")
     return found_projects or set()
Exemple #13
0
def test_find_projects(query_body: MutableMapping[str, Any],
                       expected_projects: Set[int]) -> None:
    """
    Check that the project ids extracted from the parsed and translated
    query match ``expected_projects``.
    """
    dataset = get_dataset("events")
    parsed = parse_query(query_body, dataset)
    translated = identity_translate(parsed)

    found_projects = get_object_ids_in_query_ast(translated, "project_id")
    assert found_projects == expected_projects