Example #1
def resolve_equation_list(
    equations: List[str],
    selected_columns: List[str],
    aggregates_only: Optional[bool] = False,
    auto_add: Optional[bool] = False,
    plain_math: Optional[bool] = False,
    use_snql: Optional[bool] = False,
) -> Tuple[List[JsonQueryType], List[str], List[ParsedEquation]]:
    """Given a list of equation strings, resolve them to their equivalent snuba json query formats
    :param equations: list of equations strings that haven't been parsed yet
    :param selected_columns: list of public aliases from the endpoint, can be a mix of fields and aggregates
    :param aggregates_only: Optional parameter whether we need to enforce equations don't include fields
        intended for use with event-stats where fields aren't compatible since they change grouping
    :param: auto_add: Optional parameter that will take any fields in the equation that's missing in the
        selected_columns and return a new list with them added
    :param plain_math: Allow equations that don't include any fields or functions, disabled by default
    :param use_snql: Whether we're resolving for snql or not
    """
    resolved_equations: List[JsonQueryType] = []
    parsed_equations: List[ParsedEquation] = []
    resolved_columns: List[str] = selected_columns[:]
    for index, equation in enumerate(equations):
        parsed_equation, fields, functions = parse_arithmetic(
            equation, use_snql=use_snql)

        if (len(fields) == 0 and len(functions) == 0) and not plain_math:
            raise InvalidSearchQuery(
                "Equations need to include a field or function")
        if aggregates_only and len(functions) == 0:
            raise InvalidSearchQuery(
                "Only equations on aggregate functions are supported")

        for field in fields:
            if field not in selected_columns:
                if auto_add:
                    resolved_columns.append(field)
                else:
                    raise InvalidSearchQuery(
                        f"{field} used in an equation but is not a selected field"
                    )
        for function in functions:
            if function not in selected_columns:
                if auto_add:
                    resolved_columns.append(function)
                else:
                    raise InvalidSearchQuery(
                        f"{function} used in an equation but is not a selected function"
                    )

        # We just jam everything into resolved_equations because the json format can't take arithmetic in the aggregates
        # field, but can do the aliases in the selected_columns field
        resolved_equations.append(
            parsed_equation.to_snuba_json(f"equation[{index}]"))
        # TODO: currently returning "resolved_equations" for the json syntax
        # once we're converted to SnQL this should only return parsed_equations
        parsed_equations.append(
            ParsedEquation(parsed_equation,
                           len(functions) > 0))
    return resolved_equations, resolved_columns, parsed_equations
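
# --- Illustrative sketch (not from the Sentry codebase) ---
# The loops above enforce one rule: anything an equation references must already be
# in selected_columns, unless auto_add is set, in which case it is appended instead.
# Minimal standalone version of that rule; InvalidSearchQuery is a stand-in here.
class InvalidSearchQuery(Exception):
    pass


def validate_equation_columns(fields, functions, selected_columns, auto_add=False):
    resolved_columns = selected_columns[:]
    for name, kind in [(f, "field") for f in fields] + [(f, "function") for f in functions]:
        if name not in selected_columns:
            if auto_add:
                resolved_columns.append(name)
            else:
                raise InvalidSearchQuery(f"{name} used in an equation but is not a selected {kind}")
    return resolved_columns


# auto_add appends the missing field instead of raising:
# validate_equation_columns(["spans.db"], ["count()"], ["count()"], auto_add=True)
# -> ["count()", "spans.db"]
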
Example #2
    def filter_by_stage(
        self,
        organization_id: int,
        operator: str,
        value,
        project_ids: Optional[Sequence[int]] = None,
        environments: Optional[List[str]] = None,
    ) -> models.QuerySet:
        from sentry.models import ReleaseProjectEnvironment, ReleaseStages
        from sentry.search.events.filter import to_list

        if not environments or len(environments) != 1:
            raise InvalidSearchQuery(
                "Choose a single environment to filter by release stage.")

        filters = {
            ReleaseStages.ADOPTED: Q(adopted__isnull=False, unadopted__isnull=True),
            ReleaseStages.REPLACED: Q(adopted__isnull=False, unadopted__isnull=False),
            ReleaseStages.LOW_ADOPTION: Q(adopted__isnull=True, unadopted__isnull=True),
        }
        value = to_list(value)
        operator_conversions = {"=": "IN", "!=": "NOT IN"}
        if operator in operator_conversions:
            operator = operator_conversions[operator]

        for stage in value:
            if stage not in filters:
                raise InvalidSearchQuery("Unsupported release.stage value.")

        rpes = ReleaseProjectEnvironment.objects.filter(
            release__organization_id=organization_id,
        ).select_related("release")

        if project_ids:
            rpes = rpes.filter(project_id__in=project_ids)

        query = Q()
        if operator == "IN":
            for stage in value:
                query |= filters[stage]
        elif operator == "NOT IN":
            for stage in value:
                query &= ~filters[stage]

        qs = self.filter(id__in=Subquery(
            rpes.filter(query).values_list("release_id", flat=True)))
        return qs
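
# --- Illustrative sketch (not from the Sentry codebase) ---
# The stage filters above compose differently per operator: "IN" ORs the matching
# stage conditions together, while "NOT IN" ANDs their negations. Standalone sketch
# assuming Django is installed; plain strings stand in for the ReleaseStages members.
from django.db.models import Q

STAGE_FILTERS = {
    "adopted": Q(adopted__isnull=False, unadopted__isnull=True),
    "replaced": Q(adopted__isnull=False, unadopted__isnull=False),
    "low_adoption": Q(adopted__isnull=True, unadopted__isnull=True),
}


def build_stage_query(operator, stages):
    query = Q()
    if operator == "IN":
        for stage in stages:
            query |= STAGE_FILTERS[stage]
    elif operator == "NOT IN":
        for stage in stages:
            query &= ~STAGE_FILTERS[stage]
    return query


# build_stage_query("IN", ["adopted", "replaced"])  -> Q(adopted...) | Q(adopted...)
# build_stage_query("NOT IN", ["low_adoption"])     -> ~Q(adopted__isnull=True, ...)
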
Example #3
def translate_transaction_status(val):
    if val not in SPAN_STATUS_NAME_TO_CODE:
        raise InvalidSearchQuery(
            f"Invalid value {val} for transaction.status condition. Accepted "
            f"values are {', '.join(SPAN_STATUS_NAME_TO_CODE.keys())}"
        )
    return SPAN_STATUS_NAME_TO_CODE[val]
Example #4
def convert_status_value(value, projects, user, environments):
    parsed = []
    for status in value:
        try:
            parsed.append(parse_status_value(status))
        except ValueError:
            raise InvalidSearchQuery(f"invalid status value of '{status}'")
    return parsed
Example #5
def convert_condition_to_function(cond):
    function = OPERATOR_TO_FUNCTION.get(cond[1])
    if not function:
        # It's hard to make this error more specific without exposing internals to the end user
        raise InvalidSearchQuery(
            f"Operator {cond[1]} is not a valid condition operator.")

    return [function, [cond[0], cond[2]]]
Example #6
    def resolve_metric(self, value: str) -> int:
        metric_id = indexer.resolve(constants.METRICS_MAP.get(value, value))
        if metric_id is None:
            # TODO: unsure if this should be incompatible or invalid
            raise InvalidSearchQuery(f"Metric: {value} could not be resolved")

        self.builder.metric_ids.append(metric_id)
        return metric_id
Example #7
    def visit_is_filter(self, node, children):
        # the key is "is" here, which we don't need
        negation, _, _, search_value = children

        if search_value.raw_value.startswith("["):
            raise InvalidSearchQuery('"in" syntax invalid for "is" search')

        if search_value.raw_value not in self.is_filter_translators:
            raise InvalidSearchQuery(
                'Invalid value for "is" search, valid values are {}'.format(
                    sorted(self.is_filter_translators.keys())))

        search_key, search_value = self.is_filter_translators[
            search_value.raw_value]

        operator = "!=" if self.is_negated(negation) else "="

        return SearchFilter(search_key, operator, search_value)
Example #8
def parse_search_query(query):
    try:
        tree = event_search_grammar.parse(query)
    except IncompleteParseError as e:
        raise InvalidSearchQuery("%s %s" % (
            "Parse error: %r (column %d)." % (e.expr.name, e.column()),
            "This is commonly caused by unmatched-parentheses. Enclose any text in double quotes.",
        ))
    return IssueSearchVisitor(allow_boolean=False).visit(tree)
Example #9
    def resolve_where(self, query: str) -> None:
        try:
            parsed_terms = parse_search_query(query, allow_boolean=True, params=self.params)
        except ParseError as e:
            raise InvalidSearchQuery(f"Parse error: {e.expr.name} (column {e.column():d})")

        for term in parsed_terms:
            if isinstance(term, SearchFilter):
                conditions = self.format_search_filter(term)
                if conditions:
                    self.where.append(conditions)
Example #10
    def test_discover_invalid_search_query(self, emailer, mock_query):
        de = ExportedData.objects.create(
            user=self.user,
            organization=self.org,
            query_type=ExportQueryType.DISCOVER,
            query_info={"project": [self.project.id], "field": ["title"], "query": ""},
        )

        mock_query.side_effect = InvalidSearchQuery("test")
        with self.tasks():
            assemble_download(de.id)
        error = emailer.call_args[1]["message"]
        assert error == "Invalid query. Please fix the query and try again."

        # unicode
        mock_query.side_effect = InvalidSearchQuery("\xfc")
        with self.tasks():
            assemble_download(de.id)
        error = emailer.call_args[1]["message"]
        assert error == "Invalid query. Please fix the query and try again."
Example #11
    def apply(self, queryset, search_filter):
        value = search_filter.value.raw_value
        q = self.callback(value)
        if search_filter.operator not in ("=", "!=", "IN", "NOT IN"):
            raise InvalidSearchQuery(
                f"Operator {search_filter.operator} not valid for search {search_filter}"
            )
        queryset_method = (
            queryset.filter if search_filter.operator in EQUALITY_OPERATORS else queryset.exclude
        )
        queryset = queryset_method(q)
        return queryset
Example #12
def resolve_equation_list(
    equations: List[str], snuba_filter: eventstore.Filter
) -> Dict[str, JsonQueryType]:
    selected_columns = snuba_filter.selected_columns
    for index, equation in enumerate(equations):
        # only supporting 1 operation for now
        parsed_equation, fields = parse_arithmetic(equation, max_operators=1)
        for field in fields:
            if field not in selected_columns:
                raise InvalidSearchQuery(f"{field} used in an equation but is not a selected field")
        selected_columns.append(parsed_equation.to_snuba_json(f"equation[{index}]"))
    return {"selected_columns": selected_columns}
Example #13
def parse_semver(version, operator) -> Optional[SemverFilter]:
    """
    Attempts to parse a release version using our semver syntax. version should be in
    format `<package_name>@<version>` or `<version>`, where package_name is a string and
    version is a version string matching semver format (https://semver.org/). We've
    slightly extended this format to allow up to 4 integers. EG
     - sentry@1.2.3.4
     - sentry@1.2.3.4-alpha
     - 1.2.3.4
     - 1.2.3.4-alpha
     - 1.*
    """
    (operator, negated) = handle_operator_negation(operator)
    operator = OPERATOR_TO_DJANGO[operator]
    version = version if "@" in version else f"{SEMVER_FAKE_PACKAGE}@{version}"
    parsed = parse_release_relay(version)
    parsed_version = parsed.get("version_parsed")
    if parsed_version:
        # Convert `pre` to always be a string
        prerelease = parsed_version["pre"] if parsed_version["pre"] else ""
        semver_filter = SemverFilter(
            operator,
            [
                parsed_version["major"],
                parsed_version["minor"],
                parsed_version["patch"],
                parsed_version["revision"],
                0 if prerelease else 1,
                prerelease,
            ],
            negated=negated,
        )
        if parsed["package"] and parsed["package"] != SEMVER_FAKE_PACKAGE:
            semver_filter.package = parsed["package"]
        return semver_filter
    else:
        # Try to parse as a wildcard match
        package, version = version.split("@", 1)
        version_parts = []
        if version:
            for part in version.split(".", 3):
                if part in SEMVER_WILDCARDS:
                    break
                try:
                    # We assume all ints for a wildcard match - not handling prerelease as
                    # part of these
                    version_parts.append(int(part))
                except ValueError:
                    raise InvalidSearchQuery(INVALID_SEMVER_MESSAGE)

        package = package if package and package != SEMVER_FAKE_PACKAGE else None
        return SemverFilter("exact", version_parts, package, negated)
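
# --- Illustrative sketch (not from the Sentry codebase) ---
# The wildcard branch above collects the leading integer parts and stops at the first
# wildcard, so "1.2.*" matches every 1.2.x release. Standalone version of that loop;
# SEMVER_WILDCARDS and the error message are stand-ins for Sentry's constants.
SEMVER_WILDCARDS = frozenset(["X", "*"])


class InvalidSearchQuery(Exception):
    pass


def wildcard_version_parts(version):
    version_parts = []
    for part in version.split(".", 3):
        if part in SEMVER_WILDCARDS:
            break
        try:
            version_parts.append(int(part))
        except ValueError:
            raise InvalidSearchQuery("Invalid format for semver query.")
    return version_parts


# wildcard_version_parts("1.2.*")   -> [1, 2]
# wildcard_version_parts("1.2.3.4") -> [1, 2, 3, 4]
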
Example #14
def project_slug_converter(
    builder: QueryBuilder, search_filter: SearchFilter
) -> Optional[WhereType]:
    """Convert project slugs to ids and create a filter based on those.
    This is because we only store project ids in clickhouse.
    """
    value = search_filter.value.value

    if Op(search_filter.operator) == Op.EQ and value == "":
        raise InvalidSearchQuery(
            'Cannot query for has:project or project:"" as every event will have a project'
        )

    slugs = to_list(value)
    project_slugs: Mapping[str, int] = {
        slug: project_id for slug, project_id in builder.project_slugs.items() if slug in slugs
    }
    missing: List[str] = [slug for slug in slugs if slug not in project_slugs]
    if missing and search_filter.operator in constants.EQUALITY_OPERATORS:
        raise InvalidSearchQuery(
            f"Invalid query. Project(s) {', '.join(missing)} do not exist or are not actively selected."
        )
    # Sorted for consistent query results
    project_ids = list(sorted(project_slugs.values()))
    if project_ids:
        # Create a new search filter with the correct values
        converted_filter = builder.convert_search_filter_to_condition(
            SearchFilter(
                SearchKey("project.id"),
                search_filter.operator,
                SearchValue(project_ids if search_filter.is_in_filter else project_ids[0]),
            )
        )
        if converted_filter:
            if search_filter.operator in constants.EQUALITY_OPERATORS:
                builder.projects_to_filter.update(project_ids)
            return converted_filter

    return None
Example #15
    def convert_search_filter(search_filter):
        if search_filter.key.name in value_converters:
            converter = value_converters[search_filter.key.name]
            new_value = converter(to_list(search_filter.value.raw_value),
                                  projects, user, environments)
            search_filter = search_filter._replace(
                value=SearchValue(new_value),
                operator="IN"
                if search_filter.operator in EQUALITY_OPERATORS else "NOT IN",
            )
        elif isinstance(search_filter, AggregateFilter):
            raise InvalidSearchQuery(
                f"Aggregate filters ({search_filter.key.name}) are not supported in issue searches."
            )
        return search_filter
Example #16
def _error_handled_filter_converter(
    search_filter: SearchFilter,
    name: str,
    params: Optional[Mapping[str, Union[int, str, datetime]]],
):
    value = search_filter.value.value
    # Treat has filter as equivalent to handled
    if search_filter.value.raw_value == "":
        output = 1 if search_filter.operator == "!=" else 0
        return [["isHandled", []], "=", output]
    # Null values and 1 are the same, and both indicate a handled error.
    if value in ("1", 1):
        return [["isHandled", []], "=", 1]
    if value in ("0", 0):
        return [["notHandled", []], "=", 1]
    raise InvalidSearchQuery("Invalid value for error.handled condition. Accepted values are 1, 0")
Example #17
def team_key_transaction_filter(builder: QueryBuilder, search_filter: SearchFilter) -> WhereType:
    value = search_filter.value.value
    key_transaction_expr = builder.resolve_field_alias(constants.TEAM_KEY_TRANSACTION_ALIAS)

    if search_filter.value.raw_value == "":
        return Condition(
            key_transaction_expr, Op.NEQ if search_filter.operator == "!=" else Op.EQ, 0
        )
    if value in ("1", 1):
        return Condition(key_transaction_expr, Op.EQ, 1)
    if value in ("0", 0):
        return Condition(key_transaction_expr, Op.EQ, 0)

    raise InvalidSearchQuery(
        "Invalid value for key_transaction condition. Accepted values are 1, 0"
    )
Example #18
def _error_unhandled_filter_converter(
    search_filter: SearchFilter,
    name: str,
    params: Optional[Mapping[str, Union[int, str, datetime]]],
):
    value = search_filter.value.value
    # This field is the inversion of error.handled, otherwise the logic is the same.
    if search_filter.value.raw_value == "":
        output = 0 if search_filter.operator == "!=" else 1
        return [["isHandled", []], "=", output]
    if value in ("1", 1):
        return [["notHandled", []], "=", 1]
    if value in ("0", 0):
        return [["isHandled", []], "=", 1]
    raise InvalidSearchQuery(
        "Invalid value for error.unhandled condition. Accepted values are 1, 0"
    )
Example #19
def _team_key_transaction_filter_converter(
    search_filter: SearchFilter,
    name: str,
    params: Optional[Mapping[str, Union[int, str, datetime]]],
):
    value = search_filter.value.value
    key_transaction_expr = FIELD_ALIASES[TEAM_KEY_TRANSACTION_ALIAS].get_field(params)

    if search_filter.value.raw_value == "":
        operator = "!=" if search_filter.operator == "!=" else "="
        return [key_transaction_expr, operator, 0]
    if value in ("1", 1):
        return [key_transaction_expr, "=", 1]
    if value in ("0", 0):
        return [key_transaction_expr, "=", 0]
    raise InvalidSearchQuery(
        "Invalid value for key_transaction condition. Accepted values are 1, 0"
    )
Example #20
    def _resolve_web_vital_function(
        self,
        args: Mapping[str, Union[str, Column, SelectType, int, float]],
        alias: str,
    ) -> SelectType:
        column = args["column"]
        metric_id = args["metric_id"]
        quality = args["quality"].lower()

        if column not in [
                "measurements.lcp",
                "measurements.fcp",
                "measurements.fp",
                "measurements.fid",
                "measurements.cls",
        ]:
            raise InvalidSearchQuery(
                "count_web_vitals only supports measurements")

        measurement_rating = self.builder.resolve_column("measurement_rating")

        quality_id = indexer.resolve(quality)
        if quality_id is None:
            return Function(
                # This matches the type from doing `select toTypeName(count()) ...` from clickhouse
                "toUInt64",
                [0],
                alias,
            )

        return Function(
            "countIf",
            [
                Column("value"),
                Function(
                    "and",
                    [
                        Function("equals", [measurement_rating, quality_id]),
                        Function("equals", [Column("metric_id"), metric_id]),
                    ],
                ),
            ],
            alias,
        )
Example #21
    def get_v1_results(self, request, organization):
        try:
            snuba_args = self.get_snuba_query_args_legacy(request, organization)
        except InvalidSearchQuery as exc:
            raise ParseError(detail=str(exc))
        except NoProjects:
            return Response({"data": []})

        snuba_args = self.get_field(request, snuba_args)
        rollup = get_rollup_from_request(
            request,
            snuba_args,
            default_interval=None,
            error=InvalidSearchQuery(
                "Your interval and date range would create too many results. "
                "Use a larger interval, or a smaller date range."
            ),
        )

        result = transform_aliases_and_query(
            aggregations=snuba_args.get("aggregations"),
            conditions=snuba_args.get("conditions"),
            filter_keys=snuba_args.get("filter_keys"),
            start=snuba_args.get("start"),
            end=snuba_args.get("end"),
            orderby="time",
            groupby=["time"],
            rollup=rollup,
            referrer="api.organization-events-stats",
            limit=10000,
        )
        serializer = SnubaTSResultSerializer(organization, None, request.user)
        return Response(
            serializer.serialize(
                snuba.SnubaTSResult(result, snuba_args["start"], snuba_args["end"], rollup)
            ),
            status=200,
        )
Example #22
    def resolve_params(self) -> None:
        """Keys included as url params take precedent if same key is included in search
        They are also considered safe and to have had access rules applied unlike conditions
        from the query string.
        """
        # start/end are required so that we can run a query in a reasonable amount of time
        if "start" not in self.params or "end" not in self.params:
            raise InvalidSearchQuery("Cannot query without a valid date range")
        start, end = self.params["start"], self.params["end"]

        # TODO: this validation should be done when we create the params dataclass instead
        assert isinstance(start, datetime) and isinstance(
            end, datetime
        ), "Both start and end params must be datetime objects"
        assert all(
            isinstance(project_id, int) for project_id in self.params.get("project_id", [])
        ), "All project id params must be ints"

        self.where.append(Condition(self.column("timestamp"), Op.GTE, start))
        self.where.append(Condition(self.column("timestamp"), Op.LT, end))

        if "project_id" in self.params:
            self.where.append(
                Condition(
                    self.column("project_id"),
                    Op.IN,
                    self.params["project_id"],
                )
            )

        if "environment" in self.params:
            term = SearchFilter(
                SearchKey("environment"), "=", SearchValue(self.params["environment"])
            )
            condition = self._environment_filter_converter(term, "environment")
            if condition:
                self.where.append(condition)
Example #23
def inbox_search(
    projects: Sequence[Project],
    environments: Optional[Sequence[Environment]] = None,
    limit: int = 100,
    cursor: Optional[Cursor] = None,
    count_hits: bool = False,
    search_filters: Optional[Sequence[SearchFilter]] = None,
    date_from: Optional[datetime] = None,
    date_to: Optional[datetime] = None,
    max_hits: Optional[int] = None,
) -> CursorResult:
    now: datetime = timezone.now()
    end: Optional[datetime] = None
    end_params: List[datetime] = [
        _f for _f in [date_to,
                      get_search_filter(search_filters, "date", "<")] if _f
    ]
    if end_params:
        end = min(end_params)

    end = end if end else now + ALLOWED_FUTURE_DELTA

    # We only want to search back a week at most, since that's the oldest inbox rows
    # can be.
    earliest_date = now - timedelta(days=7)
    start_params = [
        date_from, earliest_date,
        get_search_filter(search_filters, "date", ">")
    ]
    start = max(_f for _f in start_params if _f)
    end = max([earliest_date, end])

    if start >= end:
        return Paginator(Group.objects.none()).get_result()

    # Make sure search terms are valid
    invalid_search_terms = [
        str(sf) for sf in search_filters
        if sf.key.name not in allowed_inbox_search_terms
    ]
    if invalid_search_terms:
        raise InvalidSearchQuery(
            f"Invalid search terms for 'inbox' search: {invalid_search_terms}")

    # Make sure this is an inbox search
    if not get_search_filter(search_filters, "for_review", "="):
        raise InvalidSearchQuery(
            "Sort key 'inbox' only supported for inbox search")

    if get_search_filter(search_filters, "status",
                         "=") != GroupStatus.UNRESOLVED and get_search_filter(
                             search_filters, "status",
                             "IN") != [GroupStatus.UNRESOLVED]:
        raise InvalidSearchQuery(
            "Inbox search only works for 'unresolved' status")

    # We just filter on `GroupInbox.date_added` here, and don't filter by date
    # on the group. This keeps the query simpler and faster in some edge cases,
    # and date_added is a good enough proxy when we're using this sort.
    qs = GroupInbox.objects.filter(
        date_added__gte=start,
        date_added__lte=end,
        project__in=projects,
    )

    if environments is not None:
        environment_ids: List[int] = [
            environment.id for environment in environments
        ]
        qs = qs.filter(group_id__in=GroupEnvironment.objects.filter(
            environment_id__in=environment_ids).values_list(
                "group_id", flat=True).distinct())

    owner_search = get_search_filter(search_filters, "assigned_or_suggested",
                                     "IN")
    if owner_search:
        qs = qs.filter(
            assigned_or_suggested_filter(owner_search,
                                         projects,
                                         field_filter="group_id"))

    paginator = DateTimePaginator(qs.order_by("date_added"), "-date_added")
    results = paginator.get_result(limit,
                                   cursor,
                                   count_hits=count_hits,
                                   max_hits=max_hits)

    # We want to return groups from the endpoint, but have the cursor be related to the
    # GroupInbox rows. So we paginate on the GroupInbox results queryset, then fetch
    # the group_ids out and use them to get the actual groups.
    group_qs = Group.objects.filter(
        id__in=[r.group_id for r in results.results],
        project__in=projects,
        status=GroupStatus.UNRESOLVED,
    )
    groups: Mapping[int, Group] = {g.id: g for g in group_qs}
    results.results = [
        groups[r.group_id] for r in results.results if r.group_id in groups
    ]
    return results
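
# --- Illustrative sketch (not from the Sentry codebase) ---
# The window computation above clamps the search range to the last seven days:
# start is the latest of date_from, the seven-day floor, and any "date:>" filter,
# and end is pulled up to at least that floor. Sketch using plain datetimes; the
# ten-minute allowed_future default is an assumption, not Sentry's constant.
from datetime import datetime, timedelta


def inbox_window(now, date_from=None, date_to=None, allowed_future=timedelta(minutes=10)):
    earliest_date = now - timedelta(days=7)
    end = date_to if date_to else now + allowed_future
    start = max(_f for _f in [date_from, earliest_date] if _f)
    end = max([earliest_date, end])
    return start, end


print(inbox_window(datetime(2021, 9, 1), date_from=datetime(2021, 8, 1)))
# -> start is clamped to seven days before "now", not thirty days back
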
Example #24
    def query(
        self,
        projects,
        retention_window_start,
        group_queryset,
        environments,
        sort_by,
        limit,
        cursor,
        count_hits,
        paginator_options,
        search_filters,
        date_from,
        date_to,
        max_hits=None,
    ):

        now = timezone.now()
        end = None
        end_params = [
            _f for _f in
            [date_to, get_search_filter(search_filters, "date", "<")] if _f
        ]
        if end_params:
            end = min(end_params)

        if not end:
            end = now + ALLOWED_FUTURE_DELTA

            metrics.incr("snuba.search.postgres_only")

            # This search is for some time window that ends with "now",
            # so if the requested sort is `date` (`last_seen`) and there
            # are no other Snuba-based search predicates, we can simply
            # return the results from Postgres.
            if (cursor is None and sort_by == "date" and
                    # This handles tags and date parameters for search filters.
                    not [
                        sf for sf in search_filters if sf.key.name not in
                        self.postgres_only_fields.union(["date"])
                    ]):
                group_queryset = group_queryset.order_by("-last_seen")
                paginator = DateTimePaginator(group_queryset, "-last_seen",
                                              **paginator_options)
                # When it's a simple django-only search, we count_hits like normal
                return paginator.get_result(limit,
                                            cursor,
                                            count_hits=count_hits,
                                            max_hits=max_hits)

        # TODO: Presumably we only want to search back to the project's max
        # retention date, which may be closer than 90 days in the past, but
        # apparently `retention_window_start` can be None(?), so we need a
        # fallback.
        retention_date = max([
            _f for _f in [retention_window_start, now - timedelta(days=90)]
            if _f
        ])
        start_params = [
            date_from, retention_date,
            get_search_filter(search_filters, "date", ">")
        ]
        start = max([_f for _f in start_params if _f])
        end = max([retention_date, end])

        if start == retention_date and end == retention_date:
            # Both `start` and `end` must have been trimmed to `retention_date`,
            # so this entire search was against a time range that is outside of
            # retention. We'll return empty results to maintain backwards compatibility
            # with Django search (for now).
            return self.empty_result

        if start >= end:
            # TODO: This maintains backwards compatibility with Django search, but
            # in the future we should find a way to notify the user that their search
            # is invalid.
            return self.empty_result

        # This search is specific to Inbox. If we're using inbox sort and only querying
        # postgres then we can use this sort method. Otherwise if we need to go to Snuba,
        # fail.
        if (sort_by == "inbox"
                and get_search_filter(search_filters, "for_review", "=")
                # This handles tags and date parameters for search filters.
                and not [
                    sf for sf in search_filters if sf.key.name not in
                    self.postgres_only_fields.union(["date"])
                ]):
            # We just filter on `GroupInbox.date_added` here, and don't filter by date
            # on the group. This keeps the query simpler and faster in some edge cases,
            # and date_added is a good enough proxy when we're using this sort.
            group_queryset = group_queryset.filter(
                groupinbox__date_added__gte=start,
                groupinbox__date_added__lte=end,
            )
            group_queryset = group_queryset.extra(select={
                "inbox_date":
                "sentry_groupinbox.date_added"
            }, ).order_by("-inbox_date")
            paginator = DateTimePaginator(group_queryset, "-inbox_date",
                                          **paginator_options)
            return paginator.get_result(limit,
                                        cursor,
                                        count_hits=count_hits,
                                        max_hits=max_hits)

        if sort_by == "inbox":
            raise InvalidSearchQuery(
                f"Sort key '{sort_by}' only supported for inbox search")

        # Here we check if all the django filters reduce the set of groups down
        # to something that we can send down to Snuba in a `group_id IN (...)`
        # clause.
        max_candidates = options.get("snuba.search.max-pre-snuba-candidates")

        with sentry_sdk.start_span(op="snuba_group_query") as span:
            group_ids = list(
                group_queryset.values_list("id",
                                           flat=True)[:max_candidates + 1])
            span.set_data("Max Candidates", max_candidates)
            span.set_data("Result Size", len(group_ids))
        metrics.timing("snuba.search.num_candidates", len(group_ids))

        too_many_candidates = False
        if not group_ids:
            # no matches could possibly be found from this point on
            metrics.incr("snuba.search.no_candidates", skip_internal=False)
            return self.empty_result
        elif len(group_ids) > max_candidates:
            # If the pre-filter query didn't include anything to significantly
            # filter down the number of results (from 'first_release', 'query',
            # 'status', 'bookmarked_by', 'assigned_to', 'unassigned',
            # 'subscribed_by', 'active_at_from', or 'active_at_to') then it
            # might have surpassed the `max_candidates`. In this case,
            # we *don't* want to pass candidates down to Snuba, and instead we
            # want Snuba to do all the filtering/sorting it can and *then* apply
            # this queryset to the results from Snuba, which we call
            # post-filtering.
            metrics.incr("snuba.search.too_many_candidates",
                         skip_internal=False)
            too_many_candidates = True
            group_ids = []

        sort_field = self.sort_strategies[sort_by]
        chunk_growth = options.get("snuba.search.chunk-growth-rate")
        max_chunk_size = options.get("snuba.search.max-chunk-size")
        chunk_limit = limit
        offset = 0
        num_chunks = 0
        hits = self.calculate_hits(
            group_ids,
            too_many_candidates,
            sort_field,
            projects,
            retention_window_start,
            group_queryset,
            environments,
            sort_by,
            limit,
            cursor,
            count_hits,
            paginator_options,
            search_filters,
            start,
            end,
        )
        if count_hits and hits == 0:
            return self.empty_result

        paginator_results = self.empty_result
        result_groups = []
        result_group_ids = set()

        max_time = options.get("snuba.search.max-total-chunk-time-seconds")
        time_start = time.time()

        # Do smaller searches in chunks until we have enough results
        # to answer the query (or hit the end of possible results). We do
        # this because a common case for search is to return 100 groups
        # sorted by `last_seen`, and we want to avoid returning all of
        # a project's groups and then post-sorting them all in Postgres
        # when typically the first N results will do.
        while (time.time() - time_start) < max_time:
            num_chunks += 1

            # grow the chunk size on each iteration to account for huge projects
            # and weird queries, up to a max size
            chunk_limit = min(int(chunk_limit * chunk_growth), max_chunk_size)
            # but if we have group_ids always query for at least that many items
            chunk_limit = max(chunk_limit, len(group_ids))

            # {group_id: group_score, ...}
            snuba_groups, total = self.snuba_search(
                start=start,
                end=end,
                project_ids=[p.id for p in projects],
                environment_ids=environments
                and [environment.id for environment in environments],
                sort_field=sort_field,
                cursor=cursor,
                group_ids=group_ids,
                limit=chunk_limit,
                offset=offset,
                search_filters=search_filters,
            )
            metrics.timing("snuba.search.num_snuba_results", len(snuba_groups))
            count = len(snuba_groups)
            more_results = count >= limit and (offset + limit) < total
            offset += len(snuba_groups)

            if not snuba_groups:
                break

            if group_ids:
                # pre-filtered candidates were passed down to Snuba, so we're
                # finished with filtering and these are the only results. Note
                # that because we set the chunk size to at least the size of
                # the group_ids, we know we got all of them (ie there are
                # no more chunks after the first)
                result_groups = snuba_groups
                if count_hits and hits is None:
                    hits = len(snuba_groups)
            else:
                # pre-filtered candidates were *not* passed down to Snuba,
                # so we need to do post-filtering to verify Sentry DB predicates
                filtered_group_ids = group_queryset.filter(
                    id__in=[gid
                            for gid, _ in snuba_groups]).values_list("id",
                                                                     flat=True)

                group_to_score = dict(snuba_groups)
                for group_id in filtered_group_ids:
                    if group_id in result_group_ids:
                        # because we're doing multiple Snuba queries, which
                        # happen outside of a transaction, there is a small possibility
                        # of groups moving around in the sort scoring underneath us,
                        # so we at least want to protect against duplicates
                        continue

                    group_score = group_to_score[group_id]
                    result_group_ids.add(group_id)
                    result_groups.append((group_id, group_score))

            # break the query loop for one of three reasons:
            # * we started with Postgres candidates and so only do one Snuba query max
            # * the paginator is returning enough results to satisfy the query (>= the limit)
            # * there are no more groups in Snuba to post-filter
            # TODO do we actually have to rebuild this SequencePaginator every time
            # or can we just make it after we've broken out of the loop?
            paginator_results = SequencePaginator(
                [(score, id) for (id, score) in result_groups],
                reverse=True,
                **paginator_options).get_result(limit,
                                                cursor,
                                                known_hits=hits,
                                                max_hits=max_hits)

            if group_ids or len(
                    paginator_results.results) >= limit or not more_results:
                break

        # HACK: We're using the SequencePaginator to mask the complexities of going
        # back and forth between two databases. This causes a problem with pagination
        # because we're 'lying' to the SequencePaginator (it thinks it has the entire
        # result set in memory when it does not). For this reason we need to make some
        # best guesses as to whether the `prev` and `next` cursors have more results.

        if len(paginator_results.results) == limit and more_results:
            # Because we are going back and forth between DBs there is a small
            # chance that we will hand the SequencePaginator exactly `limit`
            # items. In this case the paginator will assume there are no more
            # results, so we need to override the `next` cursor's results.
            paginator_results.next.has_results = True

        if cursor is not None and (not cursor.is_prev
                                   or len(paginator_results.results) > 0):
            # If the user passed a cursor, and it isn't already a 0 result `is_prev`
            # cursor, then it's worth allowing them to go back a page to check for
            # more results.
            paginator_results.prev.has_results = True

        metrics.timing("snuba.search.num_chunks", num_chunks)

        groups = Group.objects.in_bulk(paginator_results.results)
        paginator_results.results = [
            groups[k] for k in paginator_results.results if k in groups
        ]

        return paginator_results
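
# --- Illustrative sketch (not from the Sentry codebase) ---
# The chunked loop above grows each successive Snuba query by a growth factor up to
# a cap, and never requests fewer rows than the number of candidate group_ids.
# Standalone sketch of that schedule; the option values passed in are made up.
def chunk_schedule(limit, growth, max_chunk_size, group_id_count, rounds):
    chunk_limit = limit
    for _ in range(rounds):
        chunk_limit = min(int(chunk_limit * growth), max_chunk_size)
        chunk_limit = max(chunk_limit, group_id_count)
        yield chunk_limit


# list(chunk_schedule(limit=100, growth=1.5, max_chunk_size=2000, group_id_count=0, rounds=5))
# -> [150, 225, 337, 505, 757]
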
Example #25
def format_search_filter(term, params):
    projects_to_filter = []  # Used to avoid doing multiple conditions on project ID
    conditions = []
    group_ids = None
    name = term.key.name
    value = term.value.value
    if name in (PROJECT_ALIAS, PROJECT_NAME_ALIAS):
        if term.operator == "=" and value == "":
            raise InvalidSearchQuery(
                "Invalid query for 'has' search: 'project' cannot be empty.")
        slugs = to_list(value)
        projects = {
            p.slug: p.id
            for p in Project.objects.filter(
                id__in=params.get("project_id", []), slug__in=slugs)
        }
        missing = [slug for slug in slugs if slug not in projects]
        if missing and term.operator in EQUALITY_OPERATORS:
            raise InvalidSearchQuery(
                f"Invalid query. Project(s) {', '.join(missing)} do not exist or are not actively selected."
            )
        project_ids = list(sorted(projects.values()))
        if project_ids:
            # Create a new search filter with the correct values
            term = SearchFilter(
                SearchKey("project_id"),
                term.operator,
                SearchValue(
                    project_ids if term.is_in_filter else project_ids[0]),
            )
            converted_filter = convert_search_filter_to_snuba_query(term)
            if converted_filter:
                if term.operator in EQUALITY_OPERATORS:
                    projects_to_filter = project_ids
                conditions.append(converted_filter)
    elif name == ISSUE_ID_ALIAS and value != "":
        # A blank term value means that this is a has filter
        group_ids = to_list(value)
    elif name == ISSUE_ALIAS:
        operator = term.operator
        value = to_list(value)
        # `unknown` is a special value for when there is no issue associated with the event
        group_short_ids = [v for v in value if v and v != "unknown"]
        filter_values = ["" for v in value if not v or v == "unknown"]

        if group_short_ids and params and "organization_id" in params:
            try:
                groups = Group.objects.by_qualified_short_id_bulk(
                    params["organization_id"],
                    group_short_ids,
                )
            except Exception:
                raise InvalidSearchQuery(
                    f"Invalid value '{group_short_ids}' for 'issue:' filter")
            else:
                filter_values.extend(sorted([g.id for g in groups]))

        term = SearchFilter(
            SearchKey("issue.id"),
            operator,
            SearchValue(
                filter_values if term.is_in_filter else filter_values[0]),
        )
        converted_filter = convert_search_filter_to_snuba_query(term)
        conditions.append(converted_filter)
    elif (name == RELEASE_ALIAS and params and
          (value == "latest" or term.is_in_filter and any(v == "latest"
                                                          for v in value))):
        value = [
            parse_release(
                v,
                params["project_id"],
                params.get("environment_objects"),
                params.get("organization_id"),
            ) for v in to_list(value)
        ]

        converted_filter = convert_search_filter_to_snuba_query(
            SearchFilter(
                term.key,
                term.operator,
                SearchValue(value if term.is_in_filter else value[0]),
            ))
        if converted_filter:
            conditions.append(converted_filter)
    else:
        converted_filter = convert_search_filter_to_snuba_query(term,
                                                                params=params)
        if converted_filter:
            conditions.append(converted_filter)

    return conditions, projects_to_filter, group_ids
Example #26
def get_filter(query=None, params=None):
    """
    Returns an eventstore filter given the search text provided by the user and
    URL params
    """
    # NOTE: this function assumes project permissions check already happened
    parsed_terms = []
    if query is not None:
        try:
            parsed_terms = parse_search_query(query,
                                              allow_boolean=True,
                                              params=params)
        except ParseError as e:
            raise InvalidSearchQuery(
                f"Parse error: {e.expr.name} (column {e.column():d})")

    kwargs = {
        "start": None,
        "end": None,
        "conditions": [],
        "having": [],
        "user_id": None,
        "organization_id": None,
        "project_ids": [],
        "group_ids": [],
        "condition_aggregates": [],
        "aliases": params.get("aliases", {}) if params is not None else {},
    }

    projects_to_filter = []
    if any(
            isinstance(term, ParenExpression)
            or SearchBoolean.is_operator(term) for term in parsed_terms):
        (
            condition,
            having,
            found_projects_to_filter,
            group_ids,
        ) = convert_search_boolean_to_snuba_query(parsed_terms, params)

        if condition:
            and_conditions = flatten_condition_tree(condition, SNUBA_AND)
            for func in and_conditions:
                kwargs["conditions"].append(
                    convert_function_to_condition(func))
        if having:
            kwargs["condition_aggregates"] = [
                term.key.name for term in parsed_terms
                if isinstance(term, AggregateFilter)
            ]
            and_having = flatten_condition_tree(having, SNUBA_AND)
            for func in and_having:
                kwargs["having"].append(convert_function_to_condition(func))
        if found_projects_to_filter:
            projects_to_filter = list(set(found_projects_to_filter))
        if group_ids is not None:
            kwargs["group_ids"].extend(list(set(group_ids)))
    else:
        projects_to_filter = set()
        for term in parsed_terms:
            if isinstance(term, SearchFilter):
                conditions, found_projects_to_filter, group_ids = format_search_filter(
                    term, params)
                if len(conditions) > 0:
                    kwargs["conditions"].extend(conditions)
                if found_projects_to_filter:
                    projects_to_filter.update(found_projects_to_filter)
                if group_ids is not None:
                    kwargs["group_ids"].extend(group_ids)
            elif isinstance(term, AggregateFilter):
                converted_filter = convert_aggregate_filter_to_snuba_query(
                    term, params)
                kwargs["condition_aggregates"].append(term.key.name)
                if converted_filter:
                    kwargs["having"].append(converted_filter)
        projects_to_filter = list(projects_to_filter)

    # Keys included as url params take precedence if the same key is included in the search.
    # They are also considered safe and to have had access rules applied, unlike conditions
    # from the query string.
    if params:
        for key in ("start", "end"):
            kwargs[key] = params.get(key, None)
        # OrganizationEndpoint.get_filter() uses project_id, but eventstore.Filter uses project_ids
        if "user_id" in params:
            kwargs["user_id"] = params["user_id"]
        if "organization_id" in params:
            kwargs["organization_id"] = params["organization_id"]
        if "project_id" in params:
            if projects_to_filter:
                kwargs["project_ids"] = projects_to_filter
            else:
                kwargs["project_ids"] = params["project_id"]
        if "environment" in params:
            term = SearchFilter(SearchKey("environment"), "=",
                                SearchValue(params["environment"]))
            kwargs["conditions"].append(
                convert_search_filter_to_snuba_query(term))
        if "group_ids" in params:
            kwargs["group_ids"] = to_list(params["group_ids"])
        # Deprecated alias, use `group_ids` instead
        if ISSUE_ID_ALIAS in params:
            kwargs["group_ids"] = to_list(params["issue.id"])

    return eventstore.Filter(**kwargs)
Example #27
def convert_search_boolean_to_snuba_query(terms, params=None):
    if len(terms) == 1:
        return convert_snuba_condition_to_function(terms[0], params)

    # Filter out any ANDs since we can assume anything without an OR is an AND. Also do some
    # basic sanitization of the query: can't have two operators next to each other, and can't
    # start or end a query with an operator.
    prev = None
    new_terms = []
    for term in terms:
        if prev:
            if SearchBoolean.is_operator(prev) and SearchBoolean.is_operator(
                    term):
                raise InvalidSearchQuery(
                    f"Missing condition in between two condition operators: '{prev} {term}'"
                )
        else:
            if SearchBoolean.is_operator(term):
                raise InvalidSearchQuery(
                    f"Condition is missing on the left side of '{term}' operator"
                )

        if term != SearchBoolean.BOOLEAN_AND:
            new_terms.append(term)
        prev = term
    if SearchBoolean.is_operator(term):
        raise InvalidSearchQuery(
            f"Condition is missing on the right side of '{term}' operator")
    terms = new_terms

    # We put precedence on AND, which sort of counter-intuitively means we have to split the query
    # on ORs first, so the ANDs are grouped together. Search through the query for ORs and split the
    # query on each OR.
    # We want to maintain a binary tree, so split the terms on the first OR we can find and recurse on
    # the two sides. If there is no OR, split the first element out to AND
    index = None
    lhs, rhs = None, None
    operator = None
    try:
        index = terms.index(SearchBoolean.BOOLEAN_OR)
        lhs, rhs = terms[:index], terms[index + 1:]
        operator = SNUBA_OR
    except Exception:
        lhs, rhs = terms[:1], terms[1:]
        operator = SNUBA_AND

    (
        lhs_condition,
        lhs_having,
        projects_to_filter,
        group_ids,
    ) = convert_search_boolean_to_snuba_query(lhs, params)
    (
        rhs_condition,
        rhs_having,
        rhs_projects_to_filter,
        rhs_group_ids,
    ) = convert_search_boolean_to_snuba_query(rhs, params)

    projects_to_filter.extend(rhs_projects_to_filter)
    group_ids.extend(rhs_group_ids)

    if operator == SNUBA_OR and (lhs_condition or
                                 rhs_condition) and (lhs_having or rhs_having):
        raise InvalidSearchQuery(
            "Having an OR between aggregate filters and normal filters is invalid."
        )

    condition, having = None, None
    if lhs_condition or rhs_condition:
        args = list(filter(None, [lhs_condition, rhs_condition]))
        if not args:
            condition = None
        elif len(args) == 1:
            condition = args[0]
        else:
            condition = [operator, args]

    if lhs_having or rhs_having:
        args = list(filter(None, [lhs_having, rhs_having]))
        if not args:
            having = None
        elif len(args) == 1:
            having = args[0]
        else:
            having = [operator, args]

    return condition, having, projects_to_filter, group_ids
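
# --- Illustrative sketch (not from the Sentry codebase) ---
# Because AND binds tighter than OR, the function above splits the (already
# AND-stripped) term list at the first OR and recurses on both halves; with no OR
# it peels off the first term and ANDs it with the rest. Standalone split step:
SNUBA_OR, SNUBA_AND = "or", "and"
BOOLEAN_OR = "OR"


def split_terms(terms):
    try:
        index = terms.index(BOOLEAN_OR)
        return terms[:index], terms[index + 1:], SNUBA_OR
    except ValueError:
        return terms[:1], terms[1:], SNUBA_AND


# split_terms(["a", "OR", "b", "c"]) -> (["a"], ["b", "c"], "or")
# split_terms(["a", "b", "c"])       -> (["a"], ["b", "c"], "and")
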
Example #28
def convert_search_filter_to_snuba_query(search_filter, key=None, params=None):
    name = search_filter.key.name if key is None else key
    value = search_filter.value.value

    # We want to use group_id elsewhere so shouldn't be removed from the dataset
    # but if a user has a tag with the same name we want to make sure that works
    if name in {"group_id"}:
        name = f"tags[{name}]"

    if name in NO_CONVERSION_FIELDS:
        return
    elif name == "id" and search_filter.value.is_wildcard():
        raise InvalidSearchQuery(
            "Wildcard conditions are not permitted on `id` field.")
    elif name == "environment":
        # conditions added to env_conditions are OR'd
        env_conditions = []

        values = set(value if isinstance(value, (list, tuple)) else [value])
        # the "no environment" environment is null in snuba
        if "" in values:
            values.remove("")
            operator = "IS NULL" if search_filter.operator == "=" else "IS NOT NULL"
            env_conditions.append(["environment", operator, None])
        if len(values) == 1:
            operator = "=" if search_filter.operator in EQUALITY_OPERATORS else "!="
            env_conditions.append(["environment", operator, values.pop()])
        elif values:
            operator = "IN" if search_filter.operator in EQUALITY_OPERATORS else "NOT IN"
            env_conditions.append(["environment", operator, values])
        return env_conditions
    elif name == "message":
        if search_filter.value.is_wildcard():
            # XXX: We don't want the '^$' values at the beginning and end of
            # the regex since we want to find the pattern anywhere in the
            # message. Strip off here
            value = search_filter.value.value[1:-1]
            return [["match", ["message", f"'(?i){value}'"]],
                    search_filter.operator, 1]
        elif value == "":
            operator = "=" if search_filter.operator == "=" else "!="
            return [["equals", ["message", f"{value}"]], operator, 1]
        else:
            # https://clickhouse.yandex/docs/en/query_language/functions/string_search_functions/#position-haystack-needle
            # positionCaseInsensitive returns 0 if not found and an index of 1 or more if found
            # so we should flip the operator here
            operator = "!=" if search_filter.operator in EQUALITY_OPERATORS else "="
            if search_filter.is_in_filter:
                # XXX: This `toString` usage is unnecessary, but we need it in place to
                # trick the legacy Snuba language into not treating `message` as a
                # function. Once we switch over to snql it can be removed.
                return [
                    [
                        "multiSearchFirstPositionCaseInsensitive",
                        [["toString", ["message"]],
                         ["array", [f"'{v}'" for v in value]]],
                    ],
                    operator,
                    0,
                ]

            # make message search case insensitive
            return [["positionCaseInsensitive", ["message", f"'{value}'"]],
                    operator, 0]
    elif name in ARRAY_FIELDS and search_filter.value.is_wildcard():
        # Escape and convert meta characters for LIKE expressions.
        raw_value = search_filter.value.raw_value
        like_value = raw_value.replace("%",
                                       "\\%").replace("_",
                                                      "\\_").replace("*", "%")
        operator = "LIKE" if search_filter.operator == "=" else "NOT LIKE"
        return [name, operator, like_value]
    elif name in ARRAY_FIELDS and search_filter.is_in_filter:
        operator = "=" if search_filter.operator == "IN" else "!="
        # XXX: This `arrayConcat` usage is unnecessary, but we need it in place to
        # trick the legacy Snuba language into not treating `name` as a
        # function. Once we switch over to snql it can be removed.
        return [
            [
                "hasAny",
                [["arrayConcat", [name]], ["array", [f"'{v}'" for v in value]]]
            ],
            operator,
            1,
        ]
    elif name == "transaction.status":
        # Handle "has" queries
        if search_filter.value.raw_value == "":
            return [["isNull", [name]], search_filter.operator, 1]

        if search_filter.is_in_filter:
            internal_value = [
                translate_transaction_status(val)
                for val in search_filter.value.raw_value
            ]
        else:
            internal_value = translate_transaction_status(
                search_filter.value.raw_value)

        return [name, search_filter.operator, internal_value]
    elif name == "issue.id":
        # Handle "has" queries
        if (search_filter.value.raw_value == ""
                or search_filter.is_in_filter and [v for v in value if not v]):
            # The state of having no issues is represented differently on transactions vs
            # other events. On the transactions table, it is represented by 0 whereas it is
            # represented by NULL everywhere else. We use coalesce here so we can treat this
            # consistently
            name = ["coalesce", [name, 0]]
            if search_filter.is_in_filter:
                value = [v if v else 0 for v in value]
            else:
                value = 0

        # Skip isNull check on group_id value as we want to
        # allow snuba's prewhere optimizer to find this condition.
        return [name, search_filter.operator, value]
    elif name == USER_DISPLAY_ALIAS:
        user_display_expr = FIELD_ALIASES[USER_DISPLAY_ALIAS].get_expression(
            params)

        # Handle 'has' condition
        if search_filter.value.raw_value == "":
            return [["isNull", [user_display_expr]], search_filter.operator, 1]
        if search_filter.value.is_wildcard():
            return [
                ["match", [user_display_expr, f"'(?i){value}'"]],
                search_filter.operator,
                1,
            ]
        return [user_display_expr, search_filter.operator, value]
    elif name == ERROR_UNHANDLED_ALIAS:
        # This field is the inversion of error.handled, otherwise the logic is the same.
        if search_filter.value.raw_value == "":
            output = 0 if search_filter.operator == "!=" else 1
            return [["isHandled", []], "=", output]
        if value in ("1", 1):
            return [["notHandled", []], "=", 1]
        if value in ("0", 0):
            return [["isHandled", []], "=", 1]
        raise InvalidSearchQuery(
            "Invalid value for error.unhandled condition. Accepted values are 1, 0"
        )
    elif name == "error.handled":
        # Treat has filter as equivalent to handled
        if search_filter.value.raw_value == "":
            output = 1 if search_filter.operator == "!=" else 0
            return [["isHandled", []], "=", output]
        # Null values and 1 are the same, and both indicate a handled error.
        if value in ("1", 1):
            return [["isHandled", []], "=", 1]
        if value in ("0", 0):
            return [["notHandled", []], "=", 1]
        raise InvalidSearchQuery(
            "Invalid value for error.handled condition. Accepted values are 1, 0"
        )
    elif name == KEY_TRANSACTION_ALIAS:
        key_transaction_expr = FIELD_ALIASES[
            KEY_TRANSACTION_ALIAS].get_expression(params)

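        # A "has" query arrives with an empty value: has:key_transaction
        # (operator "!=") becomes expr != 0, while !has becomes expr = 0.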
        if search_filter.value.raw_value == "":
            operator = "!=" if search_filter.operator == "!=" else "="
            return [key_transaction_expr, operator, 0]
        if value in ("1", 1):
            return [key_transaction_expr, "=", 1]
        if value in ("0", 0):
            return [key_transaction_expr, "=", 0]
        raise InvalidSearchQuery(
            "Invalid value for key_transaction condition. Accepted values are 1, 0"
        )
    elif name in ARRAY_FIELDS and search_filter.value.raw_value == "":
        return [["notEmpty", [name]], "=",
                1 if search_filter.operator == "!=" else 0]
    else:
        # timestamp{,.to_{hour,day}} need a datetime string
        # last_seen needs an integer
        if isinstance(value, datetime) and name not in {
                "timestamp",
                "timestamp.to_hour",
                "timestamp.to_day",
        }:
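            # to_timestamp yields seconds since the epoch, so scale the value
            # to an integer millisecond timestamp for the comparison.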
            value = int(to_timestamp(value)) * 1000

        # most field aliases are handled above but timestamp.to_{hour,day} are
        # handled here
        if name in FIELD_ALIASES:
            name = FIELD_ALIASES[name].get_expression(params)

        # Tags are never null, but promoted tags are columns and so can be null.
        # To handle both cases, wrap the name in `ifNull` so nulls become empty
        # strings and empty-value checks behave the same for both.
        if search_filter.key.is_tag:
            name = ["ifNull", [name, "''"]]

        # Handle checks for existence
        if search_filter.operator in (
                "=", "!=") and search_filter.value.value == "":
            if search_filter.key.is_tag:
                return [name, search_filter.operator, value]
            else:
                # If not a tag, we can just check that the column is null.
                return [["isNull", [name]], search_filter.operator, 1]

        is_null_condition = None
        # TODO(wmak): Skip this for all non-nullable keys not just event.type
        if (search_filter.operator in ("!=", "NOT IN")
                and not search_filter.key.is_tag and name != "event.type"):
            # Handle null columns on inequality comparisons. Any comparison
            # between a value and a null will result in null, so we need to
            # explicitly check for whether the condition is null, and OR it
            # together with the inequality check.
            # We don't need to apply this for tags, since if they don't exist
            # they'll always be an empty string.
            is_null_condition = [["isNull", [name]], "=", 1]

        if search_filter.value.is_wildcard():
            condition = [["match", [name, f"'(?i){value}'"]],
                         search_filter.operator, 1]
        else:
            condition = [name, search_filter.operator, value]

        # We only want to return as a list if we have the check for null
        # present. Returning as a list causes these conditions to be ORed
        # together. Otherwise just return the raw condition, so that it can be
        # used correctly in aggregates.
        if is_null_condition:
            return [is_null_condition, condition]
        else:
            return condition
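
A minimal standalone sketch of the ARRAY_FIELDS wildcard branch above: LIKE metacharacters in the raw value are escaped first, and only then is the Discover wildcard "*" mapped to the SQL wildcard "%". The helper name below is hypothetical and only illustrates the escaping step.

def wildcard_to_like(raw_value: str) -> str:
    # Hypothetical helper: escape literal "%" and "_" so they are not treated
    # as LIKE metacharacters, then turn the Discover wildcard "*" into "%".
    return raw_value.replace("%", "\\%").replace("_", "\\_").replace("*", "%")

# "err*_rate" matches any value starting with "err" and ending with the
# literal "_rate".
assert wildcard_to_like("err*_rate") == "err%\\_rate"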
Example #29
    def get_event_stats_data(
        self,
        request: Request,
        organization: Organization,
        get_event_stats: Callable[
            [Sequence[str], str, Dict[str, str], int, bool, Optional[timedelta]], SnubaTSResult
        ],
        top_events: int = 0,
        query_column: str = "count()",
        params: Optional[Dict[str, Any]] = None,
        query: Optional[str] = None,
        allow_partial_buckets: bool = False,
        zerofill_results: bool = True,
        comparison_delta: Optional[timedelta] = None,
    ) -> Dict[str, Any]:
        with self.handle_query_errors():
            with sentry_sdk.start_span(
                op="discover.endpoint", description="base.stats_query_creation"
            ):
                columns = request.GET.getlist("yAxis", [query_column])
                if query is None:
                    query = request.GET.get("query")
                if params is None:
                    try:
                        # events-stats is still used by events v1 which doesn't require global views
                        params = self.get_snuba_params(
                            request, organization, check_global_views=False
                        )
                    except NoProjects:
                        return {"data": []}

                try:
                    rollup = get_rollup_from_request(
                        request,
                        params,
                        default_interval=None,
                        error=InvalidSearchQuery(),
                        top_events=top_events,
                    )
                # If the user sends an invalid interval, use the default instead
                except InvalidSearchQuery:
                    sentry_sdk.set_tag("user.invalid_interval", request.GET.get("interval"))
                    date_range = params["end"] - params["start"]
                    stats_period = parse_stats_period(get_interval_from_range(date_range, False))
                    rollup = int(stats_period.total_seconds()) if stats_period is not None else 3600

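                # Reject comparison periods that would start before the
                # organization's event retention window.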
                if comparison_delta is not None:
                    retention = quotas.get_event_retention(organization=organization)
                    comparison_start = params["start"] - comparison_delta
                    if retention and comparison_start < timezone.now() - timedelta(days=retention):
                        raise ValidationError("Comparison period is outside your retention window")

                # Backwards compatibility for incidents which uses the old
                # column aliases as it straddles both versions of events/discover.
                # We will need these aliases until discover2 flags are enabled for all
                # users.
                # We need these rollup columns to generate correct events-stats results
                column_map = {
                    "user_count": "count_unique(user)",
                    "event_count": "count()",
                    "epm()": "epm(%d)" % rollup,
                    "eps()": "eps(%d)" % rollup,
                    "tpm()": "tpm(%d)" % rollup,
                    "tps()": "tps(%d)" % rollup,
                }

                query_columns = [column_map.get(column, column) for column in columns]
            with sentry_sdk.start_span(op="discover.endpoint", description="base.stats_query"):
                result = get_event_stats(
                    query_columns, query, params, rollup, zerofill_results, comparison_delta
                )

        serializer = SnubaTSResultSerializer(organization, None, request.user)

        with sentry_sdk.start_span(op="discover.endpoint", description="base.stats_serialization"):
            # When the request is for top_events, result can be a SnubaTSResult in the event that
            # there were no top events found. In this case, result contains a zerofilled series
            # that acts as a placeholder.
            is_multiple_axis = len(query_columns) > 1
            if top_events > 0 and isinstance(result, dict):
                results = {}
                for key, event_result in result.items():
                    if is_multiple_axis:
                        results[key] = self.serialize_multiple_axis(
                            serializer,
                            event_result,
                            columns,
                            query_columns,
                            allow_partial_buckets,
                            zerofill_results=zerofill_results,
                        )
                    else:
                        # Need to get function alias if count is a field, but not the axis
                        results[key] = serializer.serialize(
                            event_result,
                            column=resolve_axis_column(query_columns[0]),
                            allow_partial_buckets=allow_partial_buckets,
                            zerofill_results=zerofill_results,
                        )
                serialized_result = results
            elif is_multiple_axis:
                serialized_result = self.serialize_multiple_axis(
                    serializer,
                    result,
                    columns,
                    query_columns,
                    allow_partial_buckets,
                    zerofill_results=zerofill_results,
                )
            else:
                extra_columns = None
                if comparison_delta:
                    extra_columns = ["comparisonCount"]
                serialized_result = serializer.serialize(
                    result,
                    resolve_axis_column(query_columns[0]),
                    allow_partial_buckets=allow_partial_buckets,
                    zerofill_results=zerofill_results,
                    extra_columns=extra_columns,
                )

            return serialized_result
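
A minimal sketch of the column_map substitution above: rate aliases such as epm() and eps() are rewritten with the resolved rollup so the rate denominator matches the bucket size, while unrecognized columns pass through unchanged. The rollup value here is an assumption for illustration only.

rollup = 3600  # assumed: one-hour buckets resolved from the request interval
column_map = {
    "user_count": "count_unique(user)",
    "event_count": "count()",
    "epm()": "epm(%d)" % rollup,
    "eps()": "eps(%d)" % rollup,
}

columns = ["event_count", "epm()", "p95()"]
# Unknown columns (p95() here) fall through column_map unchanged.
query_columns = [column_map.get(column, column) for column in columns]
assert query_columns == ["count()", "epm(3600)", "p95()"]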
Example #30
    def get_event_stats_data(
        self,
        request,
        organization,
        get_event_stats,
        top_events=0,
        query_column="count()",
        params=None,
        query=None,
        allow_partial_buckets=False,
    ):
        with self.handle_query_errors():
            with sentry_sdk.start_span(
                    op="discover.endpoint",
                    description="base.stats_query_creation"):
                columns = request.GET.getlist("yAxis", [query_column])
                if query is None:
                    query = request.GET.get("query")
                if params is None:
                    try:
                        # events-stats is still used by events v1 which doesn't require global views
                        params = self.get_snuba_params(
                            request, organization, check_global_views=False)
                    except NoProjects:
                        return {"data": []}

                rollup = get_rollup_from_request(
                    request,
                    params,
                    default_interval=None,
                    error=InvalidSearchQuery(
                        "Your interval and date range would create too many results. "
                        "Use a larger interval, or a smaller date range."),
                    top_events=top_events,
                )
                # Backwards compatibility for incidents which uses the old
                # column aliases as it straddles both versions of events/discover.
                # We will need these aliases until discover2 flags are enabled for all
                # users.
                # We need these rollup columns to generate correct events-stats results
                column_map = {
                    "user_count": "count_unique(user)",
                    "event_count": "count()",
                    "epm()": "epm(%d)" % rollup,
                    "eps()": "eps(%d)" % rollup,
                    "tpm()": "tpm(%d)" % rollup,
                    "tps()": "tps(%d)" % rollup,
                }

                query_columns = [
                    column_map.get(column, column) for column in columns
                ]
            with sentry_sdk.start_span(op="discover.endpoint",
                                       description="base.stats_query"):
                result = get_event_stats(query_columns, query, params, rollup)

        serializer = SnubaTSResultSerializer(organization, None, request.user)

        with sentry_sdk.start_span(op="discover.endpoint",
                                   description="base.stats_serialization"):
            # When the request is for top_events, result can be a SnubaTSResult in the event that
            # there were no top events found. In this case, result contains a zerofilled series
            # that acts as a placeholder.
            if top_events > 0 and isinstance(result, dict):
                results = {}
                for key, event_result in result.items():
                    if len(query_columns) > 1:
                        results[key] = self.serialize_multiple_axis(
                            serializer, event_result, columns, query_columns,
                            allow_partial_buckets)
                    else:
                        # Need to get function alias if count is a field, but not the axis
                        results[key] = serializer.serialize(
                            event_result,
                            column=resolve_axis_column(query_columns[0]),
                            allow_partial_buckets=allow_partial_buckets,
                        )
                return results
            elif len(query_columns) > 1:
                return self.serialize_multiple_axis(serializer, result,
                                                    columns, query_columns,
                                                    allow_partial_buckets)
            else:
                return serializer.serialize(
                    result,
                    resolve_axis_column(query_columns[0]),
                    allow_partial_buckets=allow_partial_buckets,
                )