Code Example #1
File: group.py Project: BigOrange2019630/sentry
    def __init__(self,
                 environment_ids=None,
                 start=None,
                 end=None,
                 search_filters=None):
        from sentry.search.snuba.executors import get_search_filter

        self.environment_ids = environment_ids

        # XXX: We copy this logic from `PostgresSnubaQueryExecutor.query`. Ideally we
        # should try and encapsulate this logic, but if you're changing this, change it
        # there as well.
        self.start = None
        start_params = [
            _f for _f in [start, get_search_filter(search_filters, "date", ">")] if _f
        ]
        if start_params:
            self.start = max(start_params)

        self.end = None
        end_params = [
            _f for _f in [end, get_search_filter(search_filters, "date", "<")] if _f
        ]
        if end_params:
            self.end = min(end_params)

        self.conditions = ([
            convert_search_filter_to_snuba_query(search_filter)
            for search_filter in search_filters
            if search_filter.key.name not in self.skip_snuba_fields
        ] if search_filters is not None else [])
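The constructor above narrows the query window: the effective start is the latest of the explicit start and any "date >" search filter, and the effective end is the earliest of the explicit end and any "date <" filter. Below is a minimal runnable sketch of that narrowing; get_search_filter here is a hypothetical stand-in for the helper imported from sentry.search.snuba.executors.

from datetime import datetime


def get_search_filter(search_filters, name, operator):
    # Hypothetical stand-in: return the value of the first (key, operator) match.
    for key, op, value in search_filters or []:
        if key == name and op == operator:
            return value
    return None


def resolve_bounds(start, end, search_filters):
    # The tightest window wins: latest lower bound, earliest upper bound.
    starts = [v for v in (start, get_search_filter(search_filters, "date", ">")) if v]
    ends = [v for v in (end, get_search_filter(search_filters, "date", "<")) if v]
    return (max(starts) if starts else None, min(ends) if ends else None)


filters = [("date", ">", datetime(2020, 1, 10)), ("date", "<", datetime(2020, 1, 20))]
print(resolve_bounds(datetime(2020, 1, 1), datetime(2020, 2, 1), filters))
# Prints the 2020-01-10 .. 2020-01-20 window, the tighter bound on each side.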
Code Example #2
 def __init__(self,
              environment_ids=None,
              start=None,
              end=None,
              search_filters=None):
     self.environment_ids = environment_ids
     self.start = start
     self.end = end
     self.conditions = ([
         convert_search_filter_to_snuba_query(search_filter)
         for search_filter in search_filters
         if search_filter.key.name not in self.skip_snuba_fields
     ] if search_filters is not None else [])
Code Example #3
File: executors.py Project: yangyongguan/sentry
def snuba_search(
    start,
    end,
    project_ids,
    environment_ids,
    sort_field,
    cursor=None,
    candidate_ids=None,
    limit=None,
    offset=0,
    get_sample=False,
    search_filters=None,
):
    """
    This function doesn't strictly benefit from or require being pulled out of the main
    query method above, but the query method is already large and this function at least
    extracts most of the Snuba-specific logic.

    Returns a tuple of:
     * a sorted list of (group_id, group_score) tuples sorted descending by score,
     * the count of total results (rows) available for this query.
    """
    filters = {"project_id": project_ids}

    if environment_ids is not None:
        filters["environment"] = environment_ids

    if candidate_ids:
        filters["group_id"] = sorted(candidate_ids)

    conditions = []
    having = []
    for search_filter in search_filters:
        if (
            # Don't filter on issue fields here, they're not available
            search_filter.key.name in issue_only_fields
            # We special case date
            or search_filter.key.name == "date"
        ):
            continue
        converted_filter = convert_search_filter_to_snuba_query(search_filter)

        # Ensure that no user-generated tag that clashes with aggregation_defs is added to having
        if search_filter.key.name in aggregation_defs and not search_filter.key.is_tag:
            having.append(converted_filter)
        else:
            conditions.append(converted_filter)

    extra_aggregations = dependency_aggregations.get(sort_field, [])
    required_aggregations = set([sort_field, "total"] + extra_aggregations)
    for h in having:
        alias = h[0]
        required_aggregations.add(alias)

    aggregations = []
    for alias in required_aggregations:
        aggregations.append(aggregation_defs[alias] + [alias])

    if cursor is not None:
        having.append(
            (sort_field, ">=" if cursor.is_prev else "<=", cursor.value))

    selected_columns = []
    if get_sample:
        query_hash = md5(repr(conditions)).hexdigest()[:8]
        selected_columns.append(
            ("cityHash64", ("'{}'".format(query_hash), "group_id"), "sample"))
        sort_field = "sample"
        orderby = [sort_field]
        referrer = "search_sample"
    else:
        # Get the top matching groups by score, i.e. the actual search results
        # in the order that we want them.
        orderby = ["-{}".format(sort_field),
                   "group_id"]  # ensure stable sort within the same score
        referrer = "search"

    snuba_results = snuba.dataset_query(
        dataset=Dataset.Events,
        start=start,
        end=end,
        selected_columns=selected_columns,
        groupby=["group_id"],
        conditions=conditions,
        having=having,
        filter_keys=filters,
        aggregations=aggregations,
        orderby=orderby,
        referrer=referrer,
        limit=limit,
        offset=offset,
        totals=True,  # Needs to have totals_mode=after_having_exclusive so we get groups matching HAVING only
        turbo=get_sample,  # Turn off FINAL when in sampling mode
        sample=1,  # Don't use clickhouse sampling, even when in turbo mode.
    )
    rows = snuba_results["data"]
    total = snuba_results["totals"]["total"]

    if not get_sample:
        metrics.timing("snuba.search.num_result_groups", len(rows))

    return [(row["group_id"], row[sort_field]) for row in rows], total
Code Example #4
File: executors.py Project: snilwx/sentry
    def snuba_search(
        self,
        start,
        end,
        project_ids,
        environment_ids,
        sort_field,
        cursor=None,
        group_ids=None,
        limit=None,
        offset=0,
        get_sample=False,
        search_filters=None,
    ):
        """
        Returns a tuple of:
        * a sorted list of (group_id, group_score) tuples sorted descending by score,
        * the count of total results (rows) available for this query.
        """

        filters = {"project_id": project_ids}

        if environment_ids is not None:
            filters["environment"] = environment_ids

        if group_ids:
            filters["group_id"] = sorted(group_ids)

        conditions = []
        having = []
        for search_filter in search_filters:
            if (
                # Don't filter on postgres fields here, they're not available
                search_filter.key.name in self.postgres_only_fields
                or
                # We special case date
                search_filter.key.name == "date"
            ):
                continue
            converted_filter = convert_search_filter_to_snuba_query(search_filter)
            converted_filter = self._transform_converted_filter(
                search_filter, converted_filter, project_ids, environment_ids
            )
            if converted_filter is not None:
                # Ensure that no user-generated tag that clashes with aggregation_defs is added to having
                if search_filter.key.name in self.aggregation_defs and not search_filter.key.is_tag:
                    having.append(converted_filter)
                else:
                    conditions.append(converted_filter)

        extra_aggregations = self.dependency_aggregations.get(sort_field, [])
        required_aggregations = set([sort_field, "total"] + extra_aggregations)
        for h in having:
            alias = h[0]
            required_aggregations.add(alias)

        aggregations = []
        for alias in required_aggregations:
            aggregation = self.aggregation_defs[alias]
            if callable(aggregation):
                # TODO: If we want to expand this pattern we should probably figure out
                # more generic things to pass here.
                aggregation = aggregation(start, end)
            aggregations.append(aggregation + [alias])

        if cursor is not None:
            having.append((sort_field, ">=" if cursor.is_prev else "<=", cursor.value))

        selected_columns = []
        if get_sample:
            query_hash = md5(json.dumps(conditions).encode("utf-8")).hexdigest()[:8]
            selected_columns.append(
                ("cityHash64", ("'{}'".format(query_hash), "group_id"), "sample")
            )
            sort_field = "sample"
            orderby = [sort_field]
            referrer = "search_sample"
        else:
            # Get the top matching groups by score, i.e. the actual search results
            # in the order that we want them.
            orderby = [
                "-{}".format(sort_field),
                "group_id",
            ]  # ensure stable sort within the same score
            referrer = "search"

        snuba_results = snuba.aliased_query(
            dataset=self.dataset,
            start=start,
            end=end,
            selected_columns=selected_columns,
            groupby=["group_id"],
            conditions=conditions,
            having=having,
            filter_keys=filters,
            aggregations=aggregations,
            orderby=orderby,
            referrer=referrer,
            limit=limit,
            offset=offset,
            totals=True,  # Needs to have totals_mode=after_having_exclusive so we get groups matching HAVING only
            turbo=get_sample,  # Turn off FINAL when in sampling mode
            sample=1,  # Don't use clickhouse sampling, even when in turbo mode.
            condition_resolver=snuba.get_snuba_column_name,
        )
        rows = snuba_results["data"]
        total = snuba_results["totals"]["total"]

        if not get_sample:
            metrics.timing("snuba.search.num_result_groups", len(rows))

        return [(row["group_id"], row[sort_field]) for row in rows], total
Code Example #5
File: backend.py Project: getsentry/sentry
def snuba_search(start, end, project_ids, environment_ids, sort_field,
                 cursor=None, candidate_ids=None, limit=None, offset=0,
                 get_sample=False, search_filters=None):
    """
    This function doesn't strictly benefit from or require being pulled out of the main
    query method above, but the query method is already large and this function at least
    extracts most of the Snuba-specific logic.

    Returns a tuple of:
     * a sorted list of (group_id, group_score) tuples sorted descending by score,
     * the count of total results (rows) available for this query.
    """
    filters = {
        'project_id': project_ids,
    }

    if environment_ids is not None:
        filters['environment'] = environment_ids

    if candidate_ids:
        filters['issue'] = candidate_ids

    conditions = []
    having = []
    for search_filter in search_filters:
        if (
            # Don't filter on issue fields here, they're not available
            search_filter.key.name in issue_only_fields or
            # We special case date
            search_filter.key.name == 'date'
        ):
            continue
        converted_filter = convert_search_filter_to_snuba_query(search_filter)

        # Ensure that no user-generated tag that clashes with aggregation_defs is added to having
        if search_filter.key.name in aggregation_defs and not search_filter.key.is_tag:
            having.append(converted_filter)
        else:
            conditions.append(converted_filter)

    extra_aggregations = dependency_aggregations.get(sort_field, [])
    required_aggregations = set([sort_field, 'total'] + extra_aggregations)
    for h in having:
        alias = h[0]
        required_aggregations.add(alias)

    aggregations = []
    for alias in required_aggregations:
        aggregations.append(aggregation_defs[alias] + [alias])

    if cursor is not None:
        having.append((sort_field, '>=' if cursor.is_prev else '<=', cursor.value))

    selected_columns = []
    if get_sample:
        query_hash = md5(repr(conditions)).hexdigest()[:8]
        selected_columns.append(('cityHash64', ("'{}'".format(query_hash), 'issue'), 'sample'))
        sort_field = 'sample'
        orderby = [sort_field]
        referrer = 'search_sample'
    else:
        # Get the top matching groups by score, i.e. the actual search results
        # in the order that we want them.
        orderby = ['-{}'.format(sort_field), 'issue']  # ensure stable sort within the same score
        referrer = 'search'

    snuba_results = snuba.raw_query(
        start=start,
        end=end,
        selected_columns=selected_columns,
        groupby=['issue'],
        conditions=conditions,
        having=having,
        filter_keys=filters,
        aggregations=aggregations,
        orderby=orderby,
        referrer=referrer,
        limit=limit,
        offset=offset,
        totals=True,  # Needs to have totals_mode=after_having_exclusive so we get groups matching HAVING only
        turbo=get_sample,  # Turn off FINAL when in sampling mode
        sample=1,  # Don't use clickhouse sampling, even when in turbo mode.
    )
    rows = snuba_results['data']
    total = snuba_results['totals']['total']

    if not get_sample:
        metrics.timing('snuba.search.num_result_groups', len(rows))

    return [(row['issue'], row[sort_field]) for row in rows], total
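The cursor handling shared by all of these implementations fits in one HAVING condition: with the descending sort order, paging forward keeps rows at or below the cursor's score, and paging back keeps rows at or above it. A minimal sketch with a simplified stand-in Cursor:

from collections import namedtuple

Cursor = namedtuple("Cursor", "value is_prev")


def cursor_having(sort_field, cursor):
    # Descending sort: "next page" means scores <= cursor, "prev page" means >=.
    return (sort_field, ">=" if cursor.is_prev else "<=", cursor.value)


print(cursor_having("last_seen", Cursor(value=1588000000, is_prev=False)))
# -> ('last_seen', '<=', 1588000000)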
Code Example #6
    def get(self, request, organization):
        """
        List an Organization's Issues
        `````````````````````````````

        Return a list of issues (groups) bound to an organization.  All parameters are
        supplied as query string parameters.

        A default query of ``is:unresolved`` is applied. To return results
        with other statuses, send a new query value (i.e. ``?query=`` for all
        results).

        The ``groupStatsPeriod`` parameter can be used to select the timeline
        stats which should be present. Possible values are: '' (disable),
        '24h', '14d'

        The ``statsPeriod`` parameter can be used to select a date window starting
        from now. Ex. ``14d``.

        The ``start`` and ``end`` parameters can be used to select an absolute
        date period to fetch issues from.

        :qparam string statsPeriod: an optional stat period (can be one of
                                    ``"24h"``, ``"14d"``, and ``""``).
        :qparam string groupStatsPeriod: an optional stat period (can be one of
                                    ``"24h"``, ``"14d"``, and ``""``).
        :qparam string start:       Beginning date. You must also provide ``end``.
        :qparam string end:         End date. You must also provide ``start``.
        :qparam bool shortIdLookup: if this is set to true then short IDs are
                                    looked up by this function as well.  This
                                    can cause the function to return an issue
                                    from a different project, which is why it
                                    is opt-in. Set to `1` to enable.
        :qparam querystring query: an optional Sentry structured search
                                   query.  If not provided, an implied
                                   ``"is:unresolved"`` is assumed.
        :pparam string organization_slug: the slug of the organization the
                                          issues belong to.
        :auth: required
        """
        stats_period = request.GET.get("groupStatsPeriod")
        if stats_period not in (None, "", "24h", "14d"):
            return Response({"detail": ERR_INVALID_STATS_PERIOD}, status=400)
        elif stats_period is None:
            # default
            stats_period = "24h"
        elif stats_period == "":
            # disable stats
            stats_period = None

        try:
            start, end = get_date_range_from_params(request.GET)
        except InvalidParams as e:
            return Response({"detail": six.text_type(e)}, status=400)

        environments = self.get_environments(request, organization)

        serializer = functools.partial(
            StreamGroupSerializerSnuba,
            environment_ids=[env.id for env in environments],
            stats_period=stats_period,
        )

        projects = self.get_projects(request, organization)
        project_ids = [p.id for p in projects]

        if not projects:
            return Response([])

        if len(projects) > 1 and not features.has(
            "organizations:global-views", organization, actor=request.user
        ):
            return Response(
                {"detail": "You do not have the multi project stream feature enabled"},
                status=400,
            )

        # we ignore date range for both short id and event ids
        query = request.GET.get("query", "").strip()
        if query:
            # check to see if we've got an event ID
            event_id = normalize_event_id(query)
            if event_id:
                # For a direct hit lookup we want to use any passed project ids
                # (we've already checked permissions on these) plus any other
                # projects that the user is a member of. This gives us a better
                # chance of returning the correct result, even if the wrong
                # project is selected.
                direct_hit_projects = set(project_ids) | set(
                    [project.id for project in request.access.projects])
                groups = list(
                    Group.objects.filter_by_event_id(direct_hit_projects,
                                                     event_id))
                if len(groups) == 1:
                    response = Response(
                        serialize(groups, request.user,
                                  serializer(matching_event_id=event_id)))
                    response["X-Sentry-Direct-Hit"] = "1"
                    return response

                if groups:
                    return Response(
                        serialize(groups, request.user, serializer()))

            group = get_by_short_id(organization.id,
                                    request.GET.get("shortIdLookup"), query)
            if group is not None:
                # check all projects user has access to
                if request.access.has_project_access(group.project):
                    response = Response(
                        serialize([group], request.user, serializer()))
                    response["X-Sentry-Direct-Hit"] = "1"
                    return response

        # If group ids specified, just ignore any query components
        try:
            group_ids = set(map(int, request.GET.getlist("group")))
        except ValueError:
            return Response({"detail": "Group ids must be integers"},
                            status=400)

        if group_ids:
            groups = list(
                Group.objects.filter(id__in=group_ids,
                                     project_id__in=project_ids))
            if any(g for g in groups
                   if not request.access.has_project_access(g.project)):
                raise PermissionDenied
            return Response(serialize(groups, request.user, serializer()))

        try:
            cursor_result, query_kwargs = self._search(
                request,
                organization,
                projects,
                environments,
                {
                    "count_hits": True,
                    "date_to": end,
                    "date_from": start
                },
            )
        except (ValidationError, discover.InvalidSearchQuery) as exc:
            return Response({"detail": six.text_type(exc)}, status=400)

        results = list(cursor_result)

        lifetime_stats = serialize(results, request.user, serializer())

        if features.has("organizations:dynamic-issue-counts",
                        organization,
                        actor=request.user):
            snuba_filters = []
            if "search_filters" in query_kwargs and query_kwargs[
                    "search_filters"] is not None:
                snuba_filters = [
                    convert_search_filter_to_snuba_query(search_filter)
                    for search_filter in query_kwargs["search_filters"]
                    if search_filter.key.name not in self.skip_snuba_fields
                ]

            context = serialize(results, request.user,
                                serializer(start=start, end=end))
            if snuba_filters:
                filtered_stats = serialize(
                    results,
                    request.user,
                    serializer(start=start,
                               end=end,
                               snuba_filters=snuba_filters),
                )
            else:
                filtered_stats = None
            for idx, ctx in enumerate(context):
                ctx["lifetime"] = lifetime_stats[idx]
                if snuba_filters:
                    ctx["filtered"] = filtered_stats[idx]
        else:
            # context was the lifetime stats previously with no filters/dynamic start-end values
            context = lifetime_stats

        # HACK: remove auto resolved entries
        # TODO: We should try to integrate this into the search backend, since
        # this can cause us to arbitrarily return fewer results than requested.
        status = [
            search_filter
            for search_filter in query_kwargs.get("search_filters", [])
            if search_filter.key.name == "status"
        ]
        if status and status[0].value.raw_value == GroupStatus.UNRESOLVED:
            context = [r for r in context if r["status"] == "unresolved"]

        response = Response(context)

        self.add_cursor_headers(request, response, cursor_result)

        # TODO(jess): add metrics that are similar to project endpoint here
        return response
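Under the organizations:dynamic-issue-counts flag, the handler serializes the same result list up to three times (windowed, lifetime, and filtered) and stitches the serializations together by list index. A condensed sketch of that merge step, with hypothetical inline data:

def merge_stats(context, lifetime_stats, filtered_stats=None):
    # context, lifetime_stats and filtered_stats are parallel lists produced
    # by serializing the same results with different serializer arguments.
    for idx, ctx in enumerate(context):
        ctx["lifetime"] = lifetime_stats[idx]
        if filtered_stats is not None:
            ctx["filtered"] = filtered_stats[idx]
    return context


print(merge_stats(
    [{"id": 1, "count": 3}],    # stats within the start/end window
    [{"id": 1, "count": 120}],  # lifetime stats, no date bounds
    [{"id": 1, "count": 2}],    # stats with the snuba filters applied
))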
Code Example #7
def snuba_search(start,
                 end,
                 project_ids,
                 environment_ids,
                 tags,
                 sort_field,
                 cursor=None,
                 candidate_ids=None,
                 limit=None,
                 offset=0,
                 get_sample=False,
                 search_filters=None,
                 use_new_filters=False,
                 **parameters):
    """
    This function doesn't strictly benefit from or require being pulled out of the main
    query method above, but the query method is already large and this function at least
    extracts most of the Snuba-specific logic.

    Returns a tuple of:
     * a sorted list of (group_id, group_score) tuples sorted descending by score,
     * the count of total results (rows) available for this query.
    """

    from sentry.search.base import ANY

    filters = {
        'project_id': project_ids,
    }

    if environment_ids is not None:
        filters['environment'] = environment_ids

    if candidate_ids is not None:
        filters['issue'] = candidate_ids

    conditions = []
    if use_new_filters:
        having = []
        for search_filter in search_filters:
            if search_filter.key.name in issue_only_fields:
                # Don't filter on issue fields here, they're not available
                continue
            converted_filter = convert_search_filter_to_snuba_query(
                search_filter)
            if search_filter.key.name in aggregation_defs:
                having.append(converted_filter)
            else:
                conditions.append(converted_filter)
    else:
        having = SnubaConditionBuilder({
            'age_from': ScalarCondition('first_seen', '>'),
            'age_to': ScalarCondition('first_seen', '<'),
            'last_seen_from': ScalarCondition('last_seen', '>'),
            'last_seen_to': ScalarCondition('last_seen', '<'),
            'times_seen': CallbackCondition(
                lambda times_seen: ('times_seen', '=', times_seen),
            ),
            'times_seen_lower': ScalarCondition('times_seen', '>'),
            'times_seen_upper': ScalarCondition('times_seen', '<'),
        }).build(parameters)

        for tag, val in sorted(tags.items()):
            col = u'tags[{}]'.format(tag)
            if val == ANY:
                conditions.append((col, '!=', ''))
            else:
                conditions.append((col, '=', val))

    extra_aggregations = dependency_aggregations.get(sort_field, [])
    required_aggregations = set([sort_field, 'total'] + extra_aggregations)
    for h in having:
        alias = h[0]
        required_aggregations.add(alias)

    aggregations = []
    for alias in required_aggregations:
        aggregations.append(aggregation_defs[alias] + [alias])

    if cursor is not None:
        having.append(
            (sort_field, '>=' if cursor.is_prev else '<=', cursor.value))

    selected_columns = []
    if get_sample:
        # Get a random sample of matching groups. Because we use any(rand()),
        # we are testing against a single random value per group, and so the
        # sample is independent of the number of events in a group. Since we
        # are sampling using `ORDER BY random() LIMIT x`, we will always grab
        # the full result set if there are fewer than x total results.

        query_hash = md5(repr(conditions)).hexdigest()[:8]
        selected_columns.append(
            ('cityHash64', ("'{}'".format(query_hash), 'issue'), 'sample'))
        sort_field = 'sample'
        orderby = [sort_field]
        referrer = 'search_sample'
    else:
        # Get the top matching groups by score, i.e. the actual search results
        # in the order that we want them.
        orderby = ['-{}'.format(sort_field),
                   'issue']  # ensure stable sort within the same score
        referrer = 'search'

    snuba_results = snuba.raw_query(
        start=start,
        end=end,
        selected_columns=selected_columns,
        groupby=['issue'],
        conditions=conditions,
        having=having,
        filter_keys=filters,
        aggregations=aggregations,
        orderby=orderby,
        referrer=referrer,
        limit=limit,
        offset=offset,
        totals=True,  # Needs to have totals_mode=after_having_exclusive so we get groups matching HAVING only
        turbo=get_sample,  # Turn off FINAL when in sampling mode
        sample=1,  # Don't use clickhouse sampling, even when in turbo mode.
    )
    rows = snuba_results['data']
    total = snuba_results['totals']['total']

    if not get_sample:
        metrics.timing('snuba.search.num_result_groups', len(rows))

    return [(row['issue'], row[sort_field]) for row in rows], total
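In the legacy (use_new_filters=False) path above, tag lookups become plain column conditions: the ANY sentinel turns into a "tag is present" check, anything else into an exact match. A small runnable sketch, with a stand-in for sentry.search.base.ANY:

ANY = object()  # stand-in sentinel for sentry.search.base.ANY


def tag_conditions(tags):
    conditions = []
    for tag, val in sorted(tags.items()):
        col = u'tags[{}]'.format(tag)
        if val is ANY:
            conditions.append((col, '!=', ''))  # tag present, any value
        else:
            conditions.append((col, '=', val))  # exact match on the tag value
    return conditions


print(tag_conditions({'browser': 'Chrome', 'environment': ANY}))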