Example #1
    def _post_by_key(self, request, full_config_requested):
        public_keys = request.relay_request_data.get("publicKeys")
        public_keys = set(public_keys or ())

        project_keys = {}  # type: dict[str, ProjectKey]
        project_ids = set()  # type: set[int]

        with start_span(op="relay_fetch_keys"):
            with metrics.timer("relay_project_configs.fetching_keys.duration"):
                for key in ProjectKey.objects.get_many_from_cache(
                        public_keys, key="public_key"):
                    if key.status != ProjectKeyStatus.ACTIVE:
                        continue

                    project_keys[key.public_key] = key
                    project_ids.add(key.project_id)

        projects = {}  # type: dict[int, Project]
        organization_ids = set()  # type: set[int]

        with start_span(op="relay_fetch_projects"):
            with metrics.timer(
                    "relay_project_configs.fetching_projects.duration"):
                for project in Project.objects.get_many_from_cache(
                        project_ids):
                    projects[project.id] = project
                    organization_ids.add(project.organization_id)

        # Preload all organizations and their options to prevent repeated
        # database access when computing the project configuration.

        orgs = {}  # type: dict[int, Organization]

        with start_span(op="relay_fetch_orgs"):
            with metrics.timer("relay_project_configs.fetching_orgs.duration"):
                for org in Organization.objects.get_many_from_cache(
                        organization_ids):
                    if request.relay.has_org_access(org):
                        orgs[org.id] = org

        with start_span(op="relay_fetch_org_options"):
            with metrics.timer(
                    "relay_project_configs.fetching_org_options.duration"):
                for org_id in orgs:
                    OrganizationOption.objects.get_all_values(org_id)

        metrics.timing("relay_project_configs.projects_requested",
                       len(project_ids))
        metrics.timing("relay_project_configs.projects_fetched", len(projects))
        metrics.timing("relay_project_configs.orgs_fetched", len(orgs))

        configs = {}
        for public_key in public_keys:
            configs[public_key] = {"disabled": True}

            key = project_keys.get(public_key)
            if key is None:
                continue

            project = projects.get(key.project_id)
            if project is None:
                continue

            organization = orgs.get(project.organization_id)
            if organization is None:
                continue

            # Prevent organization from being fetched again in quotas.
            project.set_cached_field_value("organization", organization)

            with Hub.current.start_span(op="get_config"):
                with metrics.timer(
                        "relay_project_configs.get_config.duration"):
                    project_config = config.get_project_config(
                        project,
                        full_config=full_config_requested,
                        project_keys=[key],
                    )

            configs[public_key] = project_config.to_dict()

        if full_config_requested:
            projectconfig_cache.set_many(configs)

        return Response({"configs": configs}, status=200)
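The response assembled above maps every requested public key to a config entry; keys that do not resolve to an active project key, a project, and an accessible organization stay marked as disabled. A minimal illustration of the shape, with placeholder keys and values:

example_response = {
    "configs": {
        # Unknown or inactive public key: left at the default set at the top of the loop.
        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa": {"disabled": True},
        # Resolvable public key: the dict produced by config.get_project_config(...).to_dict().
        "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb": {
            # ...project config fields...
        },
    }
}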
Example #2
    def get_event_stats_data(self,
                             request,
                             organization,
                             get_event_stats,
                             top_events=False):
        with self.handle_query_errors():
            with sentry_sdk.start_span(
                    op="discover.endpoint",
                    description="base.stats_query_creation"):
                columns = request.GET.getlist("yAxis", ["count()"])
                query = request.GET.get("query")
                try:
                    params = self.get_filter_params(request, organization)
                except NoProjects:
                    return {"data": []}
                params = self.quantize_date_params(request, params)
                rollup = get_rollup_from_request(
                    request,
                    params,
                    "1h",
                    InvalidSearchQuery(
                        "Your interval and date range would create too many results. "
                        "Use a larger interval, or a smaller date range."),
                )
                # Backwards compatibility for incidents, which use the old
                # column aliases as they straddle both versions of events/discover.
                # We will need these aliases until discover2 flags are enabled
                # for all users.
                column_map = {
                    "user_count": "count_unique(user)",
                    "event_count": "count()",
                    "epm()": "epm(%d)" % rollup,
                    "eps()": "eps(%d)" % rollup,
                }
                query_columns = [
                    column_map.get(column, column) for column in columns
                ]
                reference_event = self.reference_event(request, organization,
                                                       params.get("start"),
                                                       params.get("end"))

            with sentry_sdk.start_span(op="discover.endpoint",
                                       description="base.stats_query"):
                result = get_event_stats(query_columns, query, params, rollup,
                                         reference_event)

        serializer = SnubaTSResultSerializer(organization, None, request.user)

        with sentry_sdk.start_span(op="discover.endpoint",
                                   description="base.stats_serialization"):
            if top_events:
                results = {}
                for key, event_result in six.iteritems(result):
                    if len(query_columns) > 1:
                        results[key] = self.serialize_multiple_axis(
                            serializer, event_result, columns, query_columns)
                    else:
                        # Need to get function alias if count is a field, but not the axis
                        results[key] = serializer.serialize(
                            event_result, get_function_alias(query_columns[0]))
                return results
            elif len(query_columns) > 1:
                return self.serialize_multiple_axis(serializer, result,
                                                    columns, query_columns)
            else:
                return serializer.serialize(result)
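The get_event_stats callable is supplied by the concrete endpoint and is invoked as get_event_stats(query_columns, query, params, rollup, reference_event). A hedged sketch of such a callback, assuming the sentry.snuba.discover module and its timeseries_query helper plus an illustrative referrer string (none of which appear in this snippet):

from sentry.snuba import discover  # assumed module path

def get_event_stats(query_columns, query, params, rollup, reference_event):
    # Run a timeseries query over the resolved y-axis columns; the serializer
    # above consumes whatever this returns.
    return discover.timeseries_query(
        selected_columns=query_columns,
        query=query,
        params=params,
        rollup=rollup,
        reference_event=reference_event,
        referrer="api.example-event-stats",  # illustrative referrer
    )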
    def handle_message(self, message):
        """
        Parses the value from Kafka, and if valid passes the payload to the callback defined by the
        subscription. If the subscription has been removed, or no longer has a valid callback then
        just log metrics/errors and continue.
        :param message:
        :return:
        """
        with sentry_sdk.push_scope() as scope:
            try:
                with metrics.timer(
                        "snuba_query_subscriber.parse_message_value"):
                    contents = self.parse_message_value(message.value())
            except InvalidMessageError:
                # If the message is in an invalid format, just log the error
                # and continue
                logger.exception(
                    "Subscription update could not be parsed",
                    extra={
                        "offset": message.offset(),
                        "partition": message.partition(),
                        "value": message.value(),
                    },
                )
                return
            scope.set_tag("query_subscription_id", contents["subscription_id"])

            try:
                with metrics.timer(
                        "snuba_query_subscriber.fetch_subscription"):
                    subscription = QuerySubscription.objects.get_from_cache(
                        subscription_id=contents["subscription_id"])
                    if subscription.status != QuerySubscription.Status.ACTIVE.value:
                        metrics.incr(
                            "snuba_query_subscriber.subscription_inactive")
                        return
            except QuerySubscription.DoesNotExist:
                metrics.incr(
                    "snuba_query_subscriber.subscription_doesnt_exist")
                logger.error(
                    "Received subscription update, but subscription does not exist",
                    extra={
                        "offset": message.offset(),
                        "partition": message.partition(),
                        "value": message.value(),
                    },
                )
                try:
                    _delete_from_snuba(self.topic_to_dataset[message.topic()],
                                       contents["subscription_id"])
                except Exception:
                    logger.exception(
                        "Failed to delete unused subscription from snuba.")
                return

            if subscription.type not in subscriber_registry:
                metrics.incr(
                    "snuba_query_subscriber.subscription_type_not_registered")
                logger.error(
                    "Received subscription update, but no subscription handler registered",
                    extra={
                        "offset": message.offset(),
                        "partition": message.partition(),
                        "value": message.value(),
                    },
                )
                return

            logger.debug(
                "query-subscription-consumer.handle_message",
                extra={
                    "timestamp": contents["timestamp"],
                    "query_subscription_id": contents["subscription_id"],
                    "project_id": subscription.project_id,
                    "subscription_dataset": subscription.snuba_query.dataset,
                    "subscription_query": subscription.snuba_query.query,
                    "subscription_aggregation":
                    subscription.snuba_query.aggregate,
                    "subscription_time_window":
                    subscription.snuba_query.time_window,
                    "subscription_resolution":
                    subscription.snuba_query.resolution,
                    "offset": message.offset(),
                    "partition": message.partition(),
                    "value": message.value(),
                },
            )

            callback = subscriber_registry[subscription.type]
            with sentry_sdk.start_span(
                    op="process_message") as span, metrics.timer(
                        "snuba_query_subscriber.callback.duration",
                        instance=subscription.type):
                span.set_data("payload", contents)
                callback(contents, subscription)
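The callback looked up in subscriber_registry receives the parsed message contents and the QuerySubscription row. The registration below is purely illustrative: the registry key and the direct dict assignment are assumptions, not the real registration helper.

def handle_snuba_update(contents, subscription):
    # contents carries at least "subscription_id" and "timestamp" (see the
    # logging call above); subscription is the QuerySubscription instance.
    print(contents["subscription_id"], contents["timestamp"], subscription.project_id)

subscriber_registry["example-subscription-type"] = handle_snuba_update  # hypothetical key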
Example #4
def query(
    selected_columns,
    query,
    params,
    orderby=None,
    offset=None,
    limit=50,
    referrer=None,
    auto_fields=False,
    use_aggregate_conditions=False,
    conditions=None,
):
    """
    High-level API for doing arbitrary user queries against events.

    This function operates on the Discover public event schema and
    virtual fields/aggregate functions for selected columns and
    conditions are supported through this function.

    The resulting list will have all internal field names mapped
    back into their public schema names.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    orderby (None|str|Sequence[str]) The field to order results by.
    offset (None|int) The record offset to read.
    limit (int) The number of records to fetch.
    referrer (str|None) A referrer string to help locate the origin of this query.
    auto_fields (bool) Set to true to have project + eventid fields automatically added.
    conditions (Sequence[any]) List of conditions that are passed directly to snuba without
                    any additional processing.
    """
    if not selected_columns:
        raise InvalidSearchQuery("No columns selected")
    else:
        # We clobber this value throughout this code, so copy the value
        selected_columns = selected_columns[:]

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.filter_transform") as span:
        span.set_data("query", query)

        snuba_filter = get_filter(query, params)
        if not use_aggregate_conditions:
            snuba_filter.having = []

    # We need to run a separate query to be able to properly bucket the values for the histogram.
    # Do that here, and format the bucket number into the columns before passing it through
    # to event search.
    idx = 0
    function_translations = {}
    for col in selected_columns:
        if col.startswith("histogram("):
            with sentry_sdk.start_span(
                    op="discover.discover",
                    description="query.histogram_calculation") as span:
                span.set_data("histogram", col)
                histogram_column = find_histogram_buckets(
                    col, params, snuba_filter.conditions)
                selected_columns[idx] = histogram_column
                snuba_name = get_function_alias(histogram_column)
                sentry_name = get_function_alias(col)
                function_translations[snuba_name] = sentry_name
                # Since we're completely renaming the histogram function, we need to also check if we are
                # ordering by the histogram values, and change that.
                if orderby is not None:
                    orderby = list(orderby) if isinstance(
                        orderby, (list, tuple)) else [orderby]
                    for i, ordering in enumerate(orderby):
                        if sentry_name == ordering.lstrip("-"):
                            ordering = "{}{}".format(
                                "-" if ordering.startswith("-") else "",
                                snuba_name)
                            orderby[i] = ordering

            break

        idx += 1

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.field_translations"):
        if orderby is not None:
            orderby = list(orderby) if isinstance(orderby,
                                                  (list,
                                                   tuple)) else [orderby]
            snuba_filter.orderby = [get_function_alias(o) for o in orderby]

        snuba_filter.update_with(
            resolve_field_list(selected_columns,
                               snuba_filter,
                               auto_fields=auto_fields))

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = resolve_discover_aliases(
            snuba_filter, function_translations)

        # Make sure that any aggregate conditions are also in the selected columns
        for having_clause in snuba_filter.having:
            # The first element of the having can be an alias, or a nested array of functions. Loop through to make sure
            # any referenced functions are in the aggregations.
            if isinstance(having_clause[0], (list, tuple)):
                # Functions are of the form [fn, [args]]
                args_to_check = [[having_clause[0]]]
                conditions_not_in_aggregations = []
                while len(args_to_check) > 0:
                    args = args_to_check.pop()
                    for arg in args:
                        if arg[0] in [SNUBA_AND, SNUBA_OR]:
                            args_to_check.extend(arg[1])
                        else:
                            alias = arg[1][0]
                            found = any(
                                alias == agg_clause[-1]
                                for agg_clause in snuba_filter.aggregations)
                            if not found:
                                conditions_not_in_aggregations.append(alias)

                if len(conditions_not_in_aggregations) > 0:
                    raise InvalidSearchQuery(
                        u"Aggregate(s) {} used in a condition but are not in the selected columns."
                        .format(", ".join(conditions_not_in_aggregations)))
            else:
                found = any(having_clause[0] == agg_clause[-1]
                            for agg_clause in snuba_filter.aggregations)
                if not found:
                    raise InvalidSearchQuery(
                        u"Aggregate {} used in a condition but is not a selected column."
                        .format(having_clause[0]))

        if conditions is not None:
            snuba_filter.conditions.extend(conditions)

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.snuba_query"):
        result = raw_query(
            start=snuba_filter.start,
            end=snuba_filter.end,
            groupby=snuba_filter.groupby,
            conditions=snuba_filter.conditions,
            aggregations=snuba_filter.aggregations,
            selected_columns=snuba_filter.selected_columns,
            filter_keys=snuba_filter.filter_keys,
            having=snuba_filter.having,
            orderby=snuba_filter.orderby,
            dataset=Dataset.Discover,
            limit=limit,
            offset=offset,
            referrer=referrer,
        )

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.transform_results") as span:
        span.set_data("result_count", len(result.get("data", [])))
        return transform_results(result, translated_columns, snuba_filter,
                                 selected_columns)
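A hedged usage sketch of query() under the parameter contract documented above. The date range, project id, and referrer are illustrative, and the result is assumed to expose rows under a "data" key as produced by transform_results:

from datetime import datetime, timedelta

results = query(
    selected_columns=["title", "project", "count()"],
    query="event.type:error",
    params={
        "start": datetime.utcnow() - timedelta(days=1),
        "end": datetime.utcnow(),
        "project_id": [1],  # illustrative project id
    },
    limit=10,
    referrer="api.example-usage",  # illustrative referrer
)
for row in results["data"]:
    # "count" is the assumed public alias for the count() aggregate.
    print(row["title"], row.get("count"))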
Example #5
def get_facets(query, params, limit=10, referrer=None):
    """
    High-level API for getting 'facet map' results.

    Facets are high frequency tags and attribute results that
    can be used to further refine user queries. When many projects
    are requested sampling will be enabled to help keep response times low.

    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    limit (int) The number of records to fetch.
    referrer (str|None) A referrer string to help locate the origin of this query.

    Returns Sequence[FacetResult]
    """
    with sentry_sdk.start_span(op="discover.discover",
                               description="facets.filter_transform") as span:
        span.set_data("query", query)
        snuba_filter = get_filter(query, params)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = resolve_discover_aliases(
            snuba_filter)

    # Exclude tracing tags as they are noisy and generally not helpful.
    excluded_tags = [
        "tags_key", "NOT IN", ["trace", "trace.ctx", "trace.span", "project"]
    ]

    # Sample multi-project results, since we don't need full accuracy
    # with that much data.
    sample = len(snuba_filter.filter_keys["project_id"]) > 2

    with sentry_sdk.start_span(op="discover.discover",
                               description="facets.frequent_tags"):
        # Get the most frequent tag keys
        key_names = raw_query(
            aggregations=[["count", None, "count"]],
            start=snuba_filter.start,
            end=snuba_filter.end,
            conditions=snuba_filter.conditions,
            filter_keys=snuba_filter.filter_keys,
            orderby=["-count", "tags_key"],
            groupby="tags_key",
            having=[excluded_tags],
            dataset=Dataset.Discover,
            limit=limit,
            referrer=referrer,
            turbo=sample,
        )
        top_tags = [r["tags_key"] for r in key_names["data"]]
        if not top_tags:
            return []

    # TODO(mark) Make the sampling rate scale based on the result size and scaling factor in
    # sentry.options. To test the lowest acceptable sampling rate, we use 0.1 which
    # is equivalent to turbo. We don't use turbo though as we need to re-scale data, and
    # using turbo could cause results to be wrong if the value of turbo is changed in snuba.
    sampling_enabled = options.get("discover2.tags_facet_enable_sampling")
    sample_rate = 0.1 if (sampling_enabled
                          and key_names["data"][0]["count"] > 10000) else None
    # Rescale the results if we're sampling
    multiplier = 1 / sample_rate if sample_rate is not None else 1

    fetch_projects = False
    if len(params.get("project_id", [])) > 1:
        if len(top_tags) == limit:
            top_tags.pop()
        fetch_projects = True

    results = []
    if fetch_projects:
        with sentry_sdk.start_span(op="discover.discover",
                                   description="facets.projects"):
            project_values = raw_query(
                aggregations=[["count", None, "count"]],
                start=snuba_filter.start,
                end=snuba_filter.end,
                conditions=snuba_filter.conditions,
                filter_keys=snuba_filter.filter_keys,
                groupby="project_id",
                orderby="-count",
                dataset=Dataset.Discover,
                referrer=referrer,
                sample=sample_rate,
                # Ensures Snuba will not apply FINAL
                turbo=sample_rate is not None,
            )
            results.extend([
                FacetResult("project", r["project_id"],
                            int(r["count"]) * multiplier)
                for r in project_values["data"]
            ])

    # Get tag counts for our top tags. Fetching them individually
    # allows snuba to leverage promoted tags better and enables us to get
    # the value count we want.
    max_aggregate_tags = options.get("discover2.max_tags_to_combine")
    individual_tags = []
    aggregate_tags = []
    for i, tag in enumerate(top_tags):
        if tag == "environment":
            # Add tags here that should always be queried individually
            individual_tags.append(tag)
        elif i >= len(top_tags) - max_aggregate_tags:
            aggregate_tags.append(tag)
        else:
            individual_tags.append(tag)

    with sentry_sdk.start_span(op="discover.discover",
                               description="facets.individual_tags") as span:
        span.set_data("tag_count", len(individual_tags))
        for tag_name in individual_tags:
            tag = u"tags[{}]".format(tag_name)
            tag_values = raw_query(
                aggregations=[["count", None, "count"]],
                conditions=snuba_filter.conditions,
                start=snuba_filter.start,
                end=snuba_filter.end,
                filter_keys=snuba_filter.filter_keys,
                orderby=["-count"],
                groupby=[tag],
                limit=TOP_VALUES_DEFAULT_LIMIT,
                dataset=Dataset.Discover,
                referrer=referrer,
                sample=sample_rate,
                # Ensures Snuba will not apply FINAL
                turbo=sample_rate is not None,
            )
            results.extend([
                FacetResult(tag_name, r[tag],
                            int(r["count"]) * multiplier)
                for r in tag_values["data"]
            ])

    if aggregate_tags:
        with sentry_sdk.start_span(op="discover.discover",
                                   description="facets.aggregate_tags"):
            conditions = snuba_filter.conditions
            conditions.append(["tags_key", "IN", aggregate_tags])
            tag_values = raw_query(
                aggregations=[["count", None, "count"]],
                conditions=conditions,
                start=snuba_filter.start,
                end=snuba_filter.end,
                filter_keys=snuba_filter.filter_keys,
                orderby=["tags_key", "-count"],
                groupby=["tags_key", "tags_value"],
                dataset=Dataset.Discover,
                referrer=referrer,
                sample=sample_rate,
                # Ensures Snuba will not apply FINAL
                turbo=sample_rate is not None,
                limitby=[TOP_VALUES_DEFAULT_LIMIT, "tags_key"],
            )
            results.extend([
                FacetResult(r["tags_key"], r["tags_value"],
                            int(r["count"]) * multiplier)
                for r in tag_values["data"]
            ])

    return results
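A hedged usage sketch of get_facets(). The parameters are illustrative, and the FacetResult attribute names (key, value, count) are assumed from how results are constructed above:

from datetime import datetime, timedelta

facets = get_facets(
    query="event.type:error",
    params={
        "start": datetime.utcnow() - timedelta(hours=6),
        "end": datetime.utcnow(),
        "project_id": [1, 2],  # more than two projects would enable sampling
    },
    limit=10,
    referrer="api.example-facets",  # illustrative referrer
)
for facet in facets:
    print(facet.key, facet.value, facet.count)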
Example #6
def post_process_group(is_new,
                       is_regression,
                       is_new_group_environment,
                       cache_key,
                       group_id=None,
                       **kwargs):
    """
    Fires post processing hooks for a group.
    """
    from sentry.eventstore.models import Event
    from sentry.eventstore.processing import event_processing_store
    from sentry.reprocessing2 import is_reprocessed_event
    from sentry.utils import snuba

    with snuba.options_override({"consistent": True}):
        # We use the data being present/missing in the processing store
        # to ensure that we don't duplicate work should the forwarding consumers
        # need to rewind history.
        data = event_processing_store.get(cache_key)
        if not data:
            logger.info(
                "post_process.skipped",
                extra={
                    "cache_key": cache_key,
                    "reason": "missing_cache"
                },
            )
            return
        event = Event(project_id=data["project"],
                      event_id=data["event_id"],
                      group_id=group_id,
                      data=data)

        set_current_event_project(event.project_id)

        is_transaction_event = not bool(event.group_id)

        from sentry.models import EventDict, Organization, Project

        # Re-bind node data to avoid renormalization. We only want to
        # renormalize when loading old data from the database.
        event.data = EventDict(event.data, skip_renormalization=True)

        # Re-bind Project and Org since we're reading the Event object
        # from cache which may contain stale parent models.
        event.project = Project.objects.get_from_cache(id=event.project_id)
        event.project.set_cached_field_value(
            "organization",
            Organization.objects.get_from_cache(
                id=event.project.organization_id))

        # Simplified post processing for transaction events.
        # This should eventually be completely removed and transactions
        # will not go through any post processing.
        if is_transaction_event:
            transaction_processed.send_robust(
                sender=post_process_group,
                project=event.project,
                event=event,
            )

            event_processing_store.delete_by_key(cache_key)

            return

        is_reprocessed = is_reprocessed_event(event.data)
        sentry_sdk.set_tag("is_reprocessed", is_reprocessed)

        # NOTE: we must pass through the full Event object, and not an
        # event_id since the Event object may not actually have been stored
        # in the database due to sampling.
        from sentry.models import Commit, GroupInboxReason
        from sentry.models.group import get_group_with_redirect
        from sentry.models.groupinbox import add_group_to_inbox
        from sentry.rules.processor import RuleProcessor
        from sentry.tasks.groupowner import process_suspect_commits
        from sentry.tasks.servicehooks import process_service_hook

        # Re-bind Group since we're reading the Event object
        # from cache, which may contain a stale group and project
        event.group, _ = get_group_with_redirect(event.group_id)
        event.group_id = event.group.id

        event.group.project = event.project
        event.group.project.set_cached_field_value("organization",
                                                   event.project.organization)

        bind_organization_context(event.project.organization)

        _capture_stats(event, is_new)

        with sentry_sdk.start_span(
                op="tasks.post_process_group.add_group_to_inbox"):
            try:
                if is_reprocessed and is_new:
                    add_group_to_inbox(event.group,
                                       GroupInboxReason.REPROCESSED)
            except Exception:
                logger.exception(
                    "Failed to add group to inbox for reprocessed groups")

        if not is_reprocessed:
            # We process snoozes before rules as this might create a regression,
            # but not if the group is new, since you can't immediately snooze a new group.
            has_reappeared = not is_new
            try:
                if has_reappeared:
                    has_reappeared = process_snoozes(event.group)
            except Exception:
                logger.exception("Failed to process snoozes for group")

            try:
                if not has_reappeared:  # If true, we added the .UNIGNORED reason already
                    if is_new:
                        add_group_to_inbox(event.group, GroupInboxReason.NEW)
                    elif is_regression:
                        add_group_to_inbox(event.group,
                                           GroupInboxReason.REGRESSION)
            except Exception:
                logger.exception(
                    "Failed to add group to inbox for non-reprocessed groups")

            with sentry_sdk.start_span(
                    op="tasks.post_process_group.handle_owner_assignment"):
                try:
                    handle_owner_assignment(event.project, event.group, event)
                except Exception:
                    logger.exception("Failed to handle owner assignments")

            rp = RuleProcessor(event, is_new, is_regression,
                               is_new_group_environment, has_reappeared)
            has_alert = False
            with sentry_sdk.start_span(
                    op="tasks.post_process_group.rule_processor_callbacks"):
                # TODO(dcramer): ideally this would fanout, but serializing giant
                # objects back and forth isn't super efficient
                for callback, futures in rp.apply():
                    has_alert = True
                    safe_execute(callback,
                                 event,
                                 futures,
                                 _with_transaction=False)

            try:
                lock = locks.get(
                    f"w-o:{event.group_id}-d-l",
                    duration=10,
                )
                with lock.acquire():
                    has_commit_key = f"w-o:{event.project.organization_id}-h-c"
                    org_has_commit = cache.get(has_commit_key)
                    if org_has_commit is None:
                        org_has_commit = Commit.objects.filter(
                            organization_id=event.project.organization_id
                        ).exists()
                        cache.set(has_commit_key, org_has_commit, 3600)

                    if org_has_commit:
                        group_cache_key = f"w-o-i:g-{event.group_id}"
                        if cache.get(group_cache_key):
                            metrics.incr(
                                "sentry.tasks.process_suspect_commits.debounce",
                                tags={"detail": "w-o-i:g debounce"},
                            )
                        else:
                            from sentry.utils.committers import get_frame_paths

                            cache.set(group_cache_key, True,
                                      604800)  # 1 week in seconds
                            event_frames = get_frame_paths(event.data)
                            process_suspect_commits.delay(
                                event_id=event.event_id,
                                event_platform=event.platform,
                                event_frames=event_frames,
                                group_id=event.group_id,
                                project_id=event.project_id,
                            )
            except UnableToAcquireLock:
                pass
            except Exception:
                logger.exception("Failed to process suspect commits")

            if features.has("projects:servicehooks", project=event.project):
                allowed_events = {"event.created"}
                if has_alert:
                    allowed_events.add("event.alert")

                if allowed_events:
                    for servicehook_id, events in _get_service_hooks(
                            project_id=event.project_id):
                        if any(e in allowed_events for e in events):
                            process_service_hook.delay(
                                servicehook_id=servicehook_id, event=event)

            from sentry.tasks.sentry_apps import process_resource_change_bound

            if event.get_event_type(
            ) == "error" and _should_send_error_created_hooks(event.project):
                process_resource_change_bound.delay(action="created",
                                                    sender="Error",
                                                    instance_id=event.event_id,
                                                    instance=event)
            if is_new:
                process_resource_change_bound.delay(action="created",
                                                    sender="Group",
                                                    instance_id=event.group_id)

            from sentry.plugins.base import plugins

            for plugin in plugins.for_project(event.project):
                plugin_post_process_group(plugin_slug=plugin.slug,
                                          event=event,
                                          is_new=is_new,
                                          is_regresion=is_regression)

            from sentry import similarity

            with sentry_sdk.start_span(
                    op="tasks.post_process_group.similarity"):
                safe_execute(similarity.record,
                             event.project, [event],
                             _with_transaction=False)

        # Patch attachments that were ingested on the standalone path.
        with sentry_sdk.start_span(
                op="tasks.post_process_group.update_existing_attachments"):
            try:
                update_existing_attachments(event)
            except Exception:
                logger.exception("Failed to update existing attachments")

        if not is_reprocessed:
            event_processed.send_robust(
                sender=post_process_group,
                project=event.project,
                event=event,
                primary_hash=kwargs.get("primary_hash"),
            )

        with metrics.timer("tasks.post_process.delete_event_cache"):
            event_processing_store.delete_by_key(cache_key)
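post_process_group is normally enqueued asynchronously after ingest. The call below is only a sketch: it assumes the function is registered as a task exposing .delay (the decorator is not shown here), and the cache_key is a hypothetical event_processing_store key.

post_process_group.delay(
    is_new=True,
    is_regression=False,
    is_new_group_environment=True,
    cache_key="e:1234abcd5678ef90:1",  # hypothetical processing-store key
    group_id=42,
    primary_hash="c0ffee",  # forwarded to event_processed via **kwargs
)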
Example #7
    def query(
        self,
        projects,
        retention_window_start,
        group_queryset,
        environments,
        sort_by,
        limit,
        cursor,
        count_hits,
        paginator_options,
        search_filters,
        date_from,
        date_to,
    ):

        now = timezone.now()
        end = None
        end_params = [
            _f for _f in
            [date_to, get_search_filter(search_filters, "date", "<")] if _f
        ]
        if end_params:
            end = min(end_params)

        if not end:
            end = now + ALLOWED_FUTURE_DELTA

            # This search is for some time window that ends with "now",
            # so if the requested sort is `date` (`last_seen`) and there
            # are no other Snuba-based search predicates, we can simply
            # return the results from Postgres.
            if (cursor is None and sort_by == "date" and not environments and
                    # This handles tags and date parameters for search filters.
                    not [
                        sf for sf in search_filters if sf.key.name not in
                        self.postgres_only_fields.union(["date"])
                    ]):
                group_queryset = group_queryset.order_by("-last_seen")
                paginator = DateTimePaginator(group_queryset, "-last_seen",
                                              **paginator_options)
                # When it's a simple Django-only search, we count hits like normal
                return paginator.get_result(limit,
                                            cursor,
                                            count_hits=count_hits)

        # TODO: Presumably we only want to search back to the project's max
        # retention date, which may be closer than 90 days in the past, but
        # apparently `retention_window_start` can be None(?), so we need a
        # fallback.
        retention_date = max([
            _f for _f in [retention_window_start, now - timedelta(days=90)]
            if _f
        ])
        start_params = [
            date_from, retention_date,
            get_search_filter(search_filters, "date", ">")
        ]
        start = max([_f for _f in start_params if _f])
        end = max([retention_date, end])

        if start == retention_date and end == retention_date:
            # Both `start` and `end` must have been trimmed to `retention_date`,
            # so this entire search was against a time range that is outside of
            # retention. We'll return empty results to maintain backwards compatibility
            # with Django search (for now).
            return self.empty_result

        if start >= end:
            # TODO: This maintains backwards compatibility with Django search, but
            # in the future we should find a way to notify the user that their search
            # is invalid.
            return self.empty_result

        # Here we check if all the django filters reduce the set of groups down
        # to something that we can send down to Snuba in a `group_id IN (...)`
        # clause.
        max_candidates = options.get("snuba.search.max-pre-snuba-candidates")

        with sentry_sdk.start_span(op="snuba_group_query") as span:
            group_ids = list(
                group_queryset.values_list("id",
                                           flat=True)[:max_candidates + 1])
            span.set_data("Max Candidates", max_candidates)
            span.set_data("Result Size", len(group_ids))
        metrics.timing("snuba.search.num_candidates", len(group_ids))

        too_many_candidates = False
        if not group_ids:
            # no matches could possibly be found from this point on
            metrics.incr("snuba.search.no_candidates", skip_internal=False)
            return self.empty_result
        elif len(group_ids) > max_candidates:
            # If the pre-filter query didn't include anything to significantly
            # filter down the number of results (from 'first_release', 'query',
            # 'status', 'bookmarked_by', 'assigned_to', 'unassigned',
            # 'subscribed_by', 'active_at_from', or 'active_at_to') then it
            # might have surpassed the `max_candidates`. In this case,
            # we *don't* want to pass candidates down to Snuba, and instead we
            # want Snuba to do all the filtering/sorting it can and *then* apply
            # this queryset to the results from Snuba, which we call
            # post-filtering.
            metrics.incr("snuba.search.too_many_candidates",
                         skip_internal=False)
            too_many_candidates = True
            group_ids = []

        sort_field = self.sort_strategies[sort_by]
        chunk_growth = options.get("snuba.search.chunk-growth-rate")
        max_chunk_size = options.get("snuba.search.max-chunk-size")
        chunk_limit = limit
        offset = 0
        num_chunks = 0
        hits = self.calculate_hits(
            group_ids,
            too_many_candidates,
            sort_field,
            projects,
            retention_window_start,
            group_queryset,
            environments,
            sort_by,
            limit,
            cursor,
            count_hits,
            paginator_options,
            search_filters,
            start,
            end,
        )
        if count_hits and hits == 0:
            return self.empty_result

        paginator_results = self.empty_result
        result_groups = []
        result_group_ids = set()

        max_time = options.get("snuba.search.max-total-chunk-time-seconds")
        time_start = time.time()

        # Do smaller searches in chunks until we have enough results
        # to answer the query (or hit the end of possible results). We do
        # this because a common case for search is to return 100 groups
        # sorted by `last_seen`, and we want to avoid returning all of
        # a project's groups and then post-sorting them all in Postgres
        # when typically the first N results will do.
        while (time.time() - time_start) < max_time:
            num_chunks += 1

            # grow the chunk size on each iteration to account for huge projects
            # and weird queries, up to a max size
            chunk_limit = min(int(chunk_limit * chunk_growth), max_chunk_size)
            # but if we have group_ids always query for at least that many items
            chunk_limit = max(chunk_limit, len(group_ids))

            # {group_id: group_score, ...}
            snuba_groups, total = self.snuba_search(
                start=start,
                end=end,
                project_ids=[p.id for p in projects],
                environment_ids=environments
                and [environment.id for environment in environments],
                sort_field=sort_field,
                cursor=cursor,
                group_ids=group_ids,
                limit=chunk_limit,
                offset=offset,
                search_filters=search_filters,
            )
            metrics.timing("snuba.search.num_snuba_results", len(snuba_groups))
            count = len(snuba_groups)
            more_results = count >= limit and (offset + limit) < total
            offset += len(snuba_groups)

            if not snuba_groups:
                break

            if group_ids:
                # pre-filtered candidates were passed down to Snuba, so we're
                # finished with filtering and these are the only results. Note
                # that because we set the chunk size to at least the size of
                # the group_ids, we know we got all of them (ie there are
                # no more chunks after the first)
                result_groups = snuba_groups
                if count_hits and hits is None:
                    hits = len(snuba_groups)
            else:
                # pre-filtered candidates were *not* passed down to Snuba,
                # so we need to do post-filtering to verify Sentry DB predicates
                filtered_group_ids = group_queryset.filter(
                    id__in=[gid
                            for gid, _ in snuba_groups]).values_list("id",
                                                                     flat=True)

                group_to_score = dict(snuba_groups)
                for group_id in filtered_group_ids:
                    if group_id in result_group_ids:
                        # because we're doing multiple Snuba queries, which
                        # happen outside of a transaction, there is a small possibility
                        # of groups moving around in the sort scoring underneath us,
                        # so we at least want to protect against duplicates
                        continue

                    group_score = group_to_score[group_id]
                    result_group_ids.add(group_id)
                    result_groups.append((group_id, group_score))

            # break the query loop for one of three reasons:
            # * we started with Postgres candidates and so only do one Snuba query max
            # * the paginator is returning enough results to satisfy the query (>= the limit)
            # * there are no more groups in Snuba to post-filter
            # TODO do we actually have to rebuild this SequencePaginator every time
            # or can we just make it after we've broken out of the loop?
            paginator_results = SequencePaginator(
                [(score, id) for (id, score) in result_groups],
                reverse=True,
                **paginator_options).get_result(limit, cursor, known_hits=hits)

            if group_ids or len(
                    paginator_results.results) >= limit or not more_results:
                break

        # HACK: We're using the SequencePaginator to mask the complexities of going
        # back and forth between two databases. This causes a problem with pagination
        # because we're 'lying' to the SequencePaginator (it thinks it has the entire
        # result set in memory when it does not). For this reason we need to make some
        # best guesses as to whether the `prev` and `next` cursors have more results.

        if len(paginator_results.results) == limit and more_results:
            # Because we are going back and forth between DBs there is a small
            # chance that we will hand the SequencePaginator exactly `limit`
            # items. In this case the paginator will assume there are no more
            # results, so we need to override the `next` cursor's results.
            paginator_results.next.has_results = True

        if cursor is not None and (not cursor.is_prev
                                   or len(paginator_results.results) > 0):
            # If the user passed a cursor, and it isn't already a 0 result `is_prev`
            # cursor, then it's worth allowing them to go back a page to check for
            # more results.
            paginator_results.prev.has_results = True

        metrics.timing("snuba.search.num_chunks", num_chunks)

        groups = Group.objects.in_bulk(paginator_results.results)
        paginator_results.results = [
            groups[k] for k in paginator_results.results if k in groups
        ]

        return paginator_results
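A small worked illustration of the chunk sizing in the search loop above, using assumed option values: each iteration grows the chunk limit by chunk_growth, caps it at max_chunk_size, and never lets it drop below the number of candidate group ids.

chunk_growth, max_chunk_size = 1.5, 10000  # assumed option values
chunk_limit, group_ids = 100, []           # starting from the requested limit
for _ in range(3):
    chunk_limit = min(int(chunk_limit * chunk_growth), max_chunk_size)
    chunk_limit = max(chunk_limit, len(group_ids))
    print(chunk_limit)  # 150, then 225, then 337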
    def serialize(
        self,
        transactions: Sequence[SnubaTransaction],
        errors: Sequence[SnubaError],
        root: Optional[SnubaTransaction],
        warning_extra: Dict[str, str],
        event_id: str,
        detailed: bool = False,
    ) -> Sequence[FullResponse]:
        """ For the full event trace, we return the results as a graph instead of a flattened list """
        parent_map = self.construct_parent_map(transactions)
        error_map = self.construct_error_map(errors)
        parent_events: Dict[str, TraceEvent] = {}
        # TODO(3.7): Dictionary ordering in py3.6 is an implementation detail, using an OrderedDict because this way
        # we try to guarantee in py3.6 that the first item is the root. We can switch back to a normal dict when we're
        # on python 3.7.
        results_map: Dict[Optional[str], List[TraceEvent]] = OrderedDict()
        to_check: Deque[SnubaTransaction] = deque()
        if root:
            parent_events[root["id"]] = TraceEvent(root, None, 0)
            results_map[None] = [parent_events[root["id"]]]
            to_check.append(root)

        with sentry_sdk.start_span(op="building.trace", description="full trace"):
            iteration = 0
            has_orphans = False
            while parent_map or to_check:
                if len(to_check) == 0:
                    has_orphans = True
                    # Grab any set of events from the parent map
                    parent_span_id, current_events = parent_map.popitem()

                    current_event, *siblings = current_events
                    # If there were any siblings put them back
                    if siblings:
                        parent_map[parent_span_id] = siblings

                    previous_event = parent_events[current_event["id"]] = TraceEvent(
                        current_event, None, 0
                    )

                    # not using a defaultdict here as a DefaultOrderedDict isn't worth the effort
                    if parent_span_id in results_map:
                        results_map[parent_span_id].append(previous_event)
                    else:
                        results_map[parent_span_id] = [previous_event]
                else:
                    current_event = to_check.popleft()
                    previous_event = parent_events[current_event["id"]]

                # This is faster than doing a call to get_events, since get_event_by_id only makes a call to snuba
                # when non-transaction events are included.
                with sentry_sdk.start_span(op="nodestore", description="get_event_by_id"):
                    nodestore_event = eventstore.get_event_by_id(
                        current_event["project.id"], current_event["id"]
                    )

                previous_event.nodestore_event = nodestore_event

                spans: NodeSpans = nodestore_event.data.get("spans", [])
                # Need to include the transaction as a span as well
                spans.append({"span_id": previous_event.event["trace.span"]})

                for child in spans:
                    if child["span_id"] in error_map:
                        previous_event.errors.extend(
                            [
                                self.serialize_error(error)
                                for error in error_map.pop(child["span_id"])
                            ]
                        )
                    # We need to connect back to an existing orphan trace
                    if has_orphans and child["span_id"] in results_map:
                        orphan_subtraces = results_map.pop(child["span_id"])
                        for orphan_subtrace in orphan_subtraces:
                            orphan_subtrace.parent_event_id = previous_event.event["id"]
                        previous_event.children.extend(orphan_subtraces)
                    if child["span_id"] not in parent_map:
                        continue
                    # Avoid potential span loops by popping, so we don't traverse the same nodes twice
                    child_events = parent_map.pop(child["span_id"])

                    for child_event in child_events:
                        parent_events[child_event["id"]] = TraceEvent(
                            child_event,
                            current_event["id"],
                            previous_event.generation + 1
                            if previous_event.generation is not None
                            else None,
                        )
                        # Add this event to its parent's children
                        previous_event.children.append(parent_events[child_event["id"]])

                        to_check.append(child_event)
                # Limit iterations just to be safe
                iteration += 1
                if iteration > MAX_TRACE_SIZE:
                    sentry_sdk.set_tag("discover.trace-view.warning", "surpassed-trace-limit")
                    logger.warning(
                        "discover.trace-view.surpassed-trace-limit",
                        extra=warning_extra,
                    )
                    break

        root_traces: List[TraceEvent] = []
        orphans: List[TraceEvent] = []
        for index, result in enumerate(results_map.values()):
            for subtrace in result:
                self.update_children(subtrace)
            if index > 0 or root is None:
                orphans.extend(result)
            elif root:
                root_traces = result
        # We sort orphans and roots separately because we always want the root(s) as the first element(s)
        root_traces.sort(key=child_sort_key)
        orphans.sort(key=child_sort_key)
        return [trace.full_dict(detailed) for trace in root_traces] + [
            orphan.full_dict(detailed) for orphan in orphans
        ]
Example #9
    def get(self, request, organization):
        try:
            # events-meta is still used by events v1 which doesn't require global views
            params = self.get_snuba_params(request,
                                           organization,
                                           check_global_views=False)
        except NoProjects:
            return Response([])

        with sentry_sdk.start_span(op="discover.endpoint",
                                   description="find_lookup_keys") as span:
            possible_keys = ["transaction"]
            lookup_keys = {
                key: request.query_params.get(key)
                for key in possible_keys
            }

            if not any(lookup_keys.values()):
                return Response(
                    {
                        "detail":
                        f"Must provide one of {possible_keys} in order to find related events"
                    },
                    status=400,
                )

        with self.handle_query_errors():
            with sentry_sdk.start_span(op="discover.endpoint",
                                       description="filter_creation"):
                projects = self.get_projects(request, organization)
                query_kwargs = build_query_params_from_request(
                    request, organization, projects, params.get("environment"))
                query_kwargs["limit"] = 5
                try:
                    # Need to escape quotes in case some "joker" has a transaction with quotes
                    transaction_name = UNESCAPED_QUOTE_RE.sub(
                        '\\"', lookup_keys["transaction"])
                    parsed_terms = parse_search_query(
                        f'transaction:"{transaction_name}"')
                except ParseError:
                    return Response({"detail": "Invalid transaction search"},
                                    status=400)

                if query_kwargs.get("search_filters"):
                    query_kwargs["search_filters"].extend(parsed_terms)
                else:
                    query_kwargs["search_filters"] = parsed_terms

            with sentry_sdk.start_span(op="discover.endpoint",
                                       description="issue_search"):
                results = search.query(**query_kwargs)

        with sentry_sdk.start_span(op="discover.endpoint",
                                   description="serialize_results") as span:
            results = list(results)
            span.set_data("result_length", len(results))
            context = serialize(
                results,
                request.user,
                GroupSerializer(environment_func=self._get_environment_func(
                    request, organization.id)),
            )

        return Response(context)
    def serialize(self,
                  parent_map,
                  error_map,
                  root,
                  warning_extra,
                  params,
                  snuba_event=None,
                  event_id=None):
        """ For the full event trace, we return the results as a graph instead of a flattened list """
        parent_events = {}
        result = parent_events[root["id"]] = self.serialize_event(
            root, None, 0)

        with sentry_sdk.start_span(op="building.trace",
                                   description="full trace"):
            to_check = deque([root])
            iteration = 0
            while to_check:
                current_event = to_check.popleft()

                # This is faster than doing a call to get_events, since get_event_by_id only makes a call to snuba
                # when non-transaction events are included.
                with sentry_sdk.start_span(op="nodestore",
                                           description="get_event_by_id"):
                    event = eventstore.get_event_by_id(
                        current_event["project.id"], current_event["id"])

                previous_event = parent_events[current_event["id"]]
                previous_event.update({
                    event_key: event.data.get(event_key)
                    for event_key in NODESTORE_KEYS
                })

                spans = event.data.get("spans", [])
                # Need to include the transaction as a span as well
                spans.append({"span_id": previous_event["span_id"]})

                for child in spans:
                    if child["span_id"] in error_map:
                        previous_event["errors"].extend(
                            error_map.pop(child["span_id"]))
                    if child["span_id"] not in parent_map:
                        continue
                    # Avoid potential span loops by popping, so we don't traverse the same nodes twice
                    child_events = parent_map.pop(child["span_id"])

                    for child_event in child_events:
                        parent_events[
                            child_event["id"]] = self.serialize_event(
                                child_event, current_event["id"],
                                previous_event["generation"] + 1)
                        # Add this event to its parent's children
                        previous_event["children"].append(
                            parent_events[child_event["id"]])

                        to_check.append(child_event)
                # Limit iterations just to be safe
                iteration += 1
                if iteration > MAX_TRACE_SIZE:
                    logger.warning(
                        "discover.trace-view.surpassed-trace-limit",
                        extra=warning_extra,
                    )
                    break

        return result
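
The full-trace serializer above is essentially a breadth-first walk over parent_map with an iteration cap. The same control flow in isolation, with a toy children-by-parent map standing in for the Snuba data:

from collections import deque

MAX_NODES = 100  # stand-in for MAX_TRACE_SIZE

def walk(root_id, children_by_parent):
    visited = []
    to_check = deque([root_id])
    iterations = 0
    while to_check:
        current = to_check.popleft()
        visited.append(current)
        # Pop children so a loop in the input cannot be traversed twice.
        to_check.extend(children_by_parent.pop(current, []))
        iterations += 1
        if iterations > MAX_NODES:
            break
    return visited

print(walk("a", {"a": ["b", "c"], "b": ["d"]}))  # ['a', 'b', 'c', 'd']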
    def serialize(
        self,
        transactions: Sequence[SnubaTransaction],
        errors: Sequence[SnubaError],
        root: Optional[SnubaTransaction],
        warning_extra: Dict[str, str],
        event_id: str,
        detailed: bool = False,
    ) -> Sequence[LightResponse]:
        """ Because the light endpoint could potentially have gaps between root and event we return a flattened list """
        snuba_event, nodestore_event = self.get_current_transaction(transactions, errors, event_id)
        parent_map = self.construct_parent_map(transactions)
        error_map = self.construct_error_map(errors)
        trace_results: List[TraceEvent] = []
        current_generation: Optional[int] = None
        root_id: Optional[str] = None

        with sentry_sdk.start_span(op="building.trace", description="light trace"):
            # We might not necessarily be connected to the root if we're on an orphan event
            if root is not None and root["id"] != snuba_event["id"]:
                # Get the root event and see if the current event's span is in the root event
                root_event = eventstore.get_event_by_id(root["project.id"], root["id"])
                root_spans: NodeSpans = root_event.data.get("spans", [])
                root_span = find_event(
                    root_spans,
                    lambda item: item is not None
                    and item["span_id"] == snuba_event["trace.parent_span"],
                )

                # We only know to add the root if it's the direct parent
                if root_span is not None:
                    # For the light response, the parent will be unknown unless it is a direct descendant of the root
                    root_id = root["id"]
                    trace_results.append(
                        TraceEvent(
                            root,
                            None,
                            0,
                        )
                    )
                    current_generation = 1
            elif root is not None and root["id"] == snuba_event["id"]:
                current_generation = 0

            current_event = TraceEvent(snuba_event, root_id, current_generation)
            trace_results.append(current_event)

            spans: NodeSpans = nodestore_event.data.get("spans", [])
            # Need to include the transaction as a span as well
            spans.append({"span_id": snuba_event["trace.span"]})

            for span in spans:
                if span["span_id"] in error_map:
                    current_event.errors.extend(
                        [self.serialize_error(error) for error in error_map.pop(span["span_id"])]
                    )
                if span["span_id"] in parent_map:
                    child_events = parent_map.pop(span["span_id"])
                    trace_results.extend(
                        [
                            TraceEvent(
                                child_event,
                                snuba_event["id"],
                                (
                                    current_event.generation + 1
                                    if current_event.generation is not None
                                    else None
                                ),
                            )
                            for child_event in child_events
                        ]
                    )

        return [result.to_dict() for result in trace_results]
Example #12
0
File: util.py Project: fpacifici/snuba
 def wrapper(*args, **kwargs) -> Any:
     with sentry_sdk.start_span(description=func.__name__,
                                op=op) as span:
         span.set_data("filename", filename)
         return func(*args, **kwargs)
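
The wrapper above is only the inner function; a plausible shape for the decorator it belongs to is sketched below. The name with_span and the op/filename parameters are assumptions inferred from the snippet, not the actual snuba source:

import functools
from typing import Any, Callable

import sentry_sdk

def with_span(op: str, filename: str) -> Callable:
    """Record every call to the wrapped function as a Sentry span."""
    def decorator(func: Callable) -> Callable:
        @functools.wraps(func)
        def wrapper(*args, **kwargs) -> Any:
            with sentry_sdk.start_span(description=func.__name__, op=op) as span:
                span.set_data("filename", filename)
                return func(*args, **kwargs)
        return wrapper
    return decorator

Applying @with_span(op="parser", filename="util.py") would then make each call to the decorated function show up as a child span of the active transaction.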
Example #13
0
    def get(self, request, organization):
        if not self.has_feature(organization, request):
            return Response(status=404)

        with sentry_sdk.start_span(op="discover.endpoint",
                                   description="filter_params") as span:
            span.set_tag("organization", organization)
            try:
                params = self.get_filter_params(request, organization)
            except NoProjects:
                return Response([])
            params = self.quantize_date_params(request, params)

            has_global_views = features.has("organizations:global-views",
                                            organization,
                                            actor=request.user)
            if not has_global_views and len(params.get("project_id", [])) > 1:
                raise ParseError(
                    detail="You cannot view events from multiple projects.")

            middle = params["start"] + timedelta(
                seconds=(params["end"] - params["start"]).total_seconds() *
                0.5)
            start, middle, end = (
                datetime.strftime(params["start"], DateArg.date_format),
                datetime.strftime(middle, DateArg.date_format),
                datetime.strftime(params["end"], DateArg.date_format),
            )

        trend_function = request.GET.get("trendFunction", "p50()")
        function, columns = parse_function(trend_function)
        trend_column = self.trend_columns.get(function)
        if trend_column is None:
            raise ParseError(detail=u"{} is not a supported trend function".
                             format(trend_function))

        count_column = self.trend_columns.get("count_range")
        percentage_column = self.trend_columns["percentage"]
        selected_columns = request.GET.getlist("field")[:]
        query = request.GET.get("query")
        orderby = self.get_orderby(request)

        def data_fn(offset, limit):
            return discover.query(
                selected_columns=selected_columns + [
                    trend_column["format"].format(
                        *columns, start=start, end=middle, index="1"),
                    trend_column["format"].format(
                        *columns, start=middle, end=end, index="2"),
                    percentage_column["format"].format(
                        alias=trend_column["alias"]),
                    "minus({alias}2,{alias}1)".format(
                        alias=trend_column["alias"]),
                    count_column["format"].format(
                        start=start, end=middle, index="1"),
                    count_column["format"].format(
                        start=middle, end=end, index="2"),
                    percentage_column["format"].format(
                        alias=count_column["alias"]),
                ],
                query=query,
                params=params,
                orderby=orderby,
                offset=offset,
                limit=limit,
                referrer="api.trends.get-percentage-change",
                auto_fields=True,
                use_aggregate_conditions=True,
            )

        def on_results(events_results):
            def get_event_stats(query_columns, query, params, rollup,
                                reference_event):
                return discover.top_events_timeseries(
                    query_columns,
                    selected_columns,
                    query,
                    params,
                    orderby,
                    rollup,
                    min(5, len(events_results["data"])),
                    organization,
                    top_events=events_results,
                    referrer="api.trends.get-event-stats",
                )

            stats_results = (self.get_event_stats_data(
                request,
                organization,
                get_event_stats,
                top_events=True,
                query_column=trend_function,
            ) if len(events_results["data"]) > 0 else {})

            return {
                "events":
                self.handle_results_with_meta(request, organization,
                                              params["project_id"],
                                              events_results),
                "stats":
                stats_results,
            }

        with self.handle_query_errors():
            return self.paginate(
                request=request,
                paginator=GenericOffsetPaginator(data_fn=data_fn),
                on_results=on_results,
                default_per_page=5,
                max_per_page=5,
            )
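
The trends endpoint above compares two halves of the requested time range; the midpoint arithmetic on its own, with illustrative dates:

from datetime import datetime, timedelta

start = datetime(2021, 1, 1)
end = datetime(2021, 1, 3)
middle = start + timedelta(seconds=(end - start).total_seconds() * 0.5)
# middle == datetime(2021, 1, 2); the "1" aggregates cover [start, middle)
# and the "2" aggregates cover [middle, end) in the query above.
print(middle)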
Example #14
0
File: snuba.py Project: yzdann/sentry
def bulk_raw_query(snuba_param_list, referrer=None):
    headers = {}
    if referrer:
        headers["referer"] = referrer

    query_param_list = map(_prepare_query_params, snuba_param_list)

    def snuba_query(params):
        query_params, forward, reverse, thread_hub = params
        try:
            with timer("snuba_query"):
                referrer = headers.get("referer", "<unknown>")
                if SNUBA_INFO:
                    logger.info("{}.body: {}".format(referrer,
                                                     json.dumps(query_params)))
                    query_params["debug"] = True
                body = json.dumps(query_params)
                with thread_hub.start_span(
                        op="snuba",
                        description=u"query {}".format(referrer)) as span:
                    span.set_tag("referrer", referrer)
                    for param_key, param_data in six.iteritems(query_params):
                        span.set_data(param_key, param_data)
                    return (
                        _snuba_pool.urlopen("POST",
                                            "/query",
                                            body=body,
                                            headers=headers),
                        forward,
                        reverse,
                    )
        except urllib3.exceptions.HTTPError as err:
            raise SnubaError(err)

    with sentry_sdk.start_span(
            op="start_snuba_query",
            description=u"running {} snuba queries".format(
                len(snuba_param_list)),
    ) as span:
        span.set_tag("referrer", headers.get("referer", "<unknown>"))
        if len(snuba_param_list) > 1:
            query_results = list(
                _query_thread_pool.map(snuba_query, [
                    params + (Hub(Hub.current), )
                    for params in query_param_list
                ]))
        else:
            # No need to submit to the thread pool if we're just performing a
            # single query
            query_results = [
                snuba_query(query_param_list[0] + (Hub(Hub.current), ))
            ]

    results = []
    for response, _, reverse in query_results:
        try:
            body = json.loads(response.data)
            if SNUBA_INFO:
                if "sql" in body:
                    logger.info("{}.sql: {}".format(
                        headers.get("referer", "<unknown>"), body["sql"]))
                if "error" in body:
                    logger.info("{}.err: {}".format(
                        headers.get("referer", "<unknown>"), body["error"]))
        except ValueError:
            if response.status != 200:
                logger.error("snuba.query.invalid-json")
                raise SnubaError("Failed to parse snuba error response")
            raise UnexpectedResponseError(
                u"Could not decode JSON response: {}".format(response.data))

        if response.status != 200:
            if body.get("error"):
                error = body["error"]
                if response.status == 429:
                    raise RateLimitExceeded(error["message"])
                elif error["type"] == "schema":
                    raise SchemaValidationError(error["message"])
                elif error["type"] == "clickhouse":
                    raise clickhouse_error_codes_map.get(
                        error["code"], QueryExecutionError)(error["message"])
                else:
                    raise SnubaError(error["message"])
            else:
                raise SnubaError(u"HTTP {}".format(response.status))

        # Forward and reverse translation maps from model ids to snuba keys, per column
        body["data"] = [reverse(d) for d in body["data"]]
        results.append(body)

    return results
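
bulk_raw_query clones the current Hub into each worker so spans started on the thread pool still attach to the request's transaction. A minimal version of that pattern outside of Snuba, using the Hub API of the sentry_sdk versions shown in these examples (the query body is a placeholder):

from concurrent.futures import ThreadPoolExecutor

from sentry_sdk import Hub

def run_query(args):
    params, thread_hub = args
    # The cloned Hub carries the parent scope/transaction into this worker thread.
    with thread_hub.start_span(op="snuba", description="query"):
        return {"params": params}  # placeholder for the real HTTP call

param_list = [{"q": 1}, {"q": 2}]
with ThreadPoolExecutor(max_workers=4) as pool:
    results = list(pool.map(run_query, [(p, Hub(Hub.current)) for p in param_list]))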
Example #15
0
def _do_process_event(
    cache_key,
    start_time,
    event_id,
    process_task,
    data=None,
    data_has_changed=None,
    from_symbolicate=False,
):
    from sentry.plugins.base import plugins

    if data is None:
        data = event_processing_store.get(cache_key)

    if data is None:
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "process"}, skip_internal=False
        )
        error_logger.error("process.failed.empty", extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    set_current_project(project_id)

    event_id = data["event_id"]

    with sentry_sdk.start_span(op="tasks.store.process_event.get_project_from_cache"):
        project = Project.objects.get_from_cache(id=project_id)

    with metrics.timer("tasks.store.process_event.organization.get_from_cache"):
        project._organization_cache = Organization.objects.get_from_cache(
            id=project.organization_id
        )

    has_changed = bool(data_has_changed)

    with sentry_sdk.start_span(op="tasks.store.process_event.get_reprocessing_revision"):
        # Fetch the reprocessing revision
        reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    # Stacktrace based event processors.
    with sentry_sdk.start_span(op="task.store.process_event.stacktraces"):
        with metrics.timer(
            "tasks.store.process_event.stacktraces", tags={"from_symbolicate": from_symbolicate}
        ):
            new_data = process_stacktraces(data)

    if new_data is not None:
        has_changed = True
        data = new_data

    # Second round of datascrubbing after stacktrace and language-specific
    # processing. First round happened as part of ingest.
    #
    # *Right now* the only sensitive data that is added in stacktrace
    # processing are usernames in filepaths, so we run directly after
    # stacktrace processors.
    #
    # We do not yet want to deal with context data produced by plugins like
    # sessionstack or fullstory (which are in `get_event_preprocessors`), as
    # this data is very unlikely to be sensitive data. This is why scrubbing
    # happens somewhere in the middle of the pipeline.
    #
    # On the other hand, Javascript event error translation is happening after
    # this block because it uses `get_event_preprocessors` instead of
    # `get_event_enhancers`.
    #
    # We are fairly confident, however, that this should run *before*
    # re-normalization as it is hard to find sensitive data in partially
    # trimmed strings.
    if has_changed and options.get("processing.can-use-scrubbers"):
        with sentry_sdk.start_span(op="task.store.datascrubbers.scrub"):
            with metrics.timer(
                "tasks.store.datascrubbers.scrub", tags={"from_symbolicate": from_symbolicate}
            ):
                new_data = safe_execute(scrub_data, project=project, event=data.data)

                # XXX(markus): When datascrubbing is finally "totally stable", we might want
                # to drop the event if it crashes to avoid saving PII
                if new_data is not None and features.has(
                    "organizations:datascrubbers-v2", project.organization, actor=None
                ):
                    data.data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        with sentry_sdk.start_span(op="task.store.process_event.preprocessors") as span:
            span.set_data("plugin", plugin.slug)
            span.set_data("from_symbolicate", from_symbolicate)
            with metrics.timer(
                "tasks.store.process_event.preprocessors",
                tags={"plugin": plugin.slug, "from_symbolicate": from_symbolicate},
            ):
                processors = safe_execute(
                    plugin.get_event_preprocessors, data=data, _with_transaction=False
                )
                for processor in processors or ():
                    try:
                        result = processor(data)
                    except Exception:
                        error_logger.exception("tasks.store.preprocessors.error")
                        data.setdefault("_metrics", {})["flag.processing.error"] = True
                        has_changed = True
                    else:
                        if result:
                            data = result
                            has_changed = True

    assert data["project"] == project_id, "Project cannot be mutated by plugins"

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        # Run some of normalization again such that we don't:
        # - persist e.g. incredibly large stacktraces from minidumps
        # - store event timestamps that are older than our retention window
        #   (also happening with minidumps)
        normalizer = StoreNormalizer(
            remove_other=False, is_renormalize=True, **DEFAULT_STORE_NORMALIZER_ARGS
        )
        data = normalizer.normalize_event(dict(data))

        issues = data.get("processing_issues")

        try:
            if issues and create_failed_event(
                cache_key,
                data,
                project_id,
                list(issues.values()),
                event_id=event_id,
                start_time=start_time,
                reprocessing_rev=reprocessing_rev,
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again.  This happens when the reprocessing
            # revision changed while we were processing.
            _do_preprocess_event(cache_key, data, start_time, event_id, process_task, project)
            return

        cache_key = event_processing_store.store(data)

    submit_save_event(project, cache_key, event_id, start_time, data)
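
Several steps in _do_process_event pair a tracing span with a metrics timer so the same unit of work is visible in both systems. The bare pattern, with a no-op step standing in for the real processing; the sentry.utils.metrics import reflects how these examples use it and should be treated as an assumption outside this codebase:

import sentry_sdk
from sentry.utils import metrics  # as used throughout these examples

def run_step(data):
    with sentry_sdk.start_span(op="tasks.store.process_event.example_step"):
        with metrics.timer("tasks.store.process_event.example_step"):
            # Placeholder for the real work (e.g. process_stacktraces).
            return data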
Example #16
0
File: discover.py Project: mburgs/sentry
def query(
    selected_columns,
    query,
    params,
    orderby=None,
    offset=None,
    limit=50,
    referrer=None,
    auto_fields=False,
    auto_aggregations=False,
    use_aggregate_conditions=False,
    conditions=None,
    functions_acl=None,
):
    """
    High-level API for doing arbitrary user queries against events.

    This function operates on the Discover public event schema and
    virtual fields/aggregate functions for selected columns and
    conditions are supported through this function.

    The resulting list will have all internal field names mapped
    back into their public schema names.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    orderby (None|str|Sequence[str]) The field to order results by.
    offset (None|int) The record offset to read.
    limit (int) The number of records to fetch.
    referrer (str|None) A referrer string to help locate the origin of this query.
    auto_fields (bool) Set to true to have project + eventid fields automatically added.
    auto_aggregations (bool) Whether aggregates should be added automatically if they're used
                    in conditions, and there's at least one aggregate already.
    use_aggregate_conditions (bool) Set to true if aggregates conditions should be used at all.
    conditions (Sequence[any]) List of conditions that are passed directly to snuba without
                    any additional processing.
    """
    if not selected_columns:
        raise InvalidSearchQuery("No columns selected")

    # We clobber this value throughout this code, so copy the value
    selected_columns = selected_columns[:]

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.filter_transform") as span:
        span.set_data("query", query)

        snuba_filter = get_filter(query, params)
        if not use_aggregate_conditions:
            assert (
                not auto_aggregations
            ), "Auto aggregations cannot be used without enabling aggregate conditions"
            snuba_filter.having = []

    function_translations = {}

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.field_translations"):
        if orderby is not None:
            orderby = list(orderby) if isinstance(orderby,
                                                  (list,
                                                   tuple)) else [orderby]
            snuba_filter.orderby = [get_function_alias(o) for o in orderby]

        resolved_fields = resolve_field_list(
            selected_columns,
            snuba_filter,
            auto_fields=auto_fields,
            auto_aggregations=auto_aggregations,
            functions_acl=functions_acl,
        )

        snuba_filter.update_with(resolved_fields)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = resolve_discover_aliases(
            snuba_filter, function_translations)

        # Make sure that any aggregate conditions are also in the selected columns
        for having_clause in snuba_filter.having:
            # The first element of the having can be an alias, or a nested array of functions. Loop through to make sure
            # any referenced functions are in the aggregations.
            error_extra = ", and could not be automatically added" if auto_aggregations else ""
            if isinstance(having_clause[0], (list, tuple)):
                # Functions are of the form [fn, [args]]
                args_to_check = [[having_clause[0]]]
                conditions_not_in_aggregations = []
                while len(args_to_check) > 0:
                    args = args_to_check.pop()
                    for arg in args:
                        if arg[0] in [SNUBA_AND, SNUBA_OR]:
                            args_to_check.extend(arg[1])
                        # Only need to iterate on arg[1] if it's a list
                        elif isinstance(arg[1], (list, tuple)):
                            alias = arg[1][0]
                            found = any(
                                alias == agg_clause[-1]
                                for agg_clause in snuba_filter.aggregations)
                            if not found:
                                conditions_not_in_aggregations.append(alias)

                if len(conditions_not_in_aggregations) > 0:
                    raise InvalidSearchQuery(
                        "Aggregate(s) {} used in a condition but are not in the selected columns{}."
                        .format(
                            ", ".join(conditions_not_in_aggregations),
                            error_extra,
                        ))
            else:
                found = any(having_clause[0] == agg_clause[-1]
                            for agg_clause in snuba_filter.aggregations)
                if not found:
                    raise InvalidSearchQuery(
                        "Aggregate {} used in a condition but is not a selected column{}."
                        .format(
                            having_clause[0],
                            error_extra,
                        ))

        if conditions is not None:
            snuba_filter.conditions.extend(conditions)

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.snuba_query"):
        result = raw_query(
            start=snuba_filter.start,
            end=snuba_filter.end,
            groupby=snuba_filter.groupby,
            conditions=snuba_filter.conditions,
            aggregations=snuba_filter.aggregations,
            selected_columns=snuba_filter.selected_columns,
            filter_keys=snuba_filter.filter_keys,
            having=snuba_filter.having,
            orderby=snuba_filter.orderby,
            dataset=Dataset.Discover,
            limit=limit,
            offset=offset,
            referrer=referrer,
        )

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.transform_results") as span:
        span.set_data("result_count", len(result.get("data", [])))
        return transform_results(result, resolved_fields["functions"],
                                 translated_columns, snuba_filter,
                                 selected_columns)
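
A hedged call sketch for the query() helper above, following its docstring; the column aliases, date range and referrer are illustrative, and the params keys mirror the documented ones:

from datetime import datetime, timedelta

results = query(
    selected_columns=["transaction", "count()"],
    query="event.type:transaction",
    params={
        "project_id": [1],
        "start": datetime.utcnow() - timedelta(days=1),
        "end": datetime.utcnow(),
    },
    orderby=["-count"],
    limit=10,
    referrer="api.example.transaction-counts",
)
# results["data"] rows use the public aliases, e.g. {"transaction": "/checkout", "count": 42}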
Example #17
0
 def get_environments(self, request, organization):
     with sentry_sdk.start_span(op="PERF: Org.get_environments"):
         return get_environments(request, organization)
Example #18
0
File: base.py Project: snilwx/sentry
    def dispatch(self, request, *args, **kwargs):
        """
        Identical to rest framework's dispatch except we add the ability
        to convert arguments (for common URL params).
        """
        with sentry_sdk.start_span(op="base.dispatch.setup",
                                   description=type(self).__name__):
            self.args = args
            self.kwargs = kwargs
            request = self.initialize_request(request, *args, **kwargs)
            self.load_json_body(request)
            self.request = request
            self.headers = self.default_response_headers  # deprecate?

        # Tags that will ultimately flow into the metrics backend at the end of
        # the request (happens via middleware/stats.py).
        request._metric_tags = {}

        if settings.SENTRY_API_RESPONSE_DELAY:
            start_time = time.time()

        origin = request.META.get("HTTP_ORIGIN", "null")
        # A "null" value should be treated as no Origin for us.
        # See RFC6454 for more information on this behavior.
        if origin == "null":
            origin = None

        try:
            with sentry_sdk.start_span(op="base.dispatch.request",
                                       description=type(self).__name__):
                if origin and request.auth:
                    allowed_origins = request.auth.get_allowed_origins()
                    if not is_valid_origin(origin, allowed=allowed_origins):
                        response = Response("Invalid origin: %s" % (origin, ),
                                            status=400)
                        self.response = self.finalize_response(
                            request, response, *args, **kwargs)
                        return self.response

                self.initial(request, *args, **kwargs)

                # Get the appropriate handler method
                if request.method.lower() in self.http_method_names:
                    handler = getattr(self, request.method.lower(),
                                      self.http_method_not_allowed)

                    (args, kwargs) = self.convert_args(request, *args,
                                                       **kwargs)
                    self.args = args
                    self.kwargs = kwargs
                else:
                    handler = self.http_method_not_allowed

                if getattr(request, "access", None) is None:
                    # setup default access
                    request.access = access.from_request(request)

            with sentry_sdk.start_span(
                    op="base.dispatch.execute",
                    description="{}.{}".format(
                        type(self).__name__, handler.__name__),
            ):
                response = handler(request, *args, **kwargs)

        except Exception as exc:
            response = self.handle_exception(request, exc)

        if origin:
            self.add_cors_headers(request, response)

        self.response = self.finalize_response(request, response, *args,
                                               **kwargs)

        if settings.SENTRY_API_RESPONSE_DELAY:
            duration = time.time() - start_time

            if duration < (settings.SENTRY_API_RESPONSE_DELAY / 1000.0):
                with sentry_sdk.start_span(
                        op="base.dispatch.sleep",
                        description=type(self).__name__,
                ) as span:
                    span.set_data("SENTRY_API_RESPONSE_DELAY",
                                  settings.SENTRY_API_RESPONSE_DELAY)
                    time.sleep(settings.SENTRY_API_RESPONSE_DELAY / 1000.0 -
                               duration)

        return self.response
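
The tail of dispatch() only sleeps for whatever portion of SENTRY_API_RESPONSE_DELAY (milliseconds) the handler has not already used. The same arithmetic on its own, with a made-up delay value:

import time

SENTRY_API_RESPONSE_DELAY = 150  # milliseconds, illustrative

start_time = time.time()
time.sleep(0.05)  # pretend the handler took ~50 ms
duration = time.time() - start_time

remaining = SENTRY_API_RESPONSE_DELAY / 1000.0 - duration
if remaining > 0:
    time.sleep(remaining)  # total wall time ends up close to 150 ms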
Example #19
0
def loads(value: str, **kwargs) -> JSONData:
    with sentry_sdk.start_span(op="sentry.utils.json.loads"):
        return _default_decoder.decode(value)
Example #20
0
 def prefetch_basic(self):
     with sentry_sdk.start_span(op="stats",
                                description=f"PREFETCH {self.cache_key}"):
         self._prefetch_basic()
Example #21
0
def transform_results(result,
                      translated_columns,
                      snuba_filter,
                      selected_columns=None):
    """
    Transform internal names back to the public schema ones.

    When getting timeseries results via rollup, this function will
    zerofill the output results.
    """
    if selected_columns is None:
        selected_columns = []

    meta = []
    for col in result["meta"]:
        # Translate back column names that were converted to snuba format
        col["name"] = translated_columns.get(col["name"], col["name"])
        # Remove user fields as they will be replaced by the alias.
        meta.append(col)

    def get_row(row):
        transformed = {}
        for key, value in row.items():
            if isinstance(value, float) and math.isnan(value):
                value = 0
            transformed[translated_columns.get(key, key)] = value

        return transformed

    if len(translated_columns):
        result["data"] = [get_row(row) for row in result["data"]]

    rollup = snuba_filter.rollup
    if rollup and rollup > 0:
        with sentry_sdk.start_span(
                op="discover.discover",
                description="transform_results.zerofill") as span:
            span.set_data("result_count", len(result.get("data", [])))
            result["data"] = zerofill(result["data"], snuba_filter.start,
                                      snuba_filter.end, rollup,
                                      snuba_filter.orderby)

    for col in result["meta"]:
        if col["name"].startswith("histogram"):
            # The column name here has been translated, we need the original name
            for snuba_name, sentry_name in six.iteritems(translated_columns):
                if sentry_name == col["name"]:
                    with sentry_sdk.start_span(
                            op="discover.discover",
                            description="transform_results.histogram_zerofill"
                    ) as span:
                        span.set_data("histogram_function", snuba_name)
                        result["data"] = zerofill_histogram(
                            result["data"],
                            result["meta"],
                            snuba_filter.orderby,
                            sentry_name,
                            snuba_name,
                        )
            break

    return result
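
transform_results relies on a zerofill() helper to pad missing rollup buckets with empty rows. A toy version of the idea (not the Sentry implementation) for integer "time" buckets:

from datetime import datetime, timedelta

def toy_zerofill(data, start, end, rollup):
    """Return one row per rollup bucket, inserting zero counts where data has gaps."""
    by_bucket = {row["time"]: row for row in data}
    filled = []
    current = start
    while current < end:
        bucket = int(current.timestamp())
        filled.append(by_bucket.get(bucket, {"time": bucket, "count": 0}))
        current += timedelta(seconds=rollup)
    return filled

rows = toy_zerofill(
    [{"time": int(datetime(2021, 1, 1, 0, 1).timestamp()), "count": 3}],
    datetime(2021, 1, 1, 0, 0),
    datetime(2021, 1, 1, 0, 3),
    rollup=60,
)
# -> three rows, with counts [0, 3, 0]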
Example #22
0
File: client.py Project: jiebaby/sentry
    def _request(
        self,
        method,
        path,
        headers=None,
        data=None,
        params=None,
        auth=None,
        json=True,
        allow_text=None,
        allow_redirects=None,
        timeout=None,
    ):

        if allow_text is None:
            allow_text = self.allow_text

        if allow_redirects is None:
            allow_redirects = self.allow_redirects

        if allow_redirects is None:  # is still None
            allow_redirects = method.upper() == "GET"

        if timeout is None:
            timeout = 30

        full_url = self.build_url(path)
        session = build_session()

        metrics.incr(
            u"%s.http_request" % self.datadog_prefix,
            sample_rate=1.0,
            tags={self.integration_type: self.name},
        )

        with sentry_sdk.start_span(
            op=u"{}.http".format(self.integration_type),
            transaction=u"{}.http_response.{}".format(self.integration_type, self.name),
        ) as span:
            try:
                resp = getattr(session, method.lower())(
                    url=full_url,
                    headers=headers,
                    json=data if json else None,
                    data=data if not json else None,
                    params=params,
                    auth=auth,
                    verify=self.verify_ssl,
                    allow_redirects=allow_redirects,
                    timeout=timeout,
                )
                resp.raise_for_status()
            except ConnectionError as e:
                self.track_response_data("connection_error", span, e)
                raise ApiHostError.from_exception(e)
            except Timeout as e:
                self.track_response_data("timeout", span, e)
                raise ApiTimeoutError.from_exception(e)
            except HTTPError as e:
                resp = e.response
                if resp is None:
                    self.track_response_data("unknown", span, e)
                    self.logger.exception(
                        "request.error", extra={self.integration_type: self.name, "url": full_url}
                    )
                    raise ApiError("Internal Error")
                self.track_response_data(resp.status_code, span, e)
                raise ApiError.from_response(resp)

            self.track_response_data(resp.status_code, span, None, resp)

            if resp.status_code == 204:
                return {}

            return BaseApiResponse.from_response(resp, allow_text=allow_text)
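
A stripped-down version of the pattern in _request above: wrap the outbound HTTP call in a span and record the outcome on it. This sketch uses the requests library directly and omits the integration-specific error classes:

import requests
import sentry_sdk

def fetch(url, timeout=30):
    with sentry_sdk.start_span(op="integration.http", description=f"GET {url}") as span:
        try:
            resp = requests.get(url, timeout=timeout)
            resp.raise_for_status()
        except requests.RequestException as exc:
            span.set_data("error", type(exc).__name__)
            raise
        span.set_data("status_code", resp.status_code)
        return resp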
Example #23
0
def top_events_timeseries(
    timeseries_columns,
    selected_columns,
    user_query,
    params,
    orderby,
    rollup,
    limit,
    organization,
    referrer=None,
    top_events=None,
):
    """
    High-level API for doing arbitrary user timeseries queries for a limited number of top events

    Returns a dictionary of SnubaTSResult objects that have been zerofilled in
    case of gaps. Each value of the dictionary should match the result of a timeseries query

    timeseries_columns (Sequence[str]) List of public aliases to fetch for the timeseries query,
                        usually matches the y-axis of the graph
    selected_columns (Sequence[str]) List of public aliases to fetch for the events query,
                        this is to determine what the top events are
    user_query (str) Filter query string to create conditions from. Named user_query
                        so it does not conflict with the query function
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment,
    orderby (Sequence[str]) The fields to order results by.
    rollup (int) The bucket width in seconds
    limit (int) The number of events to get timeseries for
    organization (Organization) Used to map group ids to short ids
    referrer (str|None) A referrer string to help locate the origin of this query.
    """
    if top_events is None:
        with sentry_sdk.start_span(op="discover.discover",
                                   description="top_events.fetch_events"):
            top_events = query(
                selected_columns,
                query=user_query,
                params=params,
                orderby=orderby,
                limit=limit,
                referrer=referrer,
                use_aggregate_conditions=True,
            )

    with sentry_sdk.start_span(
            op="discover.discover",
            description="top_events.filter_transform") as span:
        span.set_data("query", user_query)
        snuba_filter, translated_columns = get_timeseries_snuba_filter(
            list(set(timeseries_columns + selected_columns)),
            user_query,
            params,
            rollup,
            default_count=False,
        )

        for field in selected_columns:
            # project is handled by filter_keys already
            if field in ["project", "project.id"]:
                continue
            if field == "issue":
                field = FIELD_ALIASES["issue"]["column_alias"]
            # Note that because orderby shouldn't be an array field it's not included in the values
            values = list({
                event.get(field)
                for event in top_events["data"]
                if field in event and not isinstance(event.get(field), list)
            })
            if values:
                # timestamp needs special handling, creating a big OR instead
                if field == "timestamp":
                    snuba_filter.conditions.append([["timestamp", "=", value]
                                                    for value in values])
                elif None in values:
                    non_none_values = [
                        value for value in values if value is not None
                    ]
                    condition = [[["isNull", [resolve_discover_column(field)]],
                                  "=", 1]]
                    if non_none_values:
                        condition.append([
                            resolve_discover_column(field), "IN",
                            non_none_values
                        ])
                    snuba_filter.conditions.append(condition)
                else:
                    snuba_filter.conditions.append(
                        [resolve_discover_column(field), "IN", values])

    with sentry_sdk.start_span(op="discover.discover",
                               description="top_events.snuba_query"):
        result = raw_query(
            aggregations=snuba_filter.aggregations,
            conditions=snuba_filter.conditions,
            filter_keys=snuba_filter.filter_keys,
            selected_columns=snuba_filter.selected_columns,
            start=snuba_filter.start,
            end=snuba_filter.end,
            rollup=rollup,
            orderby="time",
            groupby=["time"] + snuba_filter.groupby,
            dataset=Dataset.Discover,
            limit=10000,
            referrer=referrer,
        )

    with sentry_sdk.start_span(
            op="discover.discover",
            description="top_events.transform_results") as span:
        span.set_data("result_count", len(result.get("data", [])))
        result = transform_results(result, translated_columns, snuba_filter,
                                   selected_columns)

        translated_columns["project_id"] = "project"
        translated_groupby = [
            translated_columns.get(groupby, groupby)
            for groupby in snuba_filter.groupby
        ]

        issues = {}
        if "issue" in selected_columns:
            issues = Group.issues_mapping(
                set([event["issue.id"] for event in top_events["data"]]),
                params["project_id"],
                organization,
            )
        # so the result key is consistent
        translated_groupby.sort()

        results = {}
        # Using the top events add the order to the results
        for index, item in enumerate(top_events["data"]):
            result_key = create_result_key(item, translated_groupby, issues)
            results[result_key] = {"order": index, "data": []}
        for row in result["data"]:
            result_key = create_result_key(row, translated_groupby, issues)
            if result_key in results:
                results[result_key]["data"].append(row)
            else:
                logger.warning(
                    "discover.top-events.timeseries.key-mismatch",
                    extra={
                        "result_key": result_key,
                        "top_event_keys": list(results.keys())
                    },
                )
        for key, item in six.iteritems(results):
            results[key] = SnubaTSResult(
                {
                    "data":
                    zerofill(item["data"], snuba_filter.start,
                             snuba_filter.end, rollup, "time"),
                    "order":
                    item["order"],
                },
                snuba_filter.start,
                snuba_filter.end,
                rollup,
            )

    return results
    def get(self, request: Request, organization: Organization) -> Response:
        with sentry_sdk.start_span(op="discover.endpoint",
                                   description="filter_params") as span:
            span.set_data("organization", organization)
            if not self.has_feature(organization, request):
                # We used to return a "v1" result here, keeping tags to keep an eye on its use
                span.set_data("using_v1_results", True)
                sentry_sdk.set_tag("stats.using_v1", organization.slug)
                return Response(status=404)

            top_events = 0

            if "topEvents" in request.GET:
                try:
                    top_events = int(request.GET.get("topEvents", 0))
                except ValueError:
                    return Response({"detail": "topEvents must be an integer"},
                                    status=400)
                if top_events > MAX_TOP_EVENTS:
                    return Response(
                        {
                            "detail":
                            f"Can only get up to {MAX_TOP_EVENTS} top events"
                        },
                        status=400,
                    )
                elif top_events <= 0:
                    return Response(
                        {"detail": "If topEvents needs to be at least 1"},
                        status=400)

            comparison_delta = None
            if "comparisonDelta" in request.GET:
                try:
                    comparison_delta = timedelta(
                        seconds=int(request.GET["comparisonDelta"]))
                except ValueError:
                    return Response(
                        {"detail": "comparisonDelta must be an integer"},
                        status=400)

            # The partial parameter determines whether or not partial buckets are allowed.
            # The last bucket of the time series can potentially be a partial bucket when
            # the start of the bucket does not align with the rollup.
            allow_partial_buckets = request.GET.get("partial") == "1"

            referrer = request.GET.get("referrer")
            referrer = (referrer if referrer in ALLOWED_EVENTS_STATS_REFERRERS
                        else "api.organization-event-stats")

        def get_event_stats(
            query_columns: Sequence[str],
            query: str,
            params: Dict[str, str],
            rollup: int,
            zerofill_results: bool,
            comparison_delta: Optional[datetime],
        ) -> SnubaTSResult:
            if top_events > 0:
                return discover.top_events_timeseries(
                    timeseries_columns=query_columns,
                    selected_columns=self.get_field_list(
                        organization, request),
                    equations=self.get_equation_list(organization, request),
                    user_query=query,
                    params=params,
                    orderby=self.get_orderby(request),
                    rollup=rollup,
                    limit=top_events,
                    organization=organization,
                    referrer=referrer + ".find-topn",
                    allow_empty=False,
                    zerofill_results=zerofill_results,
                    include_other=True,
                    use_snql=self.has_discover_snql(organization, request),
                )
            return discover.timeseries_query(
                selected_columns=query_columns,
                query=query,
                params=params,
                rollup=rollup,
                referrer=referrer,
                zerofill_results=zerofill_results,
                comparison_delta=comparison_delta,
                use_snql=self.has_discover_snql(organization, request),
            )

        try:
            return Response(
                self.get_event_stats_data(
                    request,
                    organization,
                    get_event_stats,
                    top_events,
                    allow_partial_buckets=allow_partial_buckets,
                    zerofill_results=not (
                        request.GET.get("withoutZerofill") == "1" and
                        self.has_chart_interpolation(organization, request)),
                    comparison_delta=comparison_delta,
                ),
                status=200,
            )
        except ValidationError:
            return Response(
                {"detail": "Comparison period is outside retention window"},
                status=400)
    def handle_message(self, message):
        """
        Parses the value from Kafka, and if valid passes the payload to the callback defined by the
        subscription. If the subscription has been removed, or no longer has a valid callback then
        just log metrics/errors and continue.
        :param message:
        :return:
        """
        with sentry_sdk.push_scope() as scope:
            try:
                contents = self.parse_message_value(message.value())
            except InvalidMessageError:
                # If the message is in an invalid format, just log the error
                # and continue
                logger.exception(
                    "Subscription update could not be parsed",
                    extra={
                        "offset": message.offset(),
                        "partition": message.partition(),
                        "value": message.value(),
                    },
                )
                return
            scope.set_tag("query_subscription_id", contents["subscription_id"])

            try:
                subscription = QuerySubscription.objects.get_from_cache(
                    subscription_id=contents["subscription_id"])
            except QuerySubscription.DoesNotExist:
                metrics.incr(
                    "snuba_query_subscriber.subscription_doesnt_exist")
                logger.error(
                    "Received subscription update, but subscription does not exist",
                    extra={
                        "offset": message.offset(),
                        "partition": message.partition(),
                        "value": message.value(),
                    },
                )
                return

            if subscription.type not in subscriber_registry:
                metrics.incr(
                    "snuba_query_subscriber.subscription_type_not_registered")
                logger.error(
                    "Received subscription update, but no subscription handler registered",
                    extra={
                        "offset": message.offset(),
                        "partition": message.partition(),
                        "value": message.value(),
                    },
                )
                return

            logger.info(
                "query-subscription-consumer.handle_message",
                extra={
                    "timestamp": contents["timestamp"],
                    "query_subscription_id": contents["subscription_id"],
                    "contents": contents,
                    "offset": message.offset(),
                    "partition": message.partition(),
                    "value": message.value(),
                },
            )

            callback = subscriber_registry[subscription.type]
            with sentry_sdk.start_span(
                    op="process_message",
                    transaction="query_subscription_consumer_process_message"
            ) as span, metrics.timer(
                    "snuba_query_subscriber.callback.duration",
                    instance=subscription.type):
                span.set_data("payload", contents)
                callback(contents, subscription)
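
handle_message combines a scoped tag, a span and a metrics timer around the subscription callback. A reduced version of that wrapping, with the metrics timer left out and placeholder contents:

import sentry_sdk

def dispatch(callback, contents):
    with sentry_sdk.push_scope() as scope:
        # Tag any error reported while handling this update with the subscription id.
        scope.set_tag("query_subscription_id", contents["subscription_id"])
        with sentry_sdk.start_span(op="process_message") as span:
            span.set_data("payload", contents)
            callback(contents)

dispatch(lambda contents: None, {"subscription_id": "abc123"})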
Example #26
0
def query_facet_performance(
    params: Mapping[str, str],
    tag_data: Mapping[str, Any],
    aggregate_column: Optional[str] = None,
    filter_query: Optional[str] = None,
    orderby: Optional[str] = None,
    referrer: Optional[str] = None,
    limit: Optional[int] = None,
    offset: Optional[int] = None,
) -> Dict:
    with sentry_sdk.start_span(
        op="discover.discover", description="facets.filter_transform"
    ) as span:
        span.set_data("query", filter_query)
        snuba_filter = discover.get_filter(filter_query, params)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = discover.resolve_discover_aliases(snuba_filter)
    translated_aggregate_column = discover.resolve_discover_column(aggregate_column)

    # Aggregate (avg) and count of all transactions for this query
    transaction_aggregate = tag_data["aggregate"]

    # Dynamically sample so at least 50000 transactions are selected
    sample_start_count = 50000
    transaction_count = tag_data["count"]
    sampling_enabled = transaction_count > sample_start_count

    # log-e growth starting at 50,000
    target_sample = max(
        sample_start_count * (math.log(transaction_count) - (math.log(sample_start_count) - 1)),
        transaction_count,
    )

    dynamic_sample_rate = 0 if transaction_count <= 0 else (target_sample / transaction_count)
    sample_rate = min(max(dynamic_sample_rate, 0), 1) if sampling_enabled else None
    frequency_sample_rate = sample_rate if sample_rate else 1

    # Exclude tags that have high cardinality and are generally unrelated to performance
    excluded_tags = [
        "tags_key",
        "NOT IN",
        ["trace", "trace.ctx", "trace.span", "project", "browser", "celery_task_id", "url"],
    ]

    with sentry_sdk.start_span(op="discover.discover", description="facets.aggregate_tags"):
        span.set_data("sample_rate", sample_rate)
        span.set_data("target_sample", target_sample)
        conditions = snuba_filter.conditions
        aggregate_comparison = transaction_aggregate * 1.005 if transaction_aggregate else 0
        having = [excluded_tags]
        having.append(["aggregate", ">", aggregate_comparison])

        resolved_orderby = [] if orderby is None else orderby

        snuba_filter.conditions.append([translated_aggregate_column, "IS NOT NULL", None])

        tag_selected_columns = [
            [
                "sum",
                [
                    "minus",
                    [
                        translated_aggregate_column,
                        str(transaction_aggregate),
                    ],
                ],
                "sumdelta",
            ],
            ["count", [], "count"],
            [
                "divide",
                [
                    [
                        "divide",
                        [["count", []], frequency_sample_rate],
                    ],
                    transaction_count,
                ],
                "frequency",
            ],
            ["divide", ["aggregate", transaction_aggregate], "comparison"],
            ["avg", [translated_aggregate_column], "aggregate"],
        ]

        results = discover.raw_query(
            selected_columns=tag_selected_columns,
            conditions=conditions,
            start=snuba_filter.start,
            end=snuba_filter.end,
            filter_keys=snuba_filter.filter_keys,
            orderby=resolved_orderby + ["tags_key"],
            groupby=["tags_key", "tags_value"],
            having=having,
            dataset=Dataset.Discover,
            referrer=f"{referrer}.tag_values".format(referrer, "tag_values"),
            sample=sample_rate,
            turbo=sample_rate is not None,
            limitby=[1, "tags_key"],
            limit=limit,
            offset=offset,
        )

        results["meta"] = discover.transform_meta(results, {})

        return results
Example #27
0
def reprocess_event(project_id, event_id, start_time):

    from sentry.tasks.store import preprocess_event_from_reprocessing
    from sentry.ingest.ingest_consumer import CACHE_TIMEOUT

    # Take unprocessed data from old event and save it as unprocessed data
    # under a new event ID. The second step happens in pre-process. We could
    # save the "original event ID" instead and get away with writing less to
    # nodestore, but doing it this way makes the logic slightly simpler.
    node_id = _generate_unprocessed_event_node_id(project_id=project_id,
                                                  event_id=event_id)

    with sentry_sdk.start_span(op="reprocess_events.nodestore.get"):
        data = nodestore.get(node_id)

    with sentry_sdk.start_span(op="reprocess_events.eventstore.get"):
        event = eventstore.get_event_by_id(project_id, event_id)

    if event is None:
        logger.error("reprocessing2.event.not_found",
                     extra={
                         "project_id": project_id,
                         "event_id": event_id
                     })
        return

    if data is None:
        logger.error(
            "reprocessing2.reprocessing_nodestore.not_found",
            extra={
                "project_id": project_id,
                "event_id": event_id
            },
        )
        # We have no real data for reprocessing. We assume this event goes
        # straight to save_event, and hope that the event data can be
        # reingested like that. It's better than data loss.
        #
        # XXX: Ideally we would run a "save-lite" for this that only updates
        # the group ID in-place. Like a snuba merge message.
        data = dict(event.data)

    # Step 1: Fix up the event payload for reprocessing and put it in event
    # cache/event_processing_store
    set_path(data,
             "contexts",
             "reprocessing",
             "original_issue_id",
             value=event.group_id)
    cache_key = event_processing_store.store(data)

    # Step 2: Copy attachments into attachment cache
    queryset = models.EventAttachment.objects.filter(
        project_id=project_id, event_id=event_id).select_related("file")

    attachment_objects = []

    # enumerate() supplies the attachment id used for the cache copy; the
    # database id is only recorded on the tracing span below.
    for attachment_id, attachment in enumerate(queryset):
        with sentry_sdk.start_span(
                op="reprocess_event._copy_attachment_into_cache") as span:
            span.set_data("attachment_id", attachment.id)
            attachment_objects.append(
                _copy_attachment_into_cache(
                    attachment_id=attachment_id,
                    attachment=attachment,
                    cache_key=cache_key,
                    cache_timeout=CACHE_TIMEOUT,
                ))

    if attachment_objects:
        with sentry_sdk.start_span(op="reprocess_event.set_attachment_meta"):
            attachment_cache.set(cache_key,
                                 attachments=attachment_objects,
                                 timeout=CACHE_TIMEOUT)

    preprocess_event_from_reprocessing(cache_key=cache_key,
                                       start_time=start_time,
                                       event_id=event_id)
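
A hypothetical driver for the function above, shown only to illustrate the calling convention: it assumes the caller already knows which event IDs belong to the issue being reprocessed and shares one start_time across the batch. reprocess_events_for_project is not part of the original code.

from time import time

def reprocess_events_for_project(project_id, event_ids):
    # Share a single start_time so downstream timeout checks measure the
    # whole batch consistently.
    start_time = time()
    for event_id in event_ids:
        reprocess_event(project_id=project_id,
                        event_id=event_id,
                        start_time=start_time)
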
Example #28
0
def _do_symbolicate_event(cache_key, start_time, event_id, symbolicate_task, data=None):
    from sentry.lang.native.processing import get_symbolication_function

    if data is None:
        data = event_processing_store.get(cache_key)

    if data is None:
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "symbolicate"}, skip_internal=False
        )
        error_logger.error("symbolicate.failed.empty", extra={"cache_key": cache_key})
        return

    # Wrap the payload so legacy key names resolve to their canonical
    # equivalents for the rest of the pipeline.
    data = CanonicalKeyDict(data)

    project_id = data["project"]
    set_current_project(project_id)

    event_id = data["event_id"]

    symbolication_function = get_symbolication_function(data)

    has_changed = False

    from_reprocessing = symbolicate_task is symbolicate_event_from_reprocessing

    try:
        with sentry_sdk.start_span(op="tasks.store.symbolicate_event.symbolication") as span:
            span.set_data("symbolicaton_function", symbolication_function.__name__)

            with metrics.timer("tasks.store.symbolicate_event.symbolication"):
                symbolicated_data = symbolication_function(data)

            span.set_data("symbolicated_data", bool(symbolicated_data))
            if symbolicated_data:
                data = symbolicated_data
                has_changed = True

    except RetrySymbolication as e:
        if start_time and (time() - start_time) > settings.SYMBOLICATOR_PROCESS_EVENT_WARN_TIMEOUT:
            error_logger.warning(
                "symbolicate.slow", extra={"project_id": project_id, "event_id": event_id}
            )

        if start_time and (time() - start_time) > settings.SYMBOLICATOR_PROCESS_EVENT_HARD_TIMEOUT:
            # Do not drop event but actually continue with rest of pipeline
            # (persisting unsymbolicated event)
            error_logger.exception(
                "symbolicate.failed.infinite_retry",
                extra={"project_id": project_id, "event_id": event_id},
            )
            data.setdefault("_metrics", {})["flag.processing.error"] = True
            data.setdefault("_metrics", {})["flag.processing.fatal"] = True
            has_changed = True
        else:
            # Requeue the task in the "sleep" queue
            retry_symbolicate_event.apply_async(
                args=(),
                kwargs={
                    "symbolicate_task_name": symbolicate_task.__name__,
                    "task_kwargs": {
                        "cache_key": cache_key,
                        "event_id": event_id,
                        "start_time": start_time,
                    },
                },
                countdown=e.retry_after,
            )
            return
    except Exception:
        error_logger.exception("tasks.store.symbolicate_event.symbolication")
        data.setdefault("_metrics", {})["flag.processing.error"] = True
        data.setdefault("_metrics", {})["flag.processing.fatal"] = True
        has_changed = True

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        cache_key = event_processing_store.store(data)

    process_task = process_event_from_reprocessing if from_reprocessing else process_event
    _do_process_event(
        cache_key=cache_key,
        start_time=start_time,
        event_id=event_id,
        process_task=process_task,
        data=data,
        data_has_changed=has_changed,
        from_symbolicate=True,
    )
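
For context on the retry branch above: a symbolication function is expected to raise RetrySymbolication with a retry_after delay while results are still pending, and that attribute is what feeds the countdown of the requeued task. A hypothetical sketch of that contract (poll_symbolicator and apply_symbolicated_frames are made-up helpers, and the exact RetrySymbolication constructor may differ):

def fake_symbolication_function(data):
    # Hypothetical helper call; a real implementation talks to Symbolicator.
    response = poll_symbolicator(data)
    if response.status == "pending":
        # Signal _do_symbolicate_event to requeue itself after this delay.
        raise RetrySymbolication(retry_after=response.retry_after)
    if response.status == "completed":
        # Return the mutated payload so has_changed is set upstream.
        return apply_symbolicated_frames(data, response)
    # Returning None means the event payload was left untouched.
    return None
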
Example #29
0
    def get(self, request, organization):
        if not self.has_feature(organization, request):
            return Response(status=404)

        try:
            params = self.get_snuba_params(request, organization)
        except NoProjects:
            return Response([])

        with sentry_sdk.start_span(op="discover.endpoint",
                                   description="trend_dates"):
            middle_date = request.GET.get("middle")
            if middle_date:
                try:
                    middle = parse_datetime_string(middle_date)
                except InvalidQuery:
                    raise ParseError(
                        detail=f"{middle_date} is not a valid date format")
                if middle <= params["start"] or middle >= params["end"]:
                    raise ParseError(
                        detail="The middle date should be within the duration of the query"
                    )
            else:
                middle = params["start"] + timedelta(
                    seconds=(params["end"] - params["start"]).total_seconds() *
                    0.5)
            middle = datetime.strftime(middle, DateArg.date_format)

        trend_type = request.GET.get("trendType", REGRESSION)
        if trend_type not in TREND_TYPES:
            raise ParseError(
                detail=f"{trend_type} is not a supported trend type")

        params["aliases"] = self.get_function_aliases(trend_type)

        trend_function = request.GET.get("trendFunction", "p50()")
        function, columns, alias = parse_function(trend_function)
        if len(columns) == 0:
            # Default to duration
            column = "transaction.duration"
        else:
            column = columns[0]

        trend_columns = self.get_trend_columns(function, column, middle)

        selected_columns = self.get_field_list(organization, request)
        orderby = self.get_orderby(request)

        query = request.GET.get("query")

        def data_fn(offset, limit):
            return discover.query(
                selected_columns=selected_columns + trend_columns,
                query=query,
                params=params,
                orderby=orderby,
                offset=offset,
                limit=limit,
                referrer="api.trends.get-percentage-change",
                auto_fields=True,
                auto_aggregations=True,
                use_aggregate_conditions=True,
            )

        with self.handle_query_errors():
            return self.paginate(
                request=request,
                paginator=GenericOffsetPaginator(data_fn=data_fn),
                on_results=self.build_result_handler(request, organization,
                                                     params, trend_function,
                                                     selected_columns, orderby,
                                                     query),
                default_per_page=5,
                max_per_page=5,
            )
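
The query parameters this endpoint reads are middle, trendType, trendFunction, and query (plus the usual field, orderby, and pagination parameters handled by the base class). An illustrative parameter set with placeholder values, assuming middle must parse via parse_datetime_string and fall strictly inside the requested time range:

query_params = {
    "middle": "2021-01-15T00:00:00",      # optional; defaults to the midpoint of start/end
    "trendType": REGRESSION,              # must be one of TREND_TYPES
    "trendFunction": "p50()",             # default when omitted
    "query": "event.type:transaction",    # hypothetical discover query string
}
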
Example #30
0
    def _post_by_project(self, request, full_config_requested):
        project_ids = set(request.relay_request_data.get("projects") or ())

        with start_span(op="relay_fetch_projects"):
            if project_ids:
                with metrics.timer(
                        "relay_project_configs.fetching_projects.duration"):
                    projects = {
                        p.id: p
                        for p in Project.objects.get_many_from_cache(
                            project_ids)
                    }
            else:
                projects = {}

        with start_span(op="relay_fetch_orgs"):
            # Preload all organizations and their options to prevent repeated
            # database access when computing the project configuration.
            org_ids = {
                project.organization_id
                for project in projects.values()
            }
            if org_ids:
                with metrics.timer(
                        "relay_project_configs.fetching_orgs.duration"):
                    orgs = Organization.objects.get_many_from_cache(org_ids)
                    orgs = {
                        o.id: o
                        for o in orgs if request.relay.has_org_access(o)
                    }
            else:
                orgs = {}

            with metrics.timer(
                    "relay_project_configs.fetching_org_options.duration"):
                for org_id in orgs.keys():
                    OrganizationOption.objects.get_all_values(org_id)

        with start_span(op="relay_fetch_keys"):
            project_keys = {}
            for key in ProjectKey.objects.filter(project_id__in=project_ids):
                project_keys.setdefault(key.project_id, []).append(key)

        metrics.timing("relay_project_configs.projects_requested",
                       len(project_ids))
        metrics.timing("relay_project_configs.projects_fetched", len(projects))
        metrics.timing("relay_project_configs.orgs_fetched", len(orgs))

        configs = {}
        for project_id in project_ids:
            configs[str(project_id)] = {"disabled": True}

            project = projects.get(int(project_id))
            if project is None:
                continue

            organization = orgs.get(project.organization_id)
            if organization is None:
                continue

            # Prevent organization from being fetched again in quotas.
            project.set_cached_field_value("organization", organization)

            with start_span(op="get_config"):
                with metrics.timer(
                        "relay_project_configs.get_config.duration"):
                    project_config = config.get_project_config(
                        project,
                        full_config=full_config_requested,
                        project_keys=project_keys.get(project.id) or [],
                    )

            configs[str(project_id)] = project_config.to_dict()

        if full_config_requested:
            projectconfig_cache.set_many(configs)

        return Response({"configs": configs}, status=200)