Example #1
def should_demote_symbolication(project_id: int) -> bool:
    """
    Determines whether a project's symbolication events should be pushed to the low priority queue.

    The decision is made based on three factors, in order:
        1. is the store.symbolicate-event-lpq-never killswitch set for the project? -> normal queue
        2. is the store.symbolicate-event-lpq-always killswitch set for the project? -> low priority queue
        3. has the project been selected for the lpq according to realtime_metrics? -> low priority queue

    Note that 3 is gated behind the config setting SENTRY_ENABLE_AUTO_LOW_PRIORITY_QUEUE.
    """
    always_lowpri = killswitch_matches_context(
        "store.symbolicate-event-lpq-always",
        {
            "project_id": project_id,
        },
    )
    never_lowpri = killswitch_matches_context(
        "store.symbolicate-event-lpq-never",
        {
            "project_id": project_id,
        },
    )

    if never_lowpri:
        return False
    elif always_lowpri:
        return True
    else:
        return settings.SENTRY_ENABLE_AUTO_LOW_PRIORITY_QUEUE and realtime_metrics.is_lpq_project(
            project_id
        )
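
Note: the docstring above spells out a strict precedence: the "never" killswitch beats the
"always" killswitch, which in turn beats the realtime_metrics signal. Below is a minimal
sketch of how a caller might route a symbolication task based on the result; submit_to_queue()
and the queue names are illustrative placeholders, not the actual Sentry task submission API
(the real routing happens inside _do_symbolicate_event, see Example #7):

def route_symbolication_task(project_id: int, **task_kwargs) -> None:
    # Sketch only: pick a queue based on the demotion decision above.
    # submit_to_queue() and the queue names are stand-ins for illustration.
    if should_demote_symbolication(project_id):
        queue = "events.symbolicate_event_low_priority"
    else:
        queue = "events.symbolicate_event"
    submit_to_queue(queue, project_id=project_id, **task_kwargs)
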
Example #2
def should_demote_symbolication(project_id):
    """
    Determines whether a project's symbolication events should be pushed to the low priority queue.
    """
    always_lowpri = killswitch_matches_context(
        "store.symbolicate-event-lpq-always",
        {
            "project_id": project_id,
        },
    )
    never_lowpri = killswitch_matches_context(
        "store.symbolicate-event-lpq-never",
        {
            "project_id": project_id,
        },
    )
    return not never_lowpri and always_lowpri
Example #3
def _do_process_event(
    cache_key,
    start_time,
    event_id,
    process_task,
    data=None,
    data_has_changed=None,
    from_symbolicate=False,
):
    from sentry.plugins.base import plugins

    if data is None:
        data = event_processing_store.get(cache_key)

    if data is None:
        metrics.incr("events.failed",
                     tags={
                         "reason": "cache",
                         "stage": "process"
                     },
                     skip_internal=False)
        error_logger.error("process.failed.empty",
                           extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    set_current_event_project(project_id)

    event_id = data["event_id"]

    if killswitch_matches_context(
            "store.load-shed-process-event-projects",
        {
            "project_id": project_id,
            "event_id": event_id,
            "platform": data.get("platform") or "null",
        },
    ):
        return

    with sentry_sdk.start_span(
            op="tasks.store.process_event.get_project_from_cache"):
        project = Project.objects.get_from_cache(id=project_id)

    with metrics.timer(
            "tasks.store.process_event.organization.get_from_cache"):
        project._organization_cache = Organization.objects.get_from_cache(
            id=project.organization_id)

    has_changed = bool(data_has_changed)

    with sentry_sdk.start_span(
            op="tasks.store.process_event.get_reprocessing_revision"):
        # Fetch the reprocessing revision
        reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    # Stacktrace based event processors.
    with sentry_sdk.start_span(op="task.store.process_event.stacktraces"):
        with metrics.timer("tasks.store.process_event.stacktraces",
                           tags={"from_symbolicate": from_symbolicate}):
            new_data = process_stacktraces(data)

    if new_data is not None:
        has_changed = True
        data = new_data

    # Second round of datascrubbing after stacktrace and language-specific
    # processing. First round happened as part of ingest.
    #
    # *Right now* the only sensitive data that is added in stacktrace
    # processing are usernames in filepaths, so we run directly after
    # stacktrace processors.
    #
    # We do not yet want to deal with context data produced by plugins like
    # sessionstack or fullstory (which are in `get_event_preprocessors`), as
    # this data is very unlikely to be sensitive data. This is why scrubbing
    # happens somewhere in the middle of the pipeline.
    #
    # On the other hand, Javascript event error translation is happening after
    # this block because it uses `get_event_preprocessors` instead of
    # `get_event_enhancers`.
    #
    # We are fairly confident, however, that this should run *before*
    # re-normalization as it is hard to find sensitive data in partially
    # trimmed strings.
    if has_changed and options.get("processing.can-use-scrubbers"):
        with sentry_sdk.start_span(op="task.store.datascrubbers.scrub"):
            with metrics.timer("tasks.store.datascrubbers.scrub",
                               tags={"from_symbolicate": from_symbolicate}):
                new_data = safe_execute(scrub_data,
                                        project=project,
                                        event=data.data)

                # XXX(markus): When datascrubbing is finally "totally stable", we might want
                # to drop the event if it crashes to avoid saving PII
                if new_data is not None:
                    data.data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        with sentry_sdk.start_span(
                op="task.store.process_event.preprocessors") as span:
            span.set_data("plugin", plugin.slug)
            span.set_data("from_symbolicate", from_symbolicate)
            with metrics.timer(
                    "tasks.store.process_event.preprocessors",
                    tags={
                        "plugin": plugin.slug,
                        "from_symbolicate": from_symbolicate
                    },
            ):
                processors = safe_execute(plugin.get_event_preprocessors,
                                          data=data,
                                          _with_transaction=False)
                for processor in processors or ():
                    try:
                        result = processor(data)
                    except Exception:
                        error_logger.exception(
                            "tasks.store.preprocessors.error")
                        data.setdefault("_metrics",
                                        {})["flag.processing.error"] = True
                        has_changed = True
                    else:
                        if result:
                            data = result
                            has_changed = True

    assert data["project"] == project_id, "Project cannot be mutated by plugins"

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        # Run some of normalization again such that we don't:
        # - persist e.g. incredibly large stacktraces from minidumps
        # - store event timestamps that are older than our retention window
        #   (also happening with minidumps)
        normalizer = StoreNormalizer(remove_other=False,
                                     is_renormalize=True,
                                     **DEFAULT_STORE_NORMALIZER_ARGS)
        data = normalizer.normalize_event(dict(data))

        issues = data.get("processing_issues")

        try:
            if issues and create_failed_event(
                    cache_key,
                    data,
                    project_id,
                    list(issues.values()),
                    event_id=event_id,
                    start_time=start_time,
                    reprocessing_rev=reprocessing_rev,
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again.  This happens when the reprocessing
            # revision changed while we were processing.
            _do_preprocess_event(cache_key, data, start_time, event_id,
                                 process_task, project)
            return

        cache_key = event_processing_store.store(data)

    from_reprocessing = process_task is process_event_from_reprocessing
    submit_save_event(project, from_reprocessing, cache_key, event_id,
                      start_time, data)
Example #4
def _do_symbolicate_event(cache_key,
                          start_time,
                          event_id,
                          symbolicate_task,
                          data=None):
    from sentry.lang.native.processing import get_symbolication_function

    if data is None:
        data = event_processing_store.get(cache_key)

    if data is None:
        metrics.incr("events.failed",
                     tags={
                         "reason": "cache",
                         "stage": "symbolicate"
                     },
                     skip_internal=False)
        error_logger.error("symbolicate.failed.empty",
                           extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    set_current_event_project(project_id)

    event_id = data["event_id"]

    if killswitch_matches_context(
            "store.load-shed-symbolicate-event-projects",
        {
            "project_id": project_id,
            "event_id": event_id,
            "platform": data.get("platform") or "null",
        },
    ):
        return

    symbolication_function = get_symbolication_function(data)

    has_changed = False

    from_reprocessing = symbolicate_task is symbolicate_event_from_reprocessing

    symbolication_start_time = time()

    with sentry_sdk.start_span(
            op="tasks.store.symbolicate_event.symbolication") as span:
        span.set_data("symbolicaton_function", symbolication_function.__name__)
        with metrics.timer(
                "tasks.store.symbolicate_event.symbolication",
                tags={
                    "symbolication_function": symbolication_function.__name__
                },
        ):
            while True:
                try:
                    with sentry_sdk.start_span(
                            op="tasks.store.symbolicate_event.%s" %
                            symbolication_function.__name__) as span:
                        symbolicated_data = symbolication_function(data)
                        span.set_data("symbolicated_data",
                                      bool(symbolicated_data))

                    if symbolicated_data:
                        data = symbolicated_data
                        has_changed = True

                    break
                except RetrySymbolication as e:
                    if (
                        time() - symbolication_start_time
                    ) > settings.SYMBOLICATOR_PROCESS_EVENT_WARN_TIMEOUT:
                        error_logger.warning(
                            "symbolicate.slow",
                            extra={
                                "project_id": project_id,
                                "event_id": event_id
                            },
                        )
                    if (
                        time() - symbolication_start_time
                    ) > settings.SYMBOLICATOR_PROCESS_EVENT_HARD_TIMEOUT:
                        # Do not drop event but actually continue with rest of pipeline
                        # (persisting unsymbolicated event)
                        metrics.incr(
                            "tasks.store.symbolicate_event.fatal",
                            tags={
                                "reason": "timeout",
                                "symbolication_function": symbolication_function.__name__,
                            },
                        )
                        error_logger.exception(
                            "symbolicate.failed.infinite_retry",
                            extra={
                                "project_id": project_id,
                                "event_id": event_id
                            },
                        )
                        data.setdefault("_metrics",
                                        {})["flag.processing.error"] = True
                        data.setdefault("_metrics",
                                        {})["flag.processing.fatal"] = True
                        has_changed = True
                        break
                    else:
                        # sleep for `retry_after` but max 5 seconds and try again
                        metrics.incr(
                            "tasks.store.symbolicate_event.retry",
                            tags={
                                "symbolication_function": symbolication_function.__name__
                            },
                        )
                        sleep(min(e.retry_after, SYMBOLICATOR_MAX_RETRY_AFTER))
                        continue
                except Exception:
                    metrics.incr(
                        "tasks.store.symbolicate_event.fatal",
                        tags={
                            "reason": "error",
                            "symbolication_function": symbolication_function.__name__,
                        },
                    )
                    error_logger.exception(
                        "tasks.store.symbolicate_event.symbolication")
                    data.setdefault("_metrics",
                                    {})["flag.processing.error"] = True
                    data.setdefault("_metrics",
                                    {})["flag.processing.fatal"] = True
                    has_changed = True
                    break

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        cache_key = event_processing_store.store(data)

    process_task = process_event_from_reprocessing if from_reprocessing else process_event
    _do_process_event(
        cache_key=cache_key,
        start_time=start_time,
        event_id=event_id,
        process_task=process_task,
        data=data,
        data_has_changed=has_changed,
        from_symbolicate=True,
    )
Example #5
def _do_save_event(
    cache_key: Optional[str] = None,
    data: Optional[Event] = None,
    start_time: Optional[int] = None,
    event_id: Optional[str] = None,
    project_id: Optional[int] = None,
    **kwargs: Any,
) -> None:
    """
    Saves an event to the database.
    """

    set_current_event_project(project_id)

    from sentry.event_manager import EventManager, HashDiscarded

    event_type = "none"

    if cache_key and data is None:
        with metrics.timer(
                "tasks.store.do_save_event.get_cache") as metric_tags:
            data = processing.event_processing_store.get(cache_key)
            if data is not None:
                metric_tags["event_type"] = event_type = data.get(
                    "type") or "none"

    with metrics.global_tags(event_type=event_type):
        if data is not None:
            data = CanonicalKeyDict(data)

        if event_id is None and data is not None:
            event_id = data["event_id"]

        # Only when we come from reprocessing do we get a project_id sent into
        # the task.
        if project_id is None:
            project_id = data.pop("project")
            set_current_event_project(project_id)

        # We only need to delete raw events for events that support
        # reprocessing.  If the data cannot be found we want to assume
        # that we need to delete the raw event.
        if not data or reprocessing.event_supports_reprocessing(data):
            with metrics.timer("tasks.store.do_save_event.delete_raw_event"):
                delete_raw_event(project_id, event_id, allow_hint_clear=True)

        # This covers two cases: where data is None because we did not manage
        # to fetch it from the default cache or the empty dictionary was
        # stored in the default cache.  The former happens if the event
        # expired while being on the queue, the second happens on reprocessing
        # if the raw event was deleted concurrently while we held on to
        # it.  This causes the node store to delete the data and we end up
        # fetching an empty dict.  We could in theory not invoke `save_event`
        # in those cases but it's important that we always clean up the
        # reprocessing reports correctly or they will screw up the UI.  So
        # to future proof this correctly we just handle this case here.
        if not data:
            metrics.incr("events.failed",
                         tags={
                             "reason": "cache",
                             "stage": "post"
                         },
                         skip_internal=False)
            return

        try:
            if killswitch_matches_context(
                    "store.load-shed-save-event-projects",
                {
                    "project_id": project_id,
                    "event_type": event_type,
                    "platform": data.get("platform") or "none",
                },
            ):
                raise HashDiscarded("Load shedding save_event")

            with metrics.timer("tasks.store.do_save_event.event_manager.save"):
                manager = EventManager(data)
                # event.project.organization is populated after this statement.
                manager.save(project_id,
                             assume_normalized=True,
                             start_time=start_time,
                             cache_key=cache_key)
                # Put the updated event back into the cache so that post_process
                # has the most recent data.
                data = manager.get_data()
                if isinstance(data, CANONICAL_TYPES):
                    data = dict(data.items())
                with metrics.timer(
                        "tasks.store.do_save_event.write_processing_cache"):
                    processing.event_processing_store.store(data)
        except HashDiscarded:
            # Delete the event payload from cache since it won't show up in post-processing.
            if cache_key:
                with metrics.timer("tasks.store.do_save_event.delete_cache"):
                    processing.event_processing_store.delete_by_key(cache_key)

        finally:
            reprocessing2.mark_event_reprocessed(data)
            if cache_key:
                with metrics.timer(
                        "tasks.store.do_save_event.delete_attachment_cache"):
                    attachment_cache.delete(cache_key)

            if start_time:
                metrics.timing(
                    "events.time-to-process",
                    time() - start_time,
                    instance=data["platform"],
                    tags={
                        "is_reprocessing2":
                        "true" if reprocessing2.is_reprocessed_event(data) else
                        "false",
                    },
                )

            time_synthetic_monitoring_event(data, project_id, start_time)
Example #6
def _load_event(
    message: Message, projects: Mapping[int, Project]
) -> Optional[Tuple[Any, Callable[[str], None]]]:
    """
    Perform some initial filtering and deserialize the message payload. If the
    event should be stored, the deserialized payload is returned along with a
    function that can be called with the event's storage key to resume
    processing after the event has been persisted and is available to be read by
    other processing components.
    """
    payload = message["payload"]
    start_time = float(message["start_time"])
    event_id = message["event_id"]
    project_id = int(message["project_id"])
    remote_addr = message.get("remote_addr")
    attachments = message.get("attachments") or ()

    sentry_sdk.set_extra("event_id", event_id)
    sentry_sdk.set_extra("len_attachments", len(attachments))

    if project_id == settings.SENTRY_PROJECT:
        metrics.incr("internal.captured.ingest_consumer.unparsed")

    # check that we haven't already processed this event (a previous instance of the forwarder
    # died before it could commit the event queue offset)
    #
    # XXX(markus): I believe this code is extremely broken:
    #
    # * it practically uses memcached in prod which has no consistency
    #   guarantees (no idea how we don't run into issues there)
    #
    # * a TTL of 1h basically doesn't guarantee any deduplication at all. It
    #   just guarantees a good error message... for one hour.
    #
    # This code has been ripped from the old python store endpoint. We're
    # keeping it around because it does provide some protection against
    # reprocessing good events if a single consumer is in a restart loop.
    deduplication_key = f"ev:{project_id}:{event_id}"
    if cache.get(deduplication_key) is not None:
        logger.warning(
            "pre-process-forwarder detected a duplicated event" " with id:%s for project:%s.",
            event_id,
            project_id,
        )
        return  # message already processed, do not reprocess

    if killswitch_matches_context(
        "store.load-shed-pipeline-projects",
        {
            "project_id": project_id,
            "event_id": event_id,
            "has_attachments": bool(attachments),
        },
    ):
        # This killswitch is for the worst of scenarios and should probably not
        # cause additional load on our logging infrastructure
        return

    try:
        project = projects[project_id]
    except KeyError:
        logger.error("Project for ingested event does not exist: %s", project_id)
        return

    # Parse the JSON payload. This is required to compute the cache key and
    # call process_event. The payload will be put into Kafka raw, to avoid
    # serializing it again.
    # XXX: Do not use CanonicalKeyDict here. This may break preprocess_event
    # which assumes that data passed in is a raw dictionary.
    data = json.loads(payload)

    if project_id == settings.SENTRY_PROJECT:
        metrics.incr(
            "internal.captured.ingest_consumer.parsed",
            tags={"event_type": data.get("type") or "null"},
        )

    if killswitch_matches_context(
        "store.load-shed-parsed-pipeline-projects",
        {
            "organization_id": project.organization_id,
            "project_id": project.id,
            "event_type": data.get("type") or "null",
            "has_attachments": bool(attachments),
            "event_id": event_id,
        },
    ):
        return

    def dispatch_task(cache_key: str) -> None:
        if attachments:
            with sentry_sdk.start_span(op="ingest_consumer.set_attachment_cache"):
                attachment_objects = [
                    CachedAttachment(type=attachment.pop("attachment_type"), **attachment)
                    for attachment in attachments
                ]

                attachment_cache.set(
                    cache_key, attachments=attachment_objects, timeout=CACHE_TIMEOUT
                )

        # Preprocess this event, which spawns either process_event or
        # save_event. Pass data explicitly to avoid fetching it again from the
        # cache.
        with sentry_sdk.start_span(op="ingest_consumer.process_event.preprocess_event"):
            preprocess_event(
                cache_key=cache_key,
                data=data,
                start_time=start_time,
                event_id=event_id,
                project=project,
            )

        # remember for 1 hour that we saved this event (deduplication protection)
        cache.set(deduplication_key, "", CACHE_TIMEOUT)

        # emit event_accepted once everything is done
        event_accepted.send_robust(ip=remote_addr, data=data, project=project, sender=process_event)

    return data, dispatch_task
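
Note: the docstring of _load_event describes a two-step contract: persist the payload first,
then call the returned function with the storage key to resume processing. Below is a minimal
driver sketch under that contract, reusing the event_processing_store seen in the other
examples here; the consumer loop itself is hypothetical:

def consume(messages, projects):
    # Hypothetical consumer loop illustrating the documented contract.
    for message in messages:
        result = _load_event(message, projects)
        if result is None:
            # Dropped: duplicate event, killswitch match, or unknown project.
            continue
        data, dispatch_task = result
        # Persist first, then resume processing with the storage key.
        cache_key = event_processing_store.store(data)
        dispatch_task(cache_key)
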
Example #7
def _do_symbolicate_event(
    cache_key, start_time, event_id, symbolicate_task, data=None, queue_switches=0
):
    from sentry.lang.native.processing import get_symbolication_function

    if data is None:
        data = event_processing_store.get(cache_key)

    if data is None:
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "symbolicate"}, skip_internal=False
        )
        error_logger.error("symbolicate.failed.empty", extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    set_current_event_project(project_id)

    event_id = data["event_id"]

    from_reprocessing = (
        symbolicate_task is symbolicate_event_from_reprocessing
        or symbolicate_task is symbolicate_event_from_reprocessing_low_priority
    )

    # check whether the event is in the wrong queue and if so, move it to the other one.
    # we do this at most SYMBOLICATOR_MAX_QUEUE_SWITCHES times.
    if queue_switches >= SYMBOLICATOR_MAX_QUEUE_SWITCHES:
        metrics.gauge("tasks.store.symbolicate_event.low_priority.max_queue_switches", 1)
    else:
        is_low_priority = symbolicate_task in [
            symbolicate_event_low_priority,
            symbolicate_event_from_reprocessing_low_priority,
        ]
        should_be_low_priority = should_demote_symbolication(project_id)

        if is_low_priority != should_be_low_priority:
            metrics.gauge("tasks.store.symbolicate_event.low_priority.wrong_queue", 1)
            submit_symbolicate(
                should_be_low_priority,
                from_reprocessing,
                cache_key,
                event_id,
                start_time,
                data,
                queue_switches + 1,
            )
            return

    def _continue_to_process_event():
        process_task = process_event_from_reprocessing if from_reprocessing else process_event
        _do_process_event(
            cache_key=cache_key,
            start_time=start_time,
            event_id=event_id,
            process_task=process_task,
            data=data,
            data_has_changed=has_changed,
            from_symbolicate=True,
        )

    symbolication_function = get_symbolication_function(data)
    symbolication_function_name = getattr(symbolication_function, "__name__", "none")

    if killswitch_matches_context(
        "store.load-shed-symbolicate-event-projects",
        {
            "project_id": project_id,
            "event_id": event_id,
            "platform": data.get("platform") or "null",
            "symbolication_function": symbolication_function_name,
        },
    ):
        return _continue_to_process_event()

    has_changed = False

    symbolication_start_time = time()

    submission_ratio = options.get("symbolicate-event.low-priority.metrics.submission-rate")
    submit_realtime_metrics = not from_reprocessing and random.random() < submission_ratio

    if submit_realtime_metrics:
        with sentry_sdk.start_span(op="tasks.store.symbolicate_event.low_priority.metrics.counter"):
            timestamp = int(symbolication_start_time)
            try:
                realtime_metrics.increment_project_event_counter(project_id, timestamp)
            except Exception as e:
                sentry_sdk.capture_exception(e)

    with sentry_sdk.start_span(op="tasks.store.symbolicate_event.symbolication") as span:
        span.set_data("symbolication_function", symbolication_function_name)
        with metrics.timer(
            "tasks.store.symbolicate_event.symbolication",
            tags={"symbolication_function": symbolication_function_name},
        ):
            while True:
                try:
                    with sentry_sdk.start_span(
                        op="tasks.store.symbolicate_event.%s" % symbolication_function_name
                    ) as span:
                        symbolicated_data = symbolication_function(data)
                        span.set_data("symbolicated_data", bool(symbolicated_data))

                    if symbolicated_data:
                        data = symbolicated_data
                        has_changed = True

                    break
                except RetrySymbolication as e:
                    if (
                        time() - symbolication_start_time
                    ) > settings.SYMBOLICATOR_PROCESS_EVENT_WARN_TIMEOUT:
                        error_logger.warning(
                            "symbolicate.slow",
                            extra={"project_id": project_id, "event_id": event_id},
                        )
                    if (
                        time() - symbolication_start_time
                    ) > settings.SYMBOLICATOR_PROCESS_EVENT_HARD_TIMEOUT:
                        # Do not drop event but actually continue with rest of pipeline
                        # (persisting unsymbolicated event)
                        metrics.incr(
                            "tasks.store.symbolicate_event.fatal",
                            tags={
                                "reason": "timeout",
                                "symbolication_function": symbolication_function_name,
                            },
                        )
                        error_logger.exception(
                            "symbolicate.failed.infinite_retry",
                            extra={"project_id": project_id, "event_id": event_id},
                        )
                        data.setdefault("_metrics", {})["flag.processing.error"] = True
                        data.setdefault("_metrics", {})["flag.processing.fatal"] = True
                        has_changed = True
                        break
                    else:
                        # sleep for `retry_after` but max 5 seconds and try again
                        metrics.incr(
                            "tasks.store.symbolicate_event.retry",
                            tags={"symbolication_function": symbolication_function_name},
                        )
                        sleep(min(e.retry_after, SYMBOLICATOR_MAX_RETRY_AFTER))
                        continue
                except Exception:
                    metrics.incr(
                        "tasks.store.symbolicate_event.fatal",
                        tags={
                            "reason": "error",
                            "symbolication_function": symbolication_function_name,
                        },
                    )
                    error_logger.exception("tasks.store.symbolicate_event.symbolication")
                    data.setdefault("_metrics", {})["flag.processing.error"] = True
                    data.setdefault("_metrics", {})["flag.processing.fatal"] = True
                    has_changed = True
                    break

    if submit_realtime_metrics:
        with sentry_sdk.start_span(
            op="tasks.store.symbolicate_event.low_priority.metrics.histogram"
        ):
            symbolication_duration = int(time() - symbolication_start_time)
            try:
                realtime_metrics.increment_project_duration_counter(
                    project_id, timestamp, symbolication_duration
                )
            except Exception as e:
                sentry_sdk.capture_exception(e)

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        cache_key = event_processing_store.store(data)

    return _continue_to_process_event()
Example #8
def handle_owner_assignment(project, group, event):
    from sentry.models import GroupAssignee, ProjectOwnership

    with metrics.timer("post_process.handle_owner_assignment"):
        with sentry_sdk.start_span(
                op="post_process.handle_owner_assignment.cache_set_owner"):
            owner_key = "owner_exists:1:%s" % group.id
            owners_exists = cache.get(owner_key)
            if owners_exists is None:
                owners_exists = group.groupowner_set.exists()
                # Cache for an hour if it's assigned. We don't need to move that fast.
                cache.set(owner_key, owners_exists,
                          3600 if owners_exists else 60)

        with sentry_sdk.start_span(
                op="post_process.handle_owner_assignment.cache_set_assignee"):
            # Is the issue already assigned to a team or user?
            assignee_key = "assignee_exists:1:%s" % group.id
            assignees_exists = cache.get(assignee_key)
            if assignees_exists is None:
                assignees_exists = group.assignee_set.exists()
                # Cache for an hour if it's assigned. We don't need to move that fast.
                cache.set(assignee_key, assignees_exists,
                          3600 if assignees_exists else 60)

        if owners_exists and assignees_exists:
            return

        with sentry_sdk.start_span(
                op="post_process.handle_owner_assignment.get_autoassign_owners"
        ):
            if killswitch_matches_context(
                    "post_process.get-autoassign-owners",
                {
                    "project_id": project.id,
                },
            ):
                # see ProjectOwnership.get_autoassign_owners
                auto_assignment = False
                owners = []
                assigned_by_codeowners = False
            else:
                (
                    auto_assignment,
                    owners,
                    assigned_by_codeowners,
                ) = ProjectOwnership.get_autoassign_owners(
                    group.project_id, event.data)

        with sentry_sdk.start_span(
                op="post_process.handle_owner_assignment.analytics_record"):
            if auto_assignment and owners and not assignees_exists:
                assignment = GroupAssignee.objects.assign(group,
                                                          owners[0],
                                                          create_only=True)
                if assignment["new_assignment"] or assignment[
                        "updated_assignment"]:
                    analytics.record(
                        "codeowners.assignment" if assigned_by_codeowners else
                        "issueowners.assignment",
                        organization_id=project.organization_id,
                        project_id=project.id,
                        group_id=group.id,
                    )

        with sentry_sdk.start_span(
                op="post_process.handle_owner_assignment.handle_group_owners"):
            if owners and not owners_exists:
                try:
                    handle_group_owners(project, group, owners)
                except Exception:
                    logger.exception("Failed to store group owners")
Example #9
def buffered_delete_old_primary_hash(
    project_id,
    group_id,
    event_id=None,
    datetime=None,
    old_primary_hash=None,
    current_primary_hash=None,
    force_flush_batch: bool = False,
):
    """
    In case the primary hash changed during reprocessing, we need to tell
    Snuba before reinserting the event. Snuba may then insert a tombstone row
    depending on whether the primary_hash is part of the PK/sortkey or not.

    Only when the primary_hash changed and is part of the sortkey, we need to
    explicitly tombstone the old row.

    If the primary_hash is not part of the PK/sortkey, or if the primary_hash
    did not change, nothing needs to be done as ClickHouse's table merge will
    merge the two rows together.

    Like `buffered_handle_remaining_events`, this is a quick and dirty way to
    batch event IDs so requests to tombstone rows are not being individually
    sent over to Snuba.

    This also includes the same constraints for optimal performance as
    `buffered_handle_remaining_events` in that events being fed to this should
    have datetimes as close to each other as possible. Unfortunately, this
    function is invoked by tasks that are run asynchronously and therefore the
    guarantee from `buffered_handle_remaining_events` regarding events being
    sorted by timestamps is not applicable here.

    This function also does not batch events which have different old primary
    hashes together into one operation. This means that if the data being fed
    in tends to have a 1:1 ratio of events to old primary hashes, the buffering
    here effectively does nothing.
    """

    from sentry import killswitches

    if killswitches.killswitch_matches_context(
            "reprocessing2.drop-delete-old-primary-hash",
        {"project_id": project_id}):
        return

    client = _get_sync_redis_client()

    # This is a meta key that contains old primary hashes. These hashes are then
    # combined with other values to construct a key that points to a list of
    # tombstonable events.
    primary_hash_set_key = f"re2:tombstone-primary-hashes:{project_id}:{group_id}"
    old_primary_hashes = client.smembers(primary_hash_set_key)

    def build_event_key(primary_hash):
        return f"re2:tombstones:{{{project_id}:{group_id}:{primary_hash}}}"

    if old_primary_hash is not None and old_primary_hash != current_primary_hash:
        event_key = build_event_key(old_primary_hash)
        client.lpush(event_key, f"{to_timestamp(datetime)};{event_id}")
        client.expire(event_key, settings.SENTRY_REPROCESSING_SYNC_TTL)

        if old_primary_hash not in old_primary_hashes:
            old_primary_hashes.add(old_primary_hash)
            client.sadd(primary_hash_set_key, old_primary_hash)
            client.expire(primary_hash_set_key,
                          settings.SENTRY_REPROCESSING_SYNC_TTL)

    # Events for a group are split and bucketed by their primary hashes. If flushing is to be
    # performed on a per-group basis, the event count needs to be summed up across all buckets
    # belonging to a single group.
    event_count = 0
    for primary_hash in old_primary_hashes:
        key = build_event_key(primary_hash)
        event_count += client.llen(key)

    if force_flush_batch or event_count > settings.SENTRY_REPROCESSING_REMAINING_EVENTS_BUF_SIZE:
        with sentry_sdk.start_span(
                op="sentry.reprocessing2.buffered_delete_old_primary_hash.flush_events"
        ):
            for primary_hash in old_primary_hashes:
                event_key = build_event_key(primary_hash)
                event_ids, from_date, to_date = pop_batched_events_from_redis(
                    event_key)

                # Racing might be happening between two different tasks. Give up on the
                # task that's lagging behind by prematurely terminating flushing.
                if len(event_ids) == 0:
                    with sentry_sdk.configure_scope() as scope:
                        scope.set_tag("project_id", project_id)
                        scope.set_tag("old_group_id", group_id)
                        scope.set_tag("old_primary_hash", old_primary_hash)

                    logger.error(
                        "reprocessing2.buffered_delete_old_primary_hash.empty_batch"
                    )
                    return

                from sentry import eventstream

                # In the worst case scenario, a group will have a 1:1 mapping of primary hashes to
                # events, which means 1 insert per event.
                # The overall performance of this will be marginally better than the unbatched version
                # if a group has a lot of old primary hashes.
                eventstream.tombstone_events_unsafe(
                    project_id,
                    event_ids,
                    old_primary_hash=old_primary_hash,
                    from_timestamp=from_date,
                    to_timestamp=to_date,
                )

        # Try to track counts so if it turns out that tombstoned events trend towards a ratio of 1
        # event per hash, a different solution may need to be considered.
        ratio = 0 if len(old_primary_hashes) == 0 else event_count / len(old_primary_hashes)
        metrics.timing(
            key="reprocessing2.buffered_delete_old_primary_hash.event_count",
            value=event_count,
        )
        metrics.timing(
            key="reprocessing2.buffered_delete_old_primary_hash.primary_hash_count",
            value=len(old_primary_hashes),
        )
        metrics.timing(
            key="reprocessing2.buffered_delete_old_primary_hash.primary_hash_to_event_ratio",
            value=ratio,
        )
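
Note: the buffering in buffered_delete_old_primary_hash is built on two Redis key shapes. The
sketch below mirrors primary_hash_set_key and build_event_key from the function above; the
sample IDs are made up, and the hash-tag reading of the literal braces is an assumption, not
something the code states:

def example_tombstone_keys(project_id: int, group_id: int, primary_hash: str):
    # Meta set of old primary hashes seen for a (project, group) pair.
    meta_key = f"re2:tombstone-primary-hashes:{project_id}:{group_id}"
    # Per-hash list of "timestamp;event_id" entries awaiting tombstoning. The
    # literal braces presumably act as a Redis Cluster hash tag so related
    # keys share a slot (assumption).
    event_key = f"re2:tombstones:{{{project_id}:{group_id}:{primary_hash}}}"
    return meta_key, event_key


# e.g. example_tombstone_keys(42, 1337, "aabbcc") returns
#   ("re2:tombstone-primary-hashes:42:1337", "re2:tombstones:{42:1337:aabbcc}")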