Example #1
0
def post_process_group(is_new,
                       is_regression,
                       is_new_group_environment,
                       cache_key,
                       group_id=None,
                       event=None,
                       **kwargs):
    """
    Fires post processing hooks for a group.

    The event is normally rebuilt from the payload stored in the event
    processing store under ``cache_key``. The ``event`` argument is a legacy
    path that is only kept for backwards compatibility. Processing is skipped
    (and a ``post_process.skipped`` line is logged) when the cached payload is
    missing, when a directly passed event was already post processed, or when
    the event is a reprocessed event.

    For events that belong to a group this processes snoozes (unless the
    group is new), handles owner assignment, applies rules, queues service
    hooks and resource-change tasks, and runs plugin post processing. The
    ``event_processed`` signal is sent for every event that is not skipped,
    and finally the cached payload is removed from the processing store.

    :param is_new: whether the group is new. Snoozes are not processed for
        new groups, and a "Group created" resource change is queued for them.
    :param is_regression: forwarded to the rule processor and to plugins.
    :param is_new_group_environment: forwarded to the rule processor.
    :param cache_key: key of the event payload in the event processing store.
    :param group_id: group id assigned to the event rebuilt from the cache.
    :param event: legacy, already constructed event. Only consulted when it
        is passed explicitly.
    :param kwargs: only ``primary_hash`` is read; it is forwarded to the
        ``event_processed`` signal.
    :return: ``None``.
    """
    from sentry.eventstore.models import Event
    from sentry.eventstore.processing import event_processing_store
    from sentry.utils import snuba
    from sentry.reprocessing2 import is_reprocessed_event

    with snuba.options_override({"consistent": True}):
        # We use the data being present/missing in the processing store
        # to ensure that we don't duplicate work should the forwarding consumers
        # need to rewind history.
        #
        # While we always send the cache_key and never send the event parameter now,
        # the code to handle `event` has to stick around for a self-hosted release cycle.
        if cache_key and event is None:
            data = event_processing_store.get(cache_key)
            if not data:
                logger.info(
                    "post_process.skipped",
                    extra={
                        "cache_key": cache_key,
                        "reason": "missing_cache"
                    },
                )
                return
            event = Event(project_id=data["project"],
                          event_id=data["event_id"],
                          group_id=group_id,
                          data=data)
        elif event and check_event_already_post_processed(event):
            if cache_key:
                event_processing_store.delete_by_key(cache_key)
            logger.info(
                "post_process.skipped",
                extra={
                    "reason": "duplicate",
                    "project_id": event.project_id,
                    "event_id": event.event_id,
                },
            )
            return

        # NOTE(review): if a caller supplies neither a cache_key nor an event,
        # `event` is still None here and the attribute access below raises.
        if is_reprocessed_event(event.data):
            logger.info(
                "post_process.skipped",
                extra={
                    "project_id": event.project_id,
                    "event_id": event.event_id,
                    "reason": "reprocessed",
                },
            )
            return

        set_current_project(event.project_id)

        # NOTE: we must pass through the full Event object, and not an
        # event_id since the Event object may not actually have been stored
        # in the database due to sampling.
        from sentry.models import Project, Organization, EventDict
        from sentry.models.group import get_group_with_redirect
        from sentry.rules.processor import RuleProcessor
        from sentry.tasks.servicehooks import process_service_hook

        # Re-bind node data to avoid renormalization. We only want to
        # renormalize when loading old data from the database.
        event.data = EventDict(event.data, skip_renormalization=True)

        if event.group_id:
            # Re-bind Group since we're reading the Event object
            # from cache, which may contain a stale group and project
            event.group, _ = get_group_with_redirect(event.group_id)
            event.group_id = event.group.id

        # Re-bind Project and Org since we're reading the Event object
        # from cache which may contain stale parent models.
        event.project = Project.objects.get_from_cache(id=event.project_id)
        event.project._organization_cache = Organization.objects.get_from_cache(
            id=event.project.organization_id)
        bind_organization_context(event.project.organization)

        _capture_stats(event, is_new)

        if event.group_id:
            # we process snoozes before rules as it might create a regression
            # but not if it's new because you can't immediately snooze a new group
            has_reappeared = False if is_new else process_snoozes(event.group)

            handle_owner_assignment(event.project, event.group, event)

            rp = RuleProcessor(event, is_new, is_regression,
                               is_new_group_environment, has_reappeared)
            has_alert = False
            # TODO(dcramer): ideally this would fanout, but serializing giant
            # objects back and forth isn't super efficient
            for callback, futures in rp.apply():
                has_alert = True
                with sentry_sdk.start_transaction(op="post_process_group",
                                                  name="rule_processor_apply",
                                                  sampled=True):
                    safe_execute(callback, event, futures)

            if features.has("projects:servicehooks", project=event.project):
                # "event.created" always applies; "event.alert" only when at
                # least one rule callback was produced above.
                allowed_events = {"event.created"}
                if has_alert:
                    allowed_events.add("event.alert")

                for servicehook_id, events in _get_service_hooks(
                        project_id=event.project_id):
                    if any(e in allowed_events for e in events):
                        process_service_hook.delay(
                            servicehook_id=servicehook_id, event=event)

            from sentry.tasks.sentry_apps import process_resource_change_bound

            if event.get_event_type(
            ) == "error" and _should_send_error_created_hooks(event.project):
                process_resource_change_bound.delay(action="created",
                                                    sender="Error",
                                                    instance_id=event.event_id,
                                                    instance=event)
            if is_new:
                process_resource_change_bound.delay(action="created",
                                                    sender="Group",
                                                    instance_id=event.group_id)

            from sentry.plugins.base import plugins

            for plugin in plugins.for_project(event.project):
                # NOTE(review): the `is_regresion` keyword has to match the
                # parameter name of plugin_post_process_group, which is
                # defined outside this block; confirm before renaming it.
                plugin_post_process_group(plugin_slug=plugin.slug,
                                          event=event,
                                          is_new=is_new,
                                          is_regresion=is_regression)

        event_processed.send_robust(
            sender=post_process_group,
            project=event.project,
            event=event,
            primary_hash=kwargs.get("primary_hash"),
        )

        # On the legacy `event` path there may be no cache entry at all, so
        # only touch the store when a key was supplied (this mirrors the guard
        # used by the duplicate-skip branch above).
        if cache_key:
            with metrics.timer("tasks.post_process.delete_event_cache"):
                event_processing_store.delete_by_key(cache_key)
Example #2
0
def post_process_group(
    is_new, is_regression, is_new_group_environment, cache_key, group_id=None, **kwargs
):
    """
    Run the post-processing pipeline for the event cached under ``cache_key``.

    The event payload is loaded from the event processing store; if it is no
    longer there the task logs ``post_process.skipped`` and returns. Events
    without a group id take a shortened path that only sends the
    ``transaction_processed`` signal. All other events go through inbox
    updates, snoozes, owner assignment, rules, suspect-commit scheduling,
    service hooks, resource-change tasks, plugins and similarity recording
    (the bulk of which is skipped for reprocessed events). The cached payload
    is deleted once processing is complete.

    :param is_new: whether the group is new.
    :param is_regression: whether the event is a regression of its group.
    :param is_new_group_environment: forwarded to the rule processor.
    :param cache_key: key of the event payload in the event processing store.
    :param group_id: group id given to the rebuilt event; a falsy value
        selects the shortened transaction path.
    :param kwargs: only ``primary_hash`` is read; it is forwarded to the
        ``event_processed`` signal.
    :return: ``None``.
    """
    from sentry.eventstore.models import Event
    from sentry.eventstore.processing import event_processing_store
    from sentry.reprocessing2 import is_reprocessed_event
    from sentry.utils import snuba

    with snuba.options_override({"consistent": True}):
        # A missing payload means this event was already handled (or expired),
        # which keeps us from repeating work when the forwarding consumers
        # rewind history.
        payload = event_processing_store.get(cache_key)
        if not payload:
            logger.info(
                "post_process.skipped",
                extra={"cache_key": cache_key, "reason": "missing_cache"},
            )
            return

        event = Event(
            project_id=payload["project"],
            event_id=payload["event_id"],
            group_id=group_id,
            data=payload,
        )

        set_current_event_project(event.project_id)

        # Events without a group are treated as transactions further down.
        belongs_to_group = bool(event.group_id)

        from sentry.models import EventDict, Organization, Project

        # Wrap the node data so it is not renormalized again; renormalization
        # is only wanted when old data is loaded from the database.
        event.data = EventDict(event.data, skip_renormalization=True)

        # The cached Event may carry stale parent models, so attach freshly
        # looked-up Project and Organization instances.
        event.project = Project.objects.get_from_cache(id=event.project_id)
        organization = Organization.objects.get_from_cache(id=event.project.organization_id)
        event.project.set_cached_field_value("organization", organization)

        # Transactions only get a minimal form of post processing. Eventually
        # they should not pass through here at all.
        if not belongs_to_group:
            transaction_processed.send_robust(
                sender=post_process_group, project=event.project, event=event
            )
            event_processing_store.delete_by_key(cache_key)
            return

        is_reprocessed = is_reprocessed_event(event.data)

        # NOTE: the complete Event object has to be handed around instead of
        # an event_id, because sampling may mean the event was never written
        # to the database.
        from sentry.models import Commit, GroupInboxReason
        from sentry.models.group import get_group_with_redirect
        from sentry.models.groupinbox import add_group_to_inbox
        from sentry.rules.processor import RuleProcessor
        from sentry.tasks.groupowner import process_suspect_commits
        from sentry.tasks.servicehooks import process_service_hook

        # Same staleness concern as above, this time for the Group.
        current_group, _ = get_group_with_redirect(event.group_id)
        event.group = current_group
        event.group_id = event.group.id

        event.group.project = event.project
        event.group.project.set_cached_field_value("organization", event.project.organization)

        bind_organization_context(event.project.organization)

        _capture_stats(event, is_new)

        if is_new and is_reprocessed:
            add_group_to_inbox(event.group, GroupInboxReason.REPROCESSED)

        if not is_reprocessed:
            # Snoozes come before rules since they may produce a regression.
            # A brand new group cannot have been snoozed yet, so skip it then.
            if is_new:
                has_reappeared = False
            else:
                has_reappeared = process_snoozes(event.group)

            # When the group reappeared the .UNIGNORED reason was added already.
            inbox_reason = None
            if not has_reappeared:
                if is_new:
                    inbox_reason = GroupInboxReason.NEW
                elif is_regression:
                    inbox_reason = GroupInboxReason.REGRESSION
            if inbox_reason is not None:
                add_group_to_inbox(event.group, inbox_reason)

            handle_owner_assignment(event.project, event.group, event)

            rule_processor = RuleProcessor(
                event, is_new, is_regression, is_new_group_environment, has_reappeared
            )
            has_alert = False
            # TODO(dcramer): ideally this would fanout, but serializing giant
            # objects back and forth isn't super efficient
            for rule_callback, rule_futures in rule_processor.apply():
                has_alert = True
                safe_execute(rule_callback, event, rule_futures, _with_transaction=False)

            try:
                with locks.get(f"w-o:{event.group_id}-d-l", duration=10).acquire():
                    # Whether the organization has any commit is cached for an hour.
                    org_commit_key = f"w-o:{event.project.organization_id}-h-c"
                    org_has_commit = cache.get(org_commit_key)
                    if org_has_commit is None:
                        commit_query = Commit.objects.filter(
                            organization_id=event.project.organization_id
                        )
                        org_has_commit = commit_query.exists()
                        cache.set(org_commit_key, org_has_commit, 3600)

                    if org_has_commit:
                        debounce_key = f"w-o-i:g-{event.group_id}"
                        if not cache.get(debounce_key):
                            from sentry.utils.committers import get_frame_paths

                            cache.set(debounce_key, True, 604800)  # 1 week in seconds
                            frame_paths = get_frame_paths(event.data)
                            process_suspect_commits.delay(
                                event_id=event.event_id,
                                event_platform=event.platform,
                                event_frames=frame_paths,
                                group_id=event.group_id,
                                project_id=event.project_id,
                            )
                        else:
                            metrics.incr(
                                "sentry.tasks.process_suspect_commits.debounce",
                                tags={"detail": "w-o-i:g debounce"},
                            )
            except UnableToAcquireLock:
                # Someone else holds the lock for this group; nothing to do.
                pass
            except Exception:
                logger.exception("Failed to process suspect commits")

            if features.has("projects:servicehooks", project=event.project):
                if has_alert:
                    allowed_events = {"event.created", "event.alert"}
                else:
                    allowed_events = {"event.created"}

                for servicehook_id, hook_events in _get_service_hooks(
                    project_id=event.project_id
                ):
                    if not allowed_events.isdisjoint(hook_events):
                        process_service_hook.delay(servicehook_id=servicehook_id, event=event)

            from sentry.tasks.sentry_apps import process_resource_change_bound

            if event.get_event_type() == "error":
                if _should_send_error_created_hooks(event.project):
                    process_resource_change_bound.delay(
                        action="created",
                        sender="Error",
                        instance_id=event.event_id,
                        instance=event,
                    )
            if is_new:
                process_resource_change_bound.delay(
                    action="created", sender="Group", instance_id=event.group_id
                )

            from sentry.plugins.base import plugins

            for active_plugin in plugins.for_project(event.project):
                plugin_post_process_group(
                    plugin_slug=active_plugin.slug,
                    event=event,
                    is_new=is_new,
                    is_regresion=is_regression,
                )

            from sentry import similarity

            safe_execute(similarity.record, event.project, [event], _with_transaction=False)

        # Attachments ingested on the standalone path still need patching.
        update_existing_attachments(event)

        if not is_reprocessed:
            event_processed.send_robust(
                sender=post_process_group,
                project=event.project,
                event=event,
                primary_hash=kwargs.get("primary_hash"),
            )

        with metrics.timer("tasks.post_process.delete_event_cache"):
            event_processing_store.delete_by_key(cache_key)
Example #3
0
def backfill_eventstream(apps, schema_editor):
    """
    Inserts Postgres events into the eventstream if there are recent events in Postgres.

    This is for open source users migrating from 9.x who want to keep their events.
    If there are no recent events in Postgres, skip the backfill.

    Setting the ``SENTRY_SKIP_EVENTS_BACKFILL_FOR_10`` environment variable to
    any non-empty value skips the backfill entirely. Events whose project,
    group or node data is missing are skipped, and a failure to migrate a
    single event is printed and does not abort the run.

    :param apps: registry used to look up the ``Event``, ``Group`` and
        ``Project`` models through ``get_model``.
    :param schema_editor: unused.
    :raises Exception: when there were events to migrate but none of them
        could be migrated.
    """
    from sentry import eventstore, eventstream
    from sentry.utils.query import RangeQuerySetWrapper

    # Kill switch to skip this migration. Checked first so that nothing else
    # is looked up or queried when the operator opted out.
    skip_backfill = os.environ.get("SENTRY_SKIP_EVENTS_BACKFILL_FOR_10", False)
    if skip_backfill:
        print("Skipping backfill.\n")  # noqa: B314
        return

    Event = apps.get_model("sentry", "Event")
    Group = apps.get_model("sentry", "Group")
    Project = apps.get_model("sentry", "Project")

    # Use 90 day retention if the option has not been set or set to 0
    DEFAULT_RETENTION = 90
    retention_days = options.get(
        "system.event-retention-days") or DEFAULT_RETENTION

    def get_events(last_days):
        # Events from the last `last_days` days that are attached to a group.
        to_date = timezone.now()
        from_date = to_date - timedelta(days=last_days)
        return Event.objects.filter(datetime__gte=from_date,
                                    datetime__lte=to_date,
                                    group_id__isnull=False)

    def _attach_related(_events):
        # Bulk-load the projects and groups of one batch of events and bind
        # them (and the node data) onto the event objects.
        project_ids = {event.project_id for event in _events}
        group_ids = {event.group_id for event in _events}
        projects = {
            p.id: p
            for p in Project.objects.filter(id__in=project_ids)
        }
        groups = {g.id: g for g in Group.objects.filter(id__in=group_ids)}

        for event in _events:
            event.project = projects.get(event.project_id)
            event.group = groups.get(event.group_id)
            # When migrating old data from Sentry 9.0.0 to 9.1.2 to 10 in rapid succession, the event timestamp may be
            # missing. This adds it back
            if "timestamp" not in event.data.data:
                event.data.data["timestamp"] = to_timestamp(event.datetime)
        eventstore.bind_nodes(_events, "data")

    events = get_events(retention_days)
    count = events.count()

    if count == 0:
        print("Nothing to do, skipping migration.\n")  # noqa: B314
        return

    print(f"Events to process: {count}\n")  # noqa: B314

    processed = 0
    for e in RangeQuerySetWrapper(events,
                                  step=100,
                                  callbacks=(_attach_related, )):
        event_data = e.data.data
        # `not event_data` also covers a missing (None) payload, which would
        # otherwise abort the whole migration with a TypeError.
        if e.project is None or e.group is None or not event_data:
            print(  # noqa: B314
                f"Skipped {e} as group, project or node data information is invalid.\n"
            )
            continue

        event = NewEvent(project_id=e.project_id,
                         event_id=e.event_id,
                         group_id=e.group_id,
                         data=event_data)

        event.group = e.group
        event.project = e.project

        try:
            # Fall back to the event's own time when no "received" value was
            # stored. datetime.timestamp() is used instead of strftime("%s"):
            # "%s" is a platform-specific extension that is unavailable on
            # some systems, ignores tzinfo and drops sub-second precision.
            received_timestamp = (event.data.get("received")
                                  or event.datetime.timestamp())

            eventstream.insert(
                group=event.group,
                event=event,
                is_new=False,
                is_regression=False,
                is_new_group_environment=False,
                primary_hash=event.get_primary_hash(),
                received_timestamp=received_timestamp,
                skip_consume=True,
            )

            # The node ID format was changed in Sentry 9.1.0
            # (https://github.com/getsentry/sentry/commit/f73a4039d16a5c4f88bde37f6464cac21deb50e1)
            # If we are migrating from older versions of Sentry (i.e. 9.0.0 and earlier)
            # we need to resave the node using the new node ID scheme and delete the old
            # node.
            old_node_id = e.data.id
            new_node_id = event.data.id
            if old_node_id != new_node_id:
                event.data.save()
                nodestore.delete(old_node_id)

            processed += 1
        except Exception as error:
            # Best effort: report the failing event and carry on with the rest.
            print(  # noqa: B314
                f"An error occured while trying to migrate the following event: {event}\n.----\n{error}"
            )

    if processed == 0:
        raise Exception(
            "Cannot migrate any event. If this is okay, re-run migrations with SENTRY_SKIP_EVENTS_BACKFILL_FOR_10 environment variable set to skip this step."
        )

    print(f"Event migration done. Migrated {processed} of {count} events.\n")  # noqa: B314