def post_process_group(
    is_new, is_regression, is_new_group_environment, cache_key, group_id=None, event=None, **kwargs
):
    """
    Fires post processing hooks for a group.
    """
    from sentry.eventstore.models import Event
    from sentry.eventstore.processing import event_processing_store
    from sentry.utils import snuba
    from sentry.reprocessing2 import is_reprocessed_event

    with snuba.options_override({"consistent": True}):
        # We use the data being present/missing in the processing store
        # to ensure that we don't duplicate work should the forwarding consumers
        # need to rewind history.
        #
        # While we always send the cache_key and never send the event parameter now,
        # the code to handle `event` has to stick around for a self-hosted release cycle.
        if cache_key and event is None:
            data = event_processing_store.get(cache_key)
            if not data:
                logger.info(
                    "post_process.skipped",
                    extra={"cache_key": cache_key, "reason": "missing_cache"},
                )
                return
            event = Event(
                project_id=data["project"], event_id=data["event_id"], group_id=group_id, data=data
            )
        elif event and check_event_already_post_processed(event):
            if cache_key:
                event_processing_store.delete_by_key(cache_key)
            logger.info(
                "post_process.skipped",
                extra={
                    "reason": "duplicate",
                    "project_id": event.project_id,
                    "event_id": event.event_id,
                },
            )
            return

        if is_reprocessed_event(event.data):
            logger.info(
                "post_process.skipped",
                extra={
                    "project_id": event.project_id,
                    "event_id": event.event_id,
                    "reason": "reprocessed",
                },
            )
            return

        set_current_project(event.project_id)

        # NOTE: we must pass through the full Event object, and not an
        # event_id since the Event object may not actually have been stored
        # in the database due to sampling.
        from sentry.models import Project, Organization, EventDict
        from sentry.models.group import get_group_with_redirect
        from sentry.rules.processor import RuleProcessor
        from sentry.tasks.servicehooks import process_service_hook

        # Re-bind node data to avoid renormalization. We only want to
        # renormalize when loading old data from the database.
        event.data = EventDict(event.data, skip_renormalization=True)

        if event.group_id:
            # Re-bind Group since we're reading the Event object
            # from cache, which may contain a stale group and project
            event.group, _ = get_group_with_redirect(event.group_id)
            event.group_id = event.group.id

        # Re-bind Project and Org since we're reading the Event object
        # from cache which may contain stale parent models.
        event.project = Project.objects.get_from_cache(id=event.project_id)
        event.project._organization_cache = Organization.objects.get_from_cache(
            id=event.project.organization_id
        )
        bind_organization_context(event.project.organization)

        _capture_stats(event, is_new)

        if event.group_id:
            # we process snoozes before rules as it might create a regression
            # but not if it's new because you can't immediately snooze a new group
            has_reappeared = False if is_new else process_snoozes(event.group)

            handle_owner_assignment(event.project, event.group, event)

            rp = RuleProcessor(
                event, is_new, is_regression, is_new_group_environment, has_reappeared
            )
            has_alert = False
            # TODO(dcramer): ideally this would fanout, but serializing giant
            # objects back and forth isn't super efficient
            for callback, futures in rp.apply():
                has_alert = True
                with sentry_sdk.start_transaction(
                    op="post_process_group", name="rule_processor_apply", sampled=True
                ):
                    safe_execute(callback, event, futures)

            if features.has("projects:servicehooks", project=event.project):
                allowed_events = set(["event.created"])
                if has_alert:
                    allowed_events.add("event.alert")

                if allowed_events:
                    for servicehook_id, events in _get_service_hooks(project_id=event.project_id):
                        if any(e in allowed_events for e in events):
                            process_service_hook.delay(servicehook_id=servicehook_id, event=event)

            from sentry.tasks.sentry_apps import process_resource_change_bound

            if event.get_event_type() == "error" and _should_send_error_created_hooks(
                event.project
            ):
                process_resource_change_bound.delay(
                    action="created", sender="Error", instance_id=event.event_id, instance=event
                )
            if is_new:
                process_resource_change_bound.delay(
                    action="created", sender="Group", instance_id=event.group_id
                )

            from sentry.plugins.base import plugins

            for plugin in plugins.for_project(event.project):
                # NB: the misspelled "is_regresion" kwarg is kept as-is; it
                # matches the receiving task's signature.
                plugin_post_process_group(
                    plugin_slug=plugin.slug, event=event, is_new=is_new, is_regresion=is_regression
                )

            event_processed.send_robust(
                sender=post_process_group,
                project=event.project,
                event=event,
                primary_hash=kwargs.get("primary_hash"),
            )

        with metrics.timer("tasks.post_process.delete_event_cache"):
            event_processing_store.delete_by_key(cache_key)
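

# Illustrative only: how a caller is expected to hand work to the task above.
# The payload is written to the processing store first, and only the resulting
# cache_key travels through the queue; the deprecated `event` kwarg is omitted.
# This sketch assumes post_process_group is registered as an async task (its
# decorator is not shown in this snippet); `data`, `group_id`, and the state
# flags are stand-ins for the save pipeline's values.
def _enqueue_post_processing_sketch(data, group_id, is_new, is_regression, is_new_group_environment):
    from sentry.eventstore.processing import event_processing_store

    # Persist the event payload and keep only the key; the task re-reads the
    # payload (or skips itself if the key has already been consumed).
    cache_key = event_processing_store.store(data)
    post_process_group.delay(
        is_new=is_new,
        is_regression=is_regression,
        is_new_group_environment=is_new_group_environment,
        cache_key=cache_key,
        group_id=group_id,
    )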


def post_process_group(
    is_new, is_regression, is_new_group_environment, cache_key, group_id=None, **kwargs
):
    """
    Fires post processing hooks for a group.
    """
    from sentry.eventstore.models import Event
    from sentry.eventstore.processing import event_processing_store
    from sentry.reprocessing2 import is_reprocessed_event
    from sentry.utils import snuba

    with snuba.options_override({"consistent": True}):
        # We use the data being present/missing in the processing store
        # to ensure that we don't duplicate work should the forwarding consumers
        # need to rewind history.
        data = event_processing_store.get(cache_key)
        if not data:
            logger.info(
                "post_process.skipped",
                extra={"cache_key": cache_key, "reason": "missing_cache"},
            )
            return
        event = Event(
            project_id=data["project"], event_id=data["event_id"], group_id=group_id, data=data
        )

        set_current_event_project(event.project_id)

        is_transaction_event = not bool(event.group_id)

        from sentry.models import EventDict, Organization, Project

        # Re-bind node data to avoid renormalization. We only want to
        # renormalize when loading old data from the database.
        event.data = EventDict(event.data, skip_renormalization=True)

        # Re-bind Project and Org since we're reading the Event object
        # from cache which may contain stale parent models.
        event.project = Project.objects.get_from_cache(id=event.project_id)
        event.project.set_cached_field_value(
            "organization", Organization.objects.get_from_cache(id=event.project.organization_id)
        )

        # Simplified post processing for transaction events.
        # This should eventually be completely removed and transactions
        # will not go through any post processing.
        if is_transaction_event:
            transaction_processed.send_robust(
                sender=post_process_group,
                project=event.project,
                event=event,
            )

            event_processing_store.delete_by_key(cache_key)

            return

        is_reprocessed = is_reprocessed_event(event.data)

        # NOTE: we must pass through the full Event object, and not an
        # event_id since the Event object may not actually have been stored
        # in the database due to sampling.
        from sentry.models import Commit, GroupInboxReason
        from sentry.models.group import get_group_with_redirect
        from sentry.models.groupinbox import add_group_to_inbox
        from sentry.rules.processor import RuleProcessor
        from sentry.tasks.groupowner import process_suspect_commits
        from sentry.tasks.servicehooks import process_service_hook

        # Re-bind Group since we're reading the Event object
        # from cache, which may contain a stale group and project
        event.group, _ = get_group_with_redirect(event.group_id)
        event.group_id = event.group.id

        event.group.project = event.project
        event.group.project.set_cached_field_value("organization", event.project.organization)

        bind_organization_context(event.project.organization)

        _capture_stats(event, is_new)

        if is_reprocessed and is_new:
            add_group_to_inbox(event.group, GroupInboxReason.REPROCESSED)

        if not is_reprocessed:
            # we process snoozes before rules as it might create a regression
            # but not if it's new because you can't immediately snooze a new group
            has_reappeared = False if is_new else process_snoozes(event.group)
            if not has_reappeared:  # If true, we added the .UNIGNORED reason already
                if is_new:
                    add_group_to_inbox(event.group, GroupInboxReason.NEW)
                elif is_regression:
                    add_group_to_inbox(event.group, GroupInboxReason.REGRESSION)

            handle_owner_assignment(event.project, event.group, event)

            rp = RuleProcessor(
                event, is_new, is_regression, is_new_group_environment, has_reappeared
            )
            has_alert = False
            # TODO(dcramer): ideally this would fanout, but serializing giant
            # objects back and forth isn't super efficient
            for callback, futures in rp.apply():
                has_alert = True
                safe_execute(callback, event, futures, _with_transaction=False)

            try:
                lock = locks.get(
                    f"w-o:{event.group_id}-d-l",
                    duration=10,
                )
                with lock.acquire():
                    has_commit_key = f"w-o:{event.project.organization_id}-h-c"
                    org_has_commit = cache.get(has_commit_key)
                    if org_has_commit is None:
                        org_has_commit = Commit.objects.filter(
                            organization_id=event.project.organization_id
                        ).exists()
                        cache.set(has_commit_key, org_has_commit, 3600)

                    if org_has_commit:
                        group_cache_key = f"w-o-i:g-{event.group_id}"
                        if cache.get(group_cache_key):
                            metrics.incr(
                                "sentry.tasks.process_suspect_commits.debounce",
                                tags={"detail": "w-o-i:g debounce"},
                            )
                        else:
                            from sentry.utils.committers import get_frame_paths

                            cache.set(group_cache_key, True, 604800)  # 1 week in seconds
                            event_frames = get_frame_paths(event.data)
                            process_suspect_commits.delay(
                                event_id=event.event_id,
                                event_platform=event.platform,
                                event_frames=event_frames,
                                group_id=event.group_id,
                                project_id=event.project_id,
                            )
            except UnableToAcquireLock:
                pass
            except Exception:
                logger.exception("Failed to process suspect commits")

            if features.has("projects:servicehooks", project=event.project):
                allowed_events = {"event.created"}
                if has_alert:
                    allowed_events.add("event.alert")

                if allowed_events:
                    for servicehook_id, events in _get_service_hooks(project_id=event.project_id):
                        if any(e in allowed_events for e in events):
                            process_service_hook.delay(servicehook_id=servicehook_id, event=event)

            from sentry.tasks.sentry_apps import process_resource_change_bound

            if event.get_event_type() == "error" and _should_send_error_created_hooks(
                event.project
            ):
                process_resource_change_bound.delay(
                    action="created", sender="Error", instance_id=event.event_id, instance=event
                )
            if is_new:
                process_resource_change_bound.delay(
                    action="created", sender="Group", instance_id=event.group_id
                )

            from sentry.plugins.base import plugins

            for plugin in plugins.for_project(event.project):
                # NB: the misspelled "is_regresion" kwarg is kept as-is; it
                # matches the receiving task's signature.
                plugin_post_process_group(
                    plugin_slug=plugin.slug, event=event, is_new=is_new, is_regresion=is_regression
                )

        from sentry import similarity

        safe_execute(similarity.record, event.project, [event], _with_transaction=False)

        # Patch attachments that were ingested on the standalone path.
        update_existing_attachments(event)

        if not is_reprocessed:
            event_processed.send_robust(
                sender=post_process_group,
                project=event.project,
                event=event,
                primary_hash=kwargs.get("primary_hash"),
            )

        with metrics.timer("tasks.post_process.delete_event_cache"):
            event_processing_store.delete_by_key(cache_key)
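

# A minimal, self-contained sketch of the debounce pattern used above for
# process_suspect_commits, with the Sentry-specific pieces stubbed out: a
# short-lived distributed lock serializes concurrent workers, and a TTL'd
# cache key ensures at most one dispatch per group per window. `lock_backend`
# and `cache_backend` stand in for Sentry's `locks` and `cache`; the key
# names mirror the ones above but are otherwise illustrative.
def debounced_dispatch(group_id, dispatch, lock_backend, cache_backend, ttl=604800):
    lock = lock_backend.get(f"w-o:{group_id}-d-l", duration=10)
    try:
        with lock.acquire():
            debounce_key = f"w-o-i:g-{group_id}"
            if cache_backend.get(debounce_key):
                return False  # already dispatched within the TTL window
            # Mark first, then dispatch: a crash after set() loses at most one
            # dispatch instead of risking duplicates.
            cache_backend.set(debounce_key, True, ttl)
            dispatch(group_id)
            return True
    except UnableToAcquireLock:
        # Another worker holds the lock and will make the decision.
        return False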


def backfill_eventstream(apps, schema_editor):
    """
    Inserts Postgres events into the eventstream if there are recent events in Postgres.

    This is for open source users migrating from 9.x who want to keep their events.
    If there are no recent events in Postgres, skip the backfill.
    """
    from sentry import eventstore, eventstream
    from sentry.utils.query import RangeQuerySetWrapper

    Event = apps.get_model("sentry", "Event")
    Group = apps.get_model("sentry", "Group")
    Project = apps.get_model("sentry", "Project")

    # Kill switch to skip this migration
    skip_backfill = os.environ.get("SENTRY_SKIP_EVENTS_BACKFILL_FOR_10", False)

    # Use 90 day retention if the option has not been set or set to 0
    DEFAULT_RETENTION = 90
    retention_days = options.get("system.event-retention-days") or DEFAULT_RETENTION

    def get_events(last_days):
        to_date = timezone.now()
        from_date = to_date - timedelta(days=last_days)
        return Event.objects.filter(
            datetime__gte=from_date, datetime__lte=to_date, group_id__isnull=False
        )

    def _attach_related(_events):
        project_ids = set()
        group_ids = set()
        for event in _events:
            project_ids.add(event.project_id)
            group_ids.add(event.group_id)
        projects = {p.id: p for p in Project.objects.filter(id__in=project_ids)}
        groups = {g.id: g for g in Group.objects.filter(id__in=group_ids)}

        for event in _events:
            event.project = projects.get(event.project_id)
            event.group = groups.get(event.group_id)
            # When migrating old data from Sentry 9.0.0 to 9.1.2 to 10 in rapid
            # succession, the event timestamp may be missing. This adds it back.
            if "timestamp" not in event.data.data:
                event.data.data["timestamp"] = to_timestamp(event.datetime)
        eventstore.bind_nodes(_events, "data")

    if skip_backfill:
        print("Skipping backfill.\n")  # noqa: B314
        return

    events = get_events(retention_days)
    count = events.count()

    if count == 0:
        print("Nothing to do, skipping migration.\n")  # noqa: B314
        return

    print(f"Events to process: {count}\n")  # noqa: B314

    processed = 0
    for e in RangeQuerySetWrapper(events, step=100, callbacks=(_attach_related,)):
        event_data = e.data.data

        if e.project is None or e.group is None or len(event_data) == 0:
            print(  # noqa: B314
                f"Skipped {e} as group, project or node data information is invalid.\n"
            )
            continue

        event = NewEvent(
            project_id=e.project_id, event_id=e.event_id, group_id=e.group_id, data=event_data
        )
        event.group = e.group
        event.project = e.project

        try:
            eventstream.insert(
                group=event.group,
                event=event,
                is_new=False,
                is_regression=False,
                is_new_group_environment=False,
                primary_hash=event.get_primary_hash(),
                received_timestamp=event.data.get("received")
                or float(event.datetime.strftime("%s")),
                skip_consume=True,
            )

            # The node ID format was changed in Sentry 9.1.0
            # (https://github.com/getsentry/sentry/commit/f73a4039d16a5c4f88bde37f6464cac21deb50e1)
            # If we are migrating from older versions of Sentry (i.e. 9.0.0 and earlier)
            # we need to resave the node using the new node ID scheme and delete the old
            # node.
            old_node_id = e.data.id
            new_node_id = event.data.id
            if old_node_id != new_node_id:
                event.data.save()
                nodestore.delete(old_node_id)

            processed += 1
        except Exception as error:
            print(  # noqa: B314
                f"An error occurred while trying to migrate the following event: {event}\n.----\n{error}"
            )

    if processed == 0:
        raise Exception(
            "Cannot migrate any event. If this is okay, re-run migrations with the "
            "SENTRY_SKIP_EVENTS_BACKFILL_FOR_10 environment variable set to skip this step."
        )

    print(f"Event migration done. Migrated {processed} of {count} events.\n")  # noqa: B314
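

# For context, a simplified sketch of what RangeQuerySetWrapper does for the
# loop above (the real implementation lives in sentry.utils.query): it pages
# through the queryset in primary-key order so the whole retention window is
# never held in memory at once, and hands each chunk to the callbacks (here
# _attach_related) so related rows are fetched with one query per chunk
# rather than one per event. The `id` field name and ordering are
# assumptions of this sketch, not the library's exact behavior.
def _range_queryset_iter(queryset, step=100, callbacks=()):
    last_pk = 0
    while True:
        chunk = list(queryset.filter(id__gt=last_pk).order_by("id")[:step])
        if not chunk:
            return
        for callback in callbacks:
            callback(chunk)  # e.g. batch-attach projects/groups, bind node data
        yield from chunk
        last_pk = chunk[-1].id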