def reprocess_event(project_id, event_id, start_time):
    from sentry.event_manager import set_tag
    from sentry.tasks.store import preprocess_event_from_reprocessing
    from sentry.ingest.ingest_consumer import CACHE_TIMEOUT

    # Take unprocessed data from old event and save it as unprocessed data
    # under a new event ID. The second step happens in pre-process. We could
    # save the "original event ID" instead and get away with writing less to
    # nodestore, but doing it this way makes the logic slightly simpler.
    node_id = _generate_unprocessed_event_node_id(project_id=project_id, event_id=event_id)

    with sentry_sdk.start_span(op="reprocess_events.nodestore.get"):
        data = nodestore.get(node_id)

    with sentry_sdk.start_span(op="reprocess_events.eventstore.get"):
        event = eventstore.get_event_by_id(project_id, event_id)

    if event is None:
        logger.error(
            "reprocessing2.event.not_found",
            extra={"project_id": project_id, "event_id": event_id},
        )
        return

    if data is None:
        logger.error(
            "reprocessing2.reprocessing_nodestore.not_found",
            extra={"project_id": project_id, "event_id": event_id},
        )
        # We have no real data for reprocessing. We assume this event goes
        # straight to save_event, and hope that the event data can be
        # reingested like that. It's better than data loss.
        #
        # XXX: Ideally we would run a "save-lite" for this that only updates
        # the group ID in-place. Like a snuba merge message.
        data = dict(event.data)

    # Step 1: Fix up the event payload for reprocessing and put it in event
    # cache/event_processing_store
    set_tag(data, "original_group_id", event.group_id)
    cache_key = event_processing_store.store(data)

    # Step 2: Copy attachments into attachment cache
    queryset = models.EventAttachment.objects.filter(
        project_id=project_id, event_id=event_id
    ).select_related("file")

    attachment_objects = []

    for attachment_id, attachment in enumerate(queryset):
        with sentry_sdk.start_span(op="reprocess_event._copy_attachment_into_cache") as span:
            span.set_data("attachment_id", attachment.id)
            attachment_objects.append(
                _copy_attachment_into_cache(
                    attachment_id=attachment_id,
                    attachment=attachment,
                    cache_key=cache_key,
                    cache_timeout=CACHE_TIMEOUT,
                )
            )

    if attachment_objects:
        with sentry_sdk.start_span(op="reprocess_event.set_attachment_meta"):
            attachment_cache.set(cache_key, attachments=attachment_objects, timeout=CACHE_TIMEOUT)

    preprocess_event_from_reprocessing(
        cache_key=cache_key, start_time=start_time, event_id=event_id
    )
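
# For context: `_generate_unprocessed_event_node_id` is referenced above but not
# shown here. A minimal sketch of the idea -- derive a stable nodestore key from
# the project and event ID that is distinct from the processed event's node ID.
# The exact hash scheme below is an assumption for illustration, not necessarily
# what the module uses.
import hashlib


def _generate_unprocessed_event_node_id(project_id, event_id):
    # Stable key per (project, event), namespaced so it can never collide
    # with the node ID of the processed payload.
    return hashlib.md5(f"{project_id}:{event_id}:unprocessed".encode("utf-8")).hexdigest()
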
def reprocess_event(project_id, event_id, start_time):
    from sentry.tasks.store import preprocess_event_from_reprocessing
    from sentry.ingest.ingest_consumer import CACHE_TIMEOUT

    with sentry_sdk.start_span(op="reprocess_events.nodestore.get"):
        node_id = Event.generate_node_id(project_id, event_id)
        data = nodestore.get(node_id, subkey="unprocessed")
        if data is None:
            node_id = _generate_unprocessed_event_node_id(project_id=project_id, event_id=event_id)
            data = nodestore.get(node_id)

    with sentry_sdk.start_span(op="reprocess_events.eventstore.get"):
        event = eventstore.get_event_by_id(project_id, event_id)

    if event is None:
        logger.error(
            "reprocessing2.event.not_found",
            extra={"project_id": project_id, "event_id": event_id},
        )
        return

    if data is None:
        logger.error(
            "reprocessing2.reprocessing_nodestore.not_found",
            extra={"project_id": project_id, "event_id": event_id},
        )
        # We have no real data for reprocessing. We assume this event goes
        # straight to save_event, and hope that the event data can be
        # reingested like that. It's better than data loss.
        #
        # XXX: Ideally we would run a "save-lite" for this that only updates
        # the group ID in-place. Like a snuba merge message.
        data = dict(event.data)

    # Step 1: Fix up the event payload for reprocessing and put it in event
    # cache/event_processing_store
    set_path(data, "contexts", "reprocessing", "original_issue_id", value=event.group_id)
    cache_key = event_processing_store.store(data)

    # Step 2: Copy attachments into attachment cache
    queryset = models.EventAttachment.objects.filter(project_id=project_id, event_id=event_id)
    files = {f.id: f for f in models.File.objects.filter(id__in=[ea.file_id for ea in queryset])}

    attachment_objects = []

    for attachment_id, attachment in enumerate(queryset):
        with sentry_sdk.start_span(op="reprocess_event._copy_attachment_into_cache") as span:
            span.set_data("attachment_id", attachment.id)
            attachment_objects.append(
                _copy_attachment_into_cache(
                    attachment_id=attachment_id,
                    attachment=attachment,
                    file=files[attachment.file_id],
                    cache_key=cache_key,
                    cache_timeout=CACHE_TIMEOUT,
                )
            )

    if attachment_objects:
        with sentry_sdk.start_span(op="reprocess_event.set_attachment_meta"):
            attachment_cache.set(cache_key, attachments=attachment_objects, timeout=CACHE_TIMEOUT)

    preprocess_event_from_reprocessing(
        cache_key=cache_key, start_time=start_time, event_id=event_id
    )
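
# This revision prefetches the `models.File` rows and passes each one into
# `_copy_attachment_into_cache` instead of relying on select_related. A sketch
# of what that helper plausibly does: stream the file into the attachment cache
# chunk by chunk and return the metadata object that `attachment_cache.set`
# persists later. `set_chunk`, `CachedAttachment`, and the chunk size are
# assumptions about the cache API, not confirmed by the code above.
from sentry.attachments import CachedAttachment, attachment_cache


def _copy_attachment_into_cache(attachment_id, attachment, file, cache_key, cache_timeout):
    fp = file.getfile()
    chunk_index = 0
    size = 0
    while True:
        chunk = fp.read(2**20)  # assumed 1 MiB chunks; the real size is likely a setting
        if not chunk:
            break
        size += len(chunk)
        # Each chunk lives under (cache_key, attachment_id, chunk_index).
        attachment_cache.set_chunk(
            key=cache_key,
            id=attachment_id,
            chunk_index=chunk_index,
            chunk_data=chunk,
            timeout=cache_timeout,
        )
        chunk_index += 1
    return CachedAttachment(
        key=cache_key,
        id=attachment_id,
        name=attachment.name,
        content_type=file.headers.get("Content-Type", "application/octet-stream"),
        type=attachment.type,
        chunks=chunk_index,
        size=size,
    )
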
def reprocess_event(project_id, event_id, start_time):
    node_id = _generate_unprocessed_event_node_id(project_id=project_id, event_id=event_id)

    with sentry_sdk.start_span(op="reprocess_events.nodestore.get"):
        data = nodestore.get(node_id)

    if data is None:
        return

    from sentry.event_manager import set_tag
    from sentry.tasks.store import preprocess_event_from_reprocessing
    from sentry.ingest.ingest_consumer import CACHE_TIMEOUT

    # Take unprocessed data from old event and save it as unprocessed data
    # under a new event ID. The second step happens in pre-process. We could
    # save the "original event ID" instead and get away with writing less to
    # nodestore, but doing it this way makes the logic slightly simpler.

    # Step 1: Fix up the event payload for reprocessing and put it in event
    # cache/event_processing_store
    orig_event_id = data["event_id"]
    set_tag(data, "original_event_id", orig_event_id)

    event = eventstore.get_event_by_id(project_id, orig_event_id)
    if event is None:
        return

    set_tag(data, "original_group_id", event.group_id)

    # XXX: reuse event IDs
    event_id = data["event_id"] = uuid.uuid4().hex

    cache_key = event_processing_store.store(data)

    # Step 2: Copy attachments into attachment cache
    queryset = models.EventAttachment.objects.filter(
        project_id=project_id, event_id=orig_event_id
    ).select_related("file")

    attachment_objects = []

    for attachment_id, attachment in enumerate(queryset):
        with sentry_sdk.start_span(op="reprocess_event._copy_attachment_into_cache") as span:
            span.set_data("attachment_id", attachment.id)
            attachment_objects.append(
                _copy_attachment_into_cache(
                    attachment_id=attachment_id,
                    attachment=attachment,
                    cache_key=cache_key,
                    cache_timeout=CACHE_TIMEOUT,
                )
            )

    if attachment_objects:
        with sentry_sdk.start_span(op="reprocess_event.set_attachment_meta"):
            attachment_cache.set(cache_key, attachments=attachment_objects, timeout=CACHE_TIMEOUT)

    preprocess_event_from_reprocessing(
        cache_key=cache_key, start_time=start_time, event_id=event_id
    )
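
# This revision tags the payload with the original event and group IDs via
# `set_tag` from sentry.event_manager. An illustrative re-implementation of the
# semantics assumed above (drop any existing tag for the key, then append); the
# real helper may additionally trim values and skip None:
def set_tag(data, key, value):
    # Tags live on the payload as a list of (key, value) pairs.
    data["tags"] = [(k, v) for k, v in data.get("tags") or () if k != key]
    data["tags"].append((key, value))
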
def reprocess_event(project_id, event_id, start_time):
    from sentry.ingest.ingest_consumer import CACHE_TIMEOUT
    from sentry.lang.native.processing import get_required_attachment_types
    from sentry.tasks.store import preprocess_event_from_reprocessing

    with sentry_sdk.start_span(op="reprocess_events.nodestore.get"):
        node_id = Event.generate_node_id(project_id, event_id)
        data = nodestore.get(node_id, subkey="unprocessed")
        if data is None:
            node_id = _generate_unprocessed_event_node_id(project_id=project_id, event_id=event_id)
            data = nodestore.get(node_id)

    if data is None:
        raise CannotReprocess("reprocessing_nodestore.not_found")

    with sentry_sdk.start_span(op="reprocess_events.eventstore.get"):
        event = eventstore.get_event_by_id(project_id, event_id)

    if event is None:
        raise CannotReprocess("event.not_found")

    required_attachment_types = get_required_attachment_types(data)
    attachments = list(
        models.EventAttachment.objects.filter(
            project_id=project_id, event_id=event_id, type__in=list(required_attachment_types)
        )
    )
    missing_attachment_types = required_attachment_types - {ea.type for ea in attachments}
    if missing_attachment_types:
        raise CannotReprocess(
            f"attachment.not_found.{'_and_'.join(sorted(missing_attachment_types))}"
        )

    # Step 1: Fix up the event payload for reprocessing and put it in event
    # cache/event_processing_store
    set_path(data, "contexts", "reprocessing", "original_issue_id", value=event.group_id)
    set_path(
        data, "contexts", "reprocessing", "original_primary_hash", value=event.get_primary_hash()
    )
    cache_key = event_processing_store.store(data)

    # Step 2: Copy attachments into attachment cache. Note that we can only
    # consider minidumps because filestore just stays as-is after reprocessing
    # (we simply update group_id on the EventAttachment models in post_process)
    attachment_objects = []

    files = {f.id: f for f in models.File.objects.filter(id__in=[ea.file_id for ea in attachments])}

    for attachment_id, attachment in enumerate(attachments):
        with sentry_sdk.start_span(op="reprocess_event._copy_attachment_into_cache") as span:
            span.set_data("attachment_id", attachment.id)
            attachment_objects.append(
                _copy_attachment_into_cache(
                    attachment_id=attachment_id,
                    attachment=attachment,
                    file=files[attachment.file_id],
                    cache_key=cache_key,
                    cache_timeout=CACHE_TIMEOUT,
                )
            )

    if attachment_objects:
        with sentry_sdk.start_span(op="reprocess_event.set_attachment_meta"):
            attachment_cache.set(cache_key, attachments=attachment_objects, timeout=CACHE_TIMEOUT)

    preprocess_event_from_reprocessing(
        cache_key=cache_key,
        start_time=start_time,
        event_id=event_id,
        data=data,
    )
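
# This revision swaps the log-and-fall-back error handling for a
# `CannotReprocess` exception whose message doubles as the failure reason
# (e.g. "event.not_found"), so callers can log or count failures uniformly.
# A minimal sketch of that exception as assumed above:
class CannotReprocess(Exception):
    """The event lacks data required for reprocessing; the message names what is missing."""
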
def reprocess_event(project_id, event_id, start_time):
    from sentry.ingest.ingest_consumer import CACHE_TIMEOUT
    from sentry.tasks.store import preprocess_event_from_reprocessing

    reprocessable_event = pull_event_data(project_id, event_id)

    data = reprocessable_event.data
    event = reprocessable_event.event
    attachments = reprocessable_event.attachments

    # Step 1: Fix up the event payload for reprocessing and put it in event
    # cache/event_processing_store
    set_path(data, "contexts", "reprocessing", "original_issue_id", value=event.group_id)
    set_path(
        data, "contexts", "reprocessing", "original_primary_hash", value=event.get_primary_hash()
    )
    cache_key = event_processing_store.store(data)

    # Step 2: Copy attachments into attachment cache. Note that we can only
    # consider minidumps because filestore just stays as-is after reprocessing
    # (we simply update group_id on the EventAttachment models in post_process)
    attachment_objects = []

    files = {f.id: f for f in models.File.objects.filter(id__in=[ea.file_id for ea in attachments])}

    for attachment_id, attachment in enumerate(attachments):
        with sentry_sdk.start_span(op="reprocess_event._copy_attachment_into_cache") as span:
            span.set_data("attachment_id", attachment.id)
            attachment_objects.append(
                _copy_attachment_into_cache(
                    attachment_id=attachment_id,
                    attachment=attachment,
                    file=files[attachment.file_id],
                    cache_key=cache_key,
                    cache_timeout=CACHE_TIMEOUT,
                )
            )

    if attachment_objects:
        with sentry_sdk.start_span(op="reprocess_event.set_attachment_meta"):
            attachment_cache.set(cache_key, attachments=attachment_objects, timeout=CACHE_TIMEOUT)

    preprocess_event_from_reprocessing(
        cache_key=cache_key,
        start_time=start_time,
        event_id=event_id,
        data=data,
    )
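
# In this final revision the nodestore/eventstore/attachment lookups are
# factored out into `pull_event_data`, which returns a small container exposing
# `.data`, `.event`, and `.attachments`. A sketch of that shape, reconstructed
# from the previous revision's inline logic; the dataclass name and `frozen`
# flag are assumptions beyond the attribute access shown above.
from dataclasses import dataclass


@dataclass(frozen=True)
class ReprocessableEvent:
    event: Event
    data: dict
    attachments: list


def pull_event_data(project_id, event_id) -> ReprocessableEvent:
    from sentry.lang.native.processing import get_required_attachment_types

    with sentry_sdk.start_span(op="reprocess_events.nodestore.get"):
        node_id = Event.generate_node_id(project_id, event_id)
        data = nodestore.get(node_id, subkey="unprocessed")
        if data is None:
            node_id = _generate_unprocessed_event_node_id(project_id=project_id, event_id=event_id)
            data = nodestore.get(node_id)

    if data is None:
        raise CannotReprocess("reprocessing_nodestore.not_found")

    with sentry_sdk.start_span(op="reprocess_events.eventstore.get"):
        event = eventstore.get_event_by_id(project_id, event_id)

    if event is None:
        raise CannotReprocess("event.not_found")

    # Only attachments required for processing are pulled along; everything
    # else stays in filestore and is re-pointed at the new group later.
    required_attachment_types = get_required_attachment_types(data)
    attachments = list(
        models.EventAttachment.objects.filter(
            project_id=project_id, event_id=event_id, type__in=list(required_attachment_types)
        )
    )
    missing = required_attachment_types - {ea.type for ea in attachments}
    if missing:
        raise CannotReprocess(f"attachment.not_found.{'_and_'.join(sorted(missing))}")

    return ReprocessableEvent(event=event, data=data, attachments=attachments)
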