def _do_save_event(cache_key=None, data=None, start_time=None, event_id=None, project_id=None, **kwargs): """ Saves an event to the database. """ set_current_event_project(project_id) from sentry.event_manager import EventManager, HashDiscarded event_type = "none" if cache_key and data is None: with metrics.timer( "tasks.store.do_save_event.get_cache") as metric_tags: data = event_processing_store.get(cache_key) if data is not None: metric_tags["event_type"] = event_type = data.get( "type") or "none" with metrics.global_tags(event_type=event_type): if data is not None: data = CanonicalKeyDict(data) if event_id is None and data is not None: event_id = data["event_id"] # only when we come from reprocessing we get a project_id sent into # the task. if project_id is None: project_id = data.pop("project") set_current_event_project(project_id) # We only need to delete raw events for events that support # reprocessing. If the data cannot be found we want to assume # that we need to delete the raw event. if not data or reprocessing.event_supports_reprocessing(data): with metrics.timer("tasks.store.do_save_event.delete_raw_event"): delete_raw_event(project_id, event_id, allow_hint_clear=True) # This covers two cases: where data is None because we did not manage # to fetch it from the default cache or the empty dictionary was # stored in the default cache. The former happens if the event # expired while being on the queue, the second happens on reprocessing # if the raw event was deleted concurrently while we held on to # it. This causes the node store to delete the data and we end up # fetching an empty dict. We could in theory not invoke `save_event` # in those cases but it's important that we always clean up the # reprocessing reports correctly or they will screw up the UI. So # to future proof this correctly we just handle this case here. if not data: metrics.incr("events.failed", tags={ "reason": "cache", "stage": "post" }, skip_internal=False) return try: with metrics.timer("tasks.store.do_save_event.event_manager.save"): manager = EventManager(data) # event.project.organization is populated after this statement. manager.save(project_id, assume_normalized=True, start_time=start_time, cache_key=cache_key) # Put the updated event back into the cache so that post_process # has the most recent data. data = manager.get_data() if isinstance(data, CANONICAL_TYPES): data = dict(data.items()) with metrics.timer( "tasks.store.do_save_event.write_processing_cache"): event_processing_store.store(data) except HashDiscarded: # Delete the event payload from cache since it won't show up in post-processing. if cache_key: with metrics.timer("tasks.store.do_save_event.delete_cache"): event_processing_store.delete_by_key(cache_key) finally: reprocessing2.mark_event_reprocessed(data) if cache_key: with metrics.timer( "tasks.store.do_save_event.delete_attachment_cache"): attachment_cache.delete(cache_key) if start_time: metrics.timing("events.time-to-process", time() - start_time, instance=data["platform"]) time_synthetic_monitoring_event(data, project_id, start_time)
def _do_save_event(cache_key=None, data=None, start_time=None, event_id=None, project_id=None, **kwargs): """ Saves an event to the database. """ from sentry.event_manager import HashDiscarded, EventManager from sentry import quotas from sentry.models import ProjectKey from sentry.utils.outcomes import Outcome, track_outcome from sentry.ingest.outcomes_consumer import mark_signal_sent event_type = "none" if cache_key and data is None: with metrics.timer( "tasks.store.do_save_event.get_cache") as metric_tags: data = default_cache.get(cache_key) if data is not None: metric_tags["event_type"] = event_type = data.get( "type") or "none" with metrics.global_tags(event_type=event_type): if data is not None: data = CanonicalKeyDict(data) if event_id is None and data is not None: event_id = data["event_id"] # only when we come from reprocessing we get a project_id sent into # the task. if project_id is None: project_id = data.pop("project") key_id = None if data is None else data.get("key_id") if key_id is not None: key_id = int(key_id) timestamp = to_datetime(start_time) if start_time is not None else None # We only need to delete raw events for events that support # reprocessing. If the data cannot be found we want to assume # that we need to delete the raw event. if not data or reprocessing.event_supports_reprocessing(data): with metrics.timer("tasks.store.do_save_event.delete_raw_event"): delete_raw_event(project_id, event_id, allow_hint_clear=True) # This covers two cases: where data is None because we did not manage # to fetch it from the default cache or the empty dictionary was # stored in the default cache. The former happens if the event # expired while being on the queue, the second happens on reprocessing # if the raw event was deleted concurrently while we held on to # it. This causes the node store to delete the data and we end up # fetching an empty dict. We could in theory not invoke `save_event` # in those cases but it's important that we always clean up the # reprocessing reports correctly or they will screw up the UI. So # to future proof this correctly we just handle this case here. if not data: metrics.incr("events.failed", tags={ "reason": "cache", "stage": "post" }, skip_internal=False) return with configure_scope() as scope: scope.set_tag("project", project_id) event = None try: with metrics.timer("tasks.store.do_save_event.event_manager.save"): manager = EventManager(data) # event.project.organization is populated after this statement. event = manager.save(project_id, assume_normalized=True, cache_key=cache_key) with metrics.timer("tasks.store.do_save_event.track_outcome"): # This is where we can finally say that we have accepted the event. track_outcome( event.project.organization_id, event.project.id, key_id, Outcome.ACCEPTED, None, timestamp, event_id, ) except HashDiscarded: project = Project.objects.get_from_cache(id=project_id) reason = FilterStatKeys.DISCARDED_HASH project_key = None try: if key_id is not None: project_key = ProjectKey.objects.get_from_cache(id=key_id) except ProjectKey.DoesNotExist: pass quotas.refund(project, key=project_key, timestamp=start_time) # There is no signal supposed to be sent for this particular # outcome-reason combination. Prevent the outcome consumer from # emitting it for now. # # XXX(markus): Revisit decision about signals once outcomes consumer is stable. mark_signal_sent(project_id, event_id) track_outcome( project.organization_id, project_id, key_id, Outcome.FILTERED, reason, timestamp, event_id, ) finally: if cache_key: with metrics.timer("tasks.store.do_save_event.delete_cache"): default_cache.delete(cache_key) with metrics.timer( "tasks.store.do_save_event.delete_attachment_cache"): # For the unlikely case that we did not manage to persist the # event we also delete the key always. if event is None or features.has( "organizations:event-attachments", event.project.organization, actor=None): attachment_cache.delete(cache_key) if start_time: metrics.timing("events.time-to-process", time() - start_time, instance=data["platform"])
def create_failed_event(cache_key, data, project_id, issues, event_id, start_time=None, reprocessing_rev=None): """If processing failed we put the original data from the cache into a raw event. Returns `True` if a failed event was inserted """ set_current_event_project(project_id) # We can only create failed events for events that can potentially # create failed events. if not reprocessing.event_supports_reprocessing(data): return False # If this event has just been reprocessed with reprocessing-v2, we don't # put it through reprocessing-v1 again. The value of reprocessing-v2 is # partially that one sees the entire event even in its failed state, all # the time. if reprocessing2.is_reprocessed_event(data): return False reprocessing_active = ProjectOption.objects.get_value( project_id, "sentry:reprocessing_active", REPROCESSING_DEFAULT) # In case there is reprocessing active but the current reprocessing # revision is already different than when we started, we want to # immediately retry the event. This resolves the problem when # otherwise a concurrent change of debug symbols might leave a # reprocessing issue stuck in the project forever. if (reprocessing_active and reprocessing.get_reprocessing_revision( project_id, cached=False) != reprocessing_rev): raise RetryProcessing() # The first time we encounter a failed event and the hint was cleared # we send a notification. sent_notification = ProjectOption.objects.get_value( project_id, "sentry:sent_failed_event_hint", False) if not sent_notification: project = Project.objects.get_from_cache(id=project_id) Activity.objects.create( type=Activity.NEW_PROCESSING_ISSUES, project=project, datetime=to_datetime(start_time), data={ "reprocessing_active": reprocessing_active, "issues": issues }, ).send_notification() ProjectOption.objects.set_value(project, "sentry:sent_failed_event_hint", True) # If reprocessing is not active we bail now without creating the # processing issues if not reprocessing_active: return False # We need to get the original data here instead of passing the data in # from the last processing step because we do not want any # modifications to take place. delete_raw_event(project_id, event_id) data = event_processing_store.get(cache_key) if data is None: metrics.incr("events.failed", tags={ "reason": "cache", "stage": "raw" }, skip_internal=False) error_logger.error("process.failed_raw.empty", extra={"cache_key": cache_key}) return True data = CanonicalKeyDict(data) from sentry.models import ProcessingIssue, RawEvent raw_event = RawEvent.objects.create( project_id=project_id, event_id=event_id, datetime=datetime.utcfromtimestamp( data["timestamp"]).replace(tzinfo=timezone.utc), data=data, ) for issue in issues: ProcessingIssue.objects.record_processing_issue( raw_event=raw_event, scope=issue["scope"], object=issue["object"], type=issue["type"], data=issue["data"], ) event_processing_store.delete_by_key(cache_key) return True
def _do_save_event( cache_key=None, data=None, start_time=None, event_id=None, project_id=None, **kwargs ): """ Saves an event to the database. """ from sentry.event_manager import EventManager, HashDiscarded event_type = "none" if cache_key and data is None: with metrics.timer("tasks.store.do_save_event.get_cache") as metric_tags: data = default_cache.get(cache_key) if data is not None: metric_tags["event_type"] = event_type = data.get("type") or "none" with metrics.global_tags(event_type=event_type): if data is not None: data = CanonicalKeyDict(data) if event_id is None and data is not None: event_id = data["event_id"] # only when we come from reprocessing we get a project_id sent into # the task. if project_id is None: project_id = data.pop("project") # We only need to delete raw events for events that support # reprocessing. If the data cannot be found we want to assume # that we need to delete the raw event. if not data or reprocessing.event_supports_reprocessing(data): with metrics.timer("tasks.store.do_save_event.delete_raw_event"): delete_raw_event(project_id, event_id, allow_hint_clear=True) # This covers two cases: where data is None because we did not manage # to fetch it from the default cache or the empty dictionary was # stored in the default cache. The former happens if the event # expired while being on the queue, the second happens on reprocessing # if the raw event was deleted concurrently while we held on to # it. This causes the node store to delete the data and we end up # fetching an empty dict. We could in theory not invoke `save_event` # in those cases but it's important that we always clean up the # reprocessing reports correctly or they will screw up the UI. So # to future proof this correctly we just handle this case here. if not data: metrics.incr( "events.failed", tags={"reason": "cache", "stage": "post"}, skip_internal=False ) return with configure_scope() as scope: scope.set_tag("project", project_id) event = None try: with metrics.timer("tasks.store.do_save_event.event_manager.save"): manager = EventManager(data) # event.project.organization is populated after this statement. event = manager.save( project_id, assume_normalized=True, start_time=start_time, cache_key=cache_key ) except HashDiscarded: pass finally: if cache_key: with metrics.timer("tasks.store.do_save_event.delete_cache"): default_cache.delete(cache_key) with metrics.timer("tasks.store.do_save_event.delete_attachment_cache"): # For the unlikely case that we did not manage to persist the # event we also delete the key always. if event is None or features.has( "organizations:event-attachments", event.project.organization, actor=None ): attachment_cache.delete(cache_key) if start_time: metrics.timing( "events.time-to-process", time() - start_time, instance=data["platform"] )