def save_event(cache_key=None, data=None, start_time=None, event_id=None,
               project_id=None, **kwargs):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas, tsdb
    from sentry.models import ProjectKey

    if cache_key:
        data = default_cache.get(cache_key)

    if data is not None:
        data = CanonicalKeyDict(data)

    if event_id is None and data is not None:
        event_id = data['event_id']

    # only when we come from reprocessing we get a project_id sent into
    # the task.
    if project_id is None:
        project_id = data.pop('project')

    delete_raw_event(project_id, event_id, allow_hint_clear=True)

    # This covers two cases: where data is None because we did not manage
    # to fetch it from the default cache or the empty dictionary was
    # stored in the default cache. The former happens if the event
    # expired while being on the queue, the second happens on reprocessing
    # if the raw event was deleted concurrently while we held on to
    # it. This causes the node store to delete the data and we end up
    # fetching an empty dict. We could in theory not invoke `save_event`
    # in those cases but it's important that we always clean up the
    # reprocessing reports correctly or they will screw up the UI. So
    # to future proof this correctly we just handle this case here.
    if not data:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'post'})
        return

    Raven.tags_context({
        'project': project_id,
    })

    try:
        manager = EventManager(data)
        event = manager.save(project_id)

        # Always load attachments from the cache so we can later prune them.
        # Only save them if the event-attachments feature is active, though.
        if features.has('organizations:event-attachments',
                        event.project.organization, actor=None):
            attachments = attachment_cache.get(cache_key) or []
            for attachment in attachments:
                save_attachment(event, attachment)
    except HashDiscarded:
        increment_list = [
            (tsdb.models.project_total_received_discarded, project_id),
        ]

        try:
            project = Project.objects.get_from_cache(id=project_id)
        except Project.DoesNotExist:
            pass
        else:
            increment_list.extend([
                (tsdb.models.project_total_blacklisted, project.id),
                (tsdb.models.organization_total_blacklisted, project.organization_id),
            ])

            project_key = None
            if data.get('key_id') is not None:
                try:
                    project_key = ProjectKey.objects.get_from_cache(id=data['key_id'])
                except ProjectKey.DoesNotExist:
                    pass
                else:
                    increment_list.append(
                        (tsdb.models.key_total_blacklisted, project_key.id))

            quotas.refund(
                project,
                key=project_key,
                timestamp=start_time,
            )

        tsdb.incr_multi(
            increment_list,
            timestamp=to_datetime(start_time) if start_time is not None else None,
        )
    finally:
        if cache_key:
            default_cache.delete(cache_key)
            attachment_cache.delete(cache_key)

        if start_time:
            metrics.timing('events.time-to-process', time() - start_time,
                           instance=data['platform'])
def _do_save_event(cache_key=None, data=None, start_time=None, event_id=None, project_id=None, **kwargs): """ Saves an event to the database. """ from sentry.event_manager import HashDiscarded, EventManager, track_outcome from sentry import quotas from sentry.models import ProjectKey if cache_key and data is None: data = default_cache.get(cache_key) if data is not None: data = CanonicalKeyDict(data) if event_id is None and data is not None: event_id = data['event_id'] # only when we come from reprocessing we get a project_id sent into # the task. if project_id is None: project_id = data.pop('project') key_id = None if data is None else data.get('key_id') timestamp = to_datetime(start_time) if start_time is not None else None delete_raw_event(project_id, event_id, allow_hint_clear=True) # This covers two cases: where data is None because we did not manage # to fetch it from the default cache or the empty dictionary was # stored in the default cache. The former happens if the event # expired while being on the queue, the second happens on reprocessing # if the raw event was deleted concurrently while we held on to # it. This causes the node store to delete the data and we end up # fetching an empty dict. We could in theory not invoke `save_event` # in those cases but it's important that we always clean up the # reprocessing reports correctly or they will screw up the UI. So # to future proof this correctly we just handle this case here. if not data: metrics.incr('events.failed', tags={ 'reason': 'cache', 'stage': 'post' }, skip_internal=False) return with configure_scope() as scope: scope.set_tag("project", project_id) event = None try: manager = EventManager(data) event = manager.save(project_id, assume_normalized=True) # Always load attachments from the cache so we can later prune them. # Only save them if the event-attachments feature is active, though. if features.has('organizations:event-attachments', event.project.organization, actor=None): attachments = attachment_cache.get(cache_key) or [] for attachment in attachments: save_attachment(event, attachment) # This is where we can finally say that we have accepted the event. track_outcome(event.project.organization_id, event.project.id, key_id, 'accepted', None, timestamp) except HashDiscarded: project = Project.objects.get_from_cache(id=project_id) reason = FilterStatKeys.DISCARDED_HASH project_key = None try: if key_id is not None: project_key = ProjectKey.objects.get_from_cache(id=key_id) except ProjectKey.DoesNotExist: pass quotas.refund(project, key=project_key, timestamp=start_time) track_outcome(project.organization_id, project_id, key_id, 'filtered', reason, timestamp) finally: if cache_key: default_cache.delete(cache_key) # For the unlikely case that we did not manage to persist the # event we also delete the key always. if event is None or \ features.has('organizations:event-attachments', event.project.organization, actor=None): attachment_cache.delete(cache_key) if start_time: metrics.timing('events.time-to-process', time() - start_time, instance=data['platform'])
def _do_save_event(cache_key=None, data=None, start_time=None, event_id=None, project_id=None, **kwargs): """ Saves an event to the database. """ from sentry.event_manager import HashDiscarded, EventManager from sentry import quotas from sentry.models import ProjectKey from sentry.utils.outcomes import Outcome, track_outcome if cache_key and data is None: data = default_cache.get(cache_key) if data is not None: data = CanonicalKeyDict(data) if event_id is None and data is not None: event_id = data['event_id'] # only when we come from reprocessing we get a project_id sent into # the task. if project_id is None: project_id = data.pop('project') key_id = None if data is None else data.get('key_id') if key_id is not None: key_id = int(key_id) timestamp = to_datetime(start_time) if start_time is not None else None delete_raw_event(project_id, event_id, allow_hint_clear=True) # This covers two cases: where data is None because we did not manage # to fetch it from the default cache or the empty dictionary was # stored in the default cache. The former happens if the event # expired while being on the queue, the second happens on reprocessing # if the raw event was deleted concurrently while we held on to # it. This causes the node store to delete the data and we end up # fetching an empty dict. We could in theory not invoke `save_event` # in those cases but it's important that we always clean up the # reprocessing reports correctly or they will screw up the UI. So # to future proof this correctly we just handle this case here. if not data: metrics.incr( 'events.failed', tags={ 'reason': 'cache', 'stage': 'post'}, skip_internal=False) return with configure_scope() as scope: scope.set_tag("project", project_id) event = None try: manager = EventManager(data) event = manager.save(project_id, assume_normalized=True) # Always load attachments from the cache so we can later prune them. # Only save them if the event-attachments feature is active, though. if features.has('organizations:event-attachments', event.project.organization, actor=None): attachments = attachment_cache.get(cache_key) or [] for attachment in attachments: save_attachment(event, attachment) # This is where we can finally say that we have accepted the event. track_outcome( event.project.organization_id, event.project.id, key_id, Outcome.ACCEPTED, None, timestamp, event_id ) except HashDiscarded: project = Project.objects.get_from_cache(id=project_id) reason = FilterStatKeys.DISCARDED_HASH project_key = None try: if key_id is not None: project_key = ProjectKey.objects.get_from_cache(id=key_id) except ProjectKey.DoesNotExist: pass quotas.refund(project, key=project_key, timestamp=start_time) track_outcome( project.organization_id, project_id, key_id, Outcome.FILTERED, reason, timestamp, event_id ) finally: if cache_key: default_cache.delete(cache_key) # For the unlikely case that we did not manage to persist the # event we also delete the key always. if event is None or \ features.has('organizations:event-attachments', event.project.organization, actor=None): attachment_cache.delete(cache_key) if start_time: metrics.timing( 'events.time-to-process', time() - start_time, instance=data['platform'])
def _do_save_event(cache_key=None, data=None, start_time=None, event_id=None, project_id=None, **kwargs): """ Saves an event to the database. """ from sentry.event_manager import HashDiscarded, EventManager from sentry import quotas from sentry.models import ProjectKey from sentry.utils.outcomes import Outcome, track_outcome from sentry.ingest.outcomes_consumer import mark_signal_sent event_type = "none" if cache_key and data is None: with metrics.timer( "tasks.store.do_save_event.get_cache") as metric_tags: data = default_cache.get(cache_key) if data is not None: metric_tags["event_type"] = event_type = data.get( "type") or "none" with metrics.global_tags(event_type=event_type): if data is not None: data = CanonicalKeyDict(data) if event_id is None and data is not None: event_id = data["event_id"] # only when we come from reprocessing we get a project_id sent into # the task. if project_id is None: project_id = data.pop("project") key_id = None if data is None else data.get("key_id") if key_id is not None: key_id = int(key_id) timestamp = to_datetime(start_time) if start_time is not None else None # We only need to delete raw events for events that support # reprocessing. If the data cannot be found we want to assume # that we need to delete the raw event. if not data or reprocessing.event_supports_reprocessing(data): with metrics.timer("tasks.store.do_save_event.delete_raw_event"): delete_raw_event(project_id, event_id, allow_hint_clear=True) # This covers two cases: where data is None because we did not manage # to fetch it from the default cache or the empty dictionary was # stored in the default cache. The former happens if the event # expired while being on the queue, the second happens on reprocessing # if the raw event was deleted concurrently while we held on to # it. This causes the node store to delete the data and we end up # fetching an empty dict. We could in theory not invoke `save_event` # in those cases but it's important that we always clean up the # reprocessing reports correctly or they will screw up the UI. So # to future proof this correctly we just handle this case here. if not data: metrics.incr("events.failed", tags={ "reason": "cache", "stage": "post" }, skip_internal=False) return with configure_scope() as scope: scope.set_tag("project", project_id) event = None try: with metrics.timer("tasks.store.do_save_event.event_manager.save"): manager = EventManager(data) # event.project.organization is populated after this statement. event = manager.save(project_id, assume_normalized=True, cache_key=cache_key) with metrics.timer("tasks.store.do_save_event.track_outcome"): # This is where we can finally say that we have accepted the event. track_outcome( event.project.organization_id, event.project.id, key_id, Outcome.ACCEPTED, None, timestamp, event_id, ) except HashDiscarded: project = Project.objects.get_from_cache(id=project_id) reason = FilterStatKeys.DISCARDED_HASH project_key = None try: if key_id is not None: project_key = ProjectKey.objects.get_from_cache(id=key_id) except ProjectKey.DoesNotExist: pass quotas.refund(project, key=project_key, timestamp=start_time) # There is no signal supposed to be sent for this particular # outcome-reason combination. Prevent the outcome consumer from # emitting it for now. # # XXX(markus): Revisit decision about signals once outcomes consumer is stable. 
mark_signal_sent(project_id, event_id) track_outcome( project.organization_id, project_id, key_id, Outcome.FILTERED, reason, timestamp, event_id, ) finally: if cache_key: with metrics.timer("tasks.store.do_save_event.delete_cache"): default_cache.delete(cache_key) with metrics.timer( "tasks.store.do_save_event.delete_attachment_cache"): # For the unlikely case that we did not manage to persist the # event we also delete the key always. if event is None or features.has( "organizations:event-attachments", event.project.organization, actor=None): attachment_cache.delete(cache_key) if start_time: metrics.timing("events.time-to-process", time() - start_time, instance=data["platform"])
def save(self, project_id, raw=False, assume_normalized=False, start_time=None, cache_key=None):
    """
    After normalizing and processing an event, save adjacent models such as
    releases and environments to postgres and write the event into
    eventstream. From there it will be picked up by Snuba and post-processing.

    We re-insert events with duplicate IDs into Snuba, which is responsible
    for deduplicating events. Since deduplication in Snuba is on the primary
    key (based on event ID, project ID and day), events with same IDs are only
    deduplicated if their timestamps fall on the same day. The latest event
    always wins and overwrites the value of events received earlier in that
    day.

    Since we increment counters and frequencies here before events get
    inserted to eventstream these numbers may be larger than the total number
    of events if we receive duplicate event IDs that fall on the same day
    (that do not hit cache first).
    """

    # Normalize if needed
    if not self._normalized:
        if not assume_normalized:
            self.normalize(project_id=project_id)
        self._normalized = True

    with metrics.timer("event_manager.save.project.get_from_cache"):
        project = Project.objects.get_from_cache(id=project_id)

    projects = {project.id: project}

    if self._data.get("type") == "transaction":
        self._data["project"] = int(project_id)
        job = {"data": self._data, "start_time": start_time}
        jobs = save_transaction_events([job], projects)
        return jobs[0]["event"]

    with metrics.timer("event_manager.save.organization.get_from_cache"):
        project._organization_cache = Organization.objects.get_from_cache(
            id=project.organization_id
        )

    job = {"data": self._data, "project_id": project_id, "raw": raw, "start_time": start_time}
    jobs = [job]

    _pull_out_data(jobs, projects)
    _get_or_create_release_many(jobs, projects)
    _get_event_user_many(jobs, projects)

    with metrics.timer("event_manager.load_grouping_config"):
        # At this point we want to normalize the in_app values in case the
        # clients did not set this appropriately so far.
        grouping_config = load_grouping_config(
            get_grouping_config_dict_for_event_data(job["data"], project)
        )

    with metrics.timer("event_manager.normalize_stacktraces_for_grouping"):
        normalize_stacktraces_for_grouping(job["data"], grouping_config)

    _derive_plugin_tags_many(jobs, projects)
    _derive_interface_tags_many(jobs)

    with metrics.timer("event_manager.apply_server_fingerprinting"):
        # The active grouping config was put into the event in the
        # normalize step before. We now also make sure that the
        # fingerprint was set to `'{{ default }}' just in case someone
        # removed it from the payload. The call to get_hashes will then
        # look at `grouping_config` to pick the right parameters.
        job["data"]["fingerprint"] = job["data"].get("fingerprint") or ["{{ default }}"]
        apply_server_fingerprinting(job["data"], get_fingerprinting_config_for_project(project))

    with metrics.timer("event_manager.event.get_hashes"):
        # Here we try to use the grouping config that was requested in the
        # event. If that config has since been deleted (because it was an
        # experimental grouping config) we fall back to the default.
        try:
            hashes = job["event"].get_hashes()
        except GroupingConfigNotFound:
            job["data"]["grouping_config"] = get_grouping_config_dict_for_project(project)
            hashes = job["event"].get_hashes()

    job["data"]["hashes"] = hashes

    _materialize_metadata_many(jobs)

    # The group gets the same metadata as the event when it's flushed but
    # additionally the `last_received` key is set. This key is used by
    # _save_aggregate.
    group_metadata = dict(job["materialized_metadata"])
    group_metadata["last_received"] = job["received_timestamp"]
    kwargs = {
        "platform": job["platform"],
        "message": job["event"].search_message,
        "culprit": job["culprit"],
        "logger": job["logger_name"],
        "level": LOG_LEVELS_MAP.get(job["level"]),
        "last_seen": job["event"].datetime,
        "first_seen": job["event"].datetime,
        "active_at": job["event"].datetime,
        "data": group_metadata,
    }

    if job["release"]:
        kwargs["first_release"] = job["release"]

    try:
        job["group"], job["is_new"], job["is_regression"] = _save_aggregate(
            event=job["event"], hashes=hashes, release=job["release"], **kwargs
        )
    except HashDiscarded:
        project_key = None
        if job["key_id"] is not None:
            try:
                project_key = ProjectKey.objects.get_from_cache(id=job["key_id"])
            except ProjectKey.DoesNotExist:
                pass

        quotas.refund(project, key=project_key, timestamp=start_time)
        track_outcome(
            org_id=project.organization_id,
            project_id=project_id,
            key_id=job["key_id"],
            outcome=Outcome.FILTERED,
            reason=FilterStatKeys.DISCARDED_HASH,
            timestamp=to_datetime(job["start_time"]),
            event_id=job["event"].event_id,
            category=job["category"],
        )
        metrics.incr(
            "events.discarded",
            skip_internal=True,
            tags={"organization_id": project.organization_id, "platform": job["platform"]},
        )
        raise

    job["event"].group = job["group"]

    # store a reference to the group id to guarantee validation of isolation
    # XXX(markus): No clue what this does
    job["event"].data.bind_ref(job["event"])

    _get_or_create_environment_many(jobs, projects)

    if job["group"]:
        group_environment, job["is_new_group_environment"] = GroupEnvironment.get_or_create(
            group_id=job["group"].id,
            environment_id=job["environment"].id,
            defaults={"first_release": job["release"] or None},
        )
    else:
        job["is_new_group_environment"] = False

    _get_or_create_release_associated_models(jobs, projects)

    if job["release"] and job["group"]:
        job["grouprelease"] = GroupRelease.get_or_create(
            group=job["group"],
            release=job["release"],
            environment=job["environment"],
            datetime=job["event"].datetime,
        )

    _tsdb_record_all_metrics(jobs)

    if job["group"]:
        UserReport.objects.filter(project=project, event_id=job["event"].event_id).update(
            group=job["group"], environment=job["environment"]
        )

    _materialize_event_metrics(jobs)

    # Load attachments first, but persist them at the very last after
    # posting to eventstream to make sure all counters and eventstream are
    # incremented for sure.
    attachments = get_attachments(cache_key, job["event"])
    for attachment in attachments:
        key = "bytes.stored.%s" % (attachment.type,)
        job["event_metrics"][key] = (job["event_metrics"].get(key) or 0) + len(attachment.data)

    _nodestore_save_many(jobs)

    if job["release"]:
        if job["is_new"]:
            buffer.incr(
                ReleaseProject,
                {"new_groups": 1},
                {"release_id": job["release"].id, "project_id": project.id},
            )
        if job["is_new_group_environment"]:
            buffer.incr(
                ReleaseProjectEnvironment,
                {"new_issues_count": 1},
                {
                    "project_id": project.id,
                    "release_id": job["release"].id,
                    "environment_id": job["environment"].id,
                },
            )

    if not raw:
        if not project.first_event:
            project.update(first_event=job["event"].datetime)
            first_event_received.send_robust(
                project=project, event=job["event"], sender=Project
            )

    _eventstream_insert_many(jobs)

    # Do this last to ensure signals get emitted even if connection to the
    # file store breaks temporarily.
    save_attachments(attachments, job["event"])

    metric_tags = {"from_relay": "_relay_processed" in job["data"]}

    metrics.timing(
        "events.latency",
        job["received_timestamp"] - job["recorded_timestamp"],
        tags=metric_tags,
    )
    metrics.timing("events.size.data.post_save", job["event"].size, tags=metric_tags)
    metrics.incr(
        "events.post_save.normalize.errors",
        amount=len(job["data"].get("errors") or ()),
        tags=metric_tags,
    )

    _track_outcome_accepted_many(jobs)

    self._data = job["event"].data.data
    return job["event"]
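# The Snuba-side deduplication described in the docstring of `save` above can
# be illustrated with a small standalone sketch. Everything below is an
# assumption for illustration only: the key derivation and the in-memory
# `store` dict are hypothetical stand-ins, not Snuba's actual implementation.
# Events collapse when event ID, project ID and the calendar day of the
# timestamp all match, and the latest write wins.
from datetime import datetime


def dedup_key(event_id, project_id, timestamp):
    # Hypothetical primary key: event ID, project ID and the day of the event.
    return (event_id, project_id, timestamp.date())


store = {}


def insert(event_id, project_id, timestamp, payload):
    # Later writes overwrite earlier ones, mirroring "the latest event always wins".
    store[dedup_key(event_id, project_id, timestamp)] = payload


insert("abc123", 42, datetime(2020, 1, 1, 9, 0), {"v": 1})
insert("abc123", 42, datetime(2020, 1, 1, 17, 0), {"v": 2})  # same day: deduplicated, overwrites
insert("abc123", 42, datetime(2020, 1, 2, 9, 0), {"v": 3})   # next day: stored separately
assert len(store) == 2
assert store[("abc123", 42, datetime(2020, 1, 1).date())] == {"v": 2}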
def save_event(cache_key=None, data=None, start_time=None, event_id=None, **kwargs):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas, tsdb
    from sentry.models import ProjectKey

    if cache_key:
        data = default_cache.get(cache_key)

    if event_id is None and data is not None:
        event_id = data['event_id']

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'post'})
        return

    project_id = data.pop('project')

    delete_raw_event(project_id, event_id, allow_hint_clear=True)

    Raven.tags_context({
        'project': project_id,
    })

    try:
        manager = EventManager(data)
        manager.save(project_id)
    except HashDiscarded as exc:
        # TODO(jess): remove this before it goes out to a wider audience
        info_logger.info('discarded.hash', extra={
            'project_id': project_id,
            'description': exc.message,
        })
        tsdb.incr(
            tsdb.models.project_total_received_discarded,
            project_id,
            timestamp=to_datetime(start_time) if start_time is not None else None,
        )

        try:
            project = Project.objects.get_from_cache(id=project_id)
        except Project.DoesNotExist:
            pass
        else:
            project_key = None
            if data.get('key_id') is not None:
                try:
                    project_key = ProjectKey.objects.get_from_cache(id=data['key_id'])
                except ProjectKey.DoesNotExist:
                    pass

            quotas.refund(
                project,
                key=project_key,
                timestamp=start_time,
            )
    finally:
        if cache_key:
            default_cache.delete(cache_key)

        if start_time:
            metrics.timing('events.time-to-process', time() - start_time,
                           instance=data['platform'])
def save_event(cache_key=None, data=None, start_time=None, event_id=None, project_id=None, **kwargs): """ Saves an event to the database. """ from sentry.event_manager import HashDiscarded, EventManager from sentry import quotas, tsdb from sentry.models import ProjectKey if cache_key: data = default_cache.get(cache_key) if event_id is None and data is not None: event_id = data['event_id'] # only when we come from reprocessing we get a project_id sent into # the task. if project_id is None: project_id = data.pop('project') delete_raw_event(project_id, event_id, allow_hint_clear=True) # This covers two cases: where data is None because we did not manage # to fetch it from the default cache or the empty dictionary was # stored in the default cache. The former happens if the event # expired while being on the queue, the second happens on reprocessing # if the raw event was deleted concurrently while we held on to # it. This causes the node store to delete the data and we end up # fetching an empty dict. We could in theory not invoke `save_event` # in those cases but it's important that we always clean up the # reprocessing reports correctly or they will screw up the UI. So # to future proof this correctly we just handle this case here. if not data: metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'post'}) return Raven.tags_context({ 'project': project_id, }) try: manager = EventManager(data) manager.save(project_id) except HashDiscarded: increment_list = [ (tsdb.models.project_total_received_discarded, project_id), ] try: project = Project.objects.get_from_cache(id=project_id) except Project.DoesNotExist: pass else: increment_list.extend([ (tsdb.models.project_total_blacklisted, project.id), (tsdb.models.organization_total_blacklisted, project.organization_id), ]) project_key = None if data.get('key_id') is not None: try: project_key = ProjectKey.objects.get_from_cache(id=data['key_id']) except ProjectKey.DoesNotExist: pass else: increment_list.append((tsdb.models.key_total_blacklisted, project_key.id)) quotas.refund( project, key=project_key, timestamp=start_time, ) tsdb.incr_multi( increment_list, timestamp=to_datetime(start_time) if start_time is not None else None, ) finally: if cache_key: default_cache.delete(cache_key) if start_time: metrics.timing( 'events.time-to-process', time() - start_time, instance=data['platform'])
def filter_attachments_for_group(attachments, job):
    """
    Removes crash reports exceeding the group-limit.

    If the project or organization is configured to limit the amount of crash
    reports per group, the number of stored crashes is limited. This requires
    `event.group` to be set.

    Emits one outcome per removed attachment.

    :param attachments: The full list of attachments to filter.
    :param job:         The job context container.
    """
    if not attachments:
        return attachments

    event = job["event"]
    project = event.project

    # The setting is both an organization and project setting. The project
    # setting strictly overrides the organization setting, unless set to the
    # default.
    max_crashreports = get_max_crashreports(project, allow_none=True)
    if max_crashreports is None:
        max_crashreports = get_max_crashreports(project.organization)

    # The number of crash reports is cached per group
    crashreports_key = get_crashreport_key(event.group_id)

    # Only fetch the number of stored crash reports if there is a crash report
    # in the list of attachments. Otherwise, we won't require this number.
    if any(attachment.type in CRASH_REPORT_TYPES for attachment in attachments):
        cached_reports = get_stored_crashreports(crashreports_key, event, max_crashreports)
    else:
        cached_reports = 0
    stored_reports = cached_reports

    filtered = []
    refund_quantity = 0
    for attachment in attachments:
        # If the attachment is a crash report (e.g. minidump), we need to honor
        # the store_crash_reports setting. Otherwise, we assume that the client
        # has already verified PII and just store the attachment.
        if attachment.type in CRASH_REPORT_TYPES:
            if crashreports_exceeded(stored_reports, max_crashreports):
                # Indicate that the crash report has been removed due to a limit
                # on the maximum number of crash reports. If this flag is True,
                # it indicates that there are *other* events in the same group
                # that store a crash report. This flag will therefore *not* be
                # set if storage of crash reports is completely disabled.
                if max_crashreports > 0:
                    job["data"]["metadata"]["stripped_crash"] = True

                track_outcome(
                    org_id=event.project.organization_id,
                    project_id=job["project_id"],
                    key_id=job["key_id"],
                    outcome=Outcome.FILTERED,
                    reason=FilterStatKeys.CRASH_REPORT_LIMIT,
                    timestamp=to_datetime(job["start_time"]),
                    event_id=event.event_id,
                    category=DataCategory.ATTACHMENT,
                    quantity=attachment.size,
                )

                # Quotas are counted with at least ``1`` for attachments.
                refund_quantity += attachment.size or 1
                continue
            stored_reports += 1

        filtered.append(attachment)

    # Check if we have exceeded the stored crash reports count. If so, we
    # persist the current maximum (not the actual number!) into the cache. Next
    # time when loading from the cache, we will validate that this number has
    # not changed, or otherwise re-fetch from the database.
    if crashreports_exceeded(stored_reports, max_crashreports) and stored_reports > cached_reports:
        cache.set(crashreports_key, max_crashreports, CRASH_REPORT_TIMEOUT)

    if refund_quantity:
        quotas.refund(
            project,
            key=job["project_key"],
            timestamp=job["start_time"],
            category=DataCategory.ATTACHMENT,
            quantity=refund_quantity,
        )

    return filtered
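# A minimal sketch of the caching invariant described in the closing comment of
# `filter_attachments_for_group`: once the per-group crash-report cap is hit,
# the *maximum* (not the running count) is persisted so that a later read can
# detect a changed setting and fall back to the database. The helper below and
# the plain dict standing in for the cache are assumptions for illustration,
# not the real implementation.
def crashreports_exceeded_sketch(stored, maximum):
    # Hypothetical stand-in: a maximum of 0 disables crash-report storage
    # entirely; otherwise storage is capped at `maximum` reports per group.
    return maximum == 0 or stored >= maximum


fake_cache = {}


def persist_crashreport_count(key, stored_reports, cached_reports, max_crashreports):
    # Persist the configured maximum once it has been reached, so the next
    # loader can compare it against the current setting.
    if crashreports_exceeded_sketch(stored_reports, max_crashreports) and stored_reports > cached_reports:
        fake_cache[key] = max_crashreports


persist_crashreport_count("crashreports:group:1", stored_reports=5, cached_reports=3, max_crashreports=5)
assert fake_cache["crashreports:group:1"] == 5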
def discard_event(job, attachments):
    """
    Refunds consumed quotas for an event and its attachments.

    For the event and each dropped attachment, an outcome
    FILTERED(discarded-hash) is emitted.

    :param job:         The job context container.
    :param attachments: The full list of attachments to filter.
    """
    project = job["event"].project

    quotas.refund(
        project,
        key=job["project_key"],
        timestamp=job["start_time"],
        category=job["category"],
        quantity=1,
    )

    track_outcome(
        org_id=project.organization_id,
        project_id=job["project_id"],
        key_id=job["key_id"],
        outcome=Outcome.FILTERED,
        reason=FilterStatKeys.DISCARDED_HASH,
        timestamp=to_datetime(job["start_time"]),
        event_id=job["event"].event_id,
        category=job["category"],
    )

    attachment_quantity = 0
    for attachment in attachments:
        # Quotas are counted with at least ``1`` for attachments.
        attachment_quantity += attachment.size or 1

        track_outcome(
            org_id=project.organization_id,
            project_id=job["project_id"],
            key_id=job["key_id"],
            outcome=Outcome.FILTERED,
            reason=FilterStatKeys.DISCARDED_HASH,
            timestamp=to_datetime(job["start_time"]),
            event_id=job["event"].event_id,
            category=DataCategory.ATTACHMENT,
            quantity=attachment.size,
        )

    if attachment_quantity:
        quotas.refund(
            project,
            key=job["project_key"],
            timestamp=job["start_time"],
            category=DataCategory.ATTACHMENT,
            quantity=attachment_quantity,
        )

    metrics.incr(
        "events.discarded",
        skip_internal=True,
        tags={"platform": job["platform"]},
    )
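# Illustration of the "at least ``1`` per attachment" refund rule used in both
# `discard_event` and `filter_attachments_for_group` above: a zero-byte or
# size-less attachment still refunds one unit of attachment quota. The sizes
# below are made up for the example.
attachment_sizes = [4096, 0, 512]
refund_quantity = sum(size or 1 for size in attachment_sizes)
assert refund_quantity == 4096 + 1 + 512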