def ingest_consumer(consumer_types, all_consumer_types, **options):
    """
    Start an "ingest consumer".

    Ingest consumers read the events Relay publishes to a kafka topic and
    schedule process event celery tasks for them.

    Exactly one of ``consumer_types`` / ``all_consumer_types`` must be
    given; supplying both or neither raises ``click.ClickException``.  An
    optional ``concurrency`` entry is removed from ``options`` and, when it
    is not ``None``, sizes a thread pool that is handed to the consumer.
    All remaining options are forwarded to ``get_ingest_consumer``.
    """
    from sentry.ingest.ingest_consumer import get_ingest_consumer
    from sentry.utils import metrics

    # Validate the two mutually exclusive selectors up front.
    if all_consumer_types and consumer_types:
        raise click.ClickException(
            "Cannot specify --all-consumer types and --consumer-type at the same time"
        )
    if all_consumer_types:
        consumer_types = set(ConsumerType.all())
    elif not consumer_types:
        raise click.ClickException("Need to specify --all-consumer-types or --consumer-type")

    # "concurrency" must not reach get_ingest_consumer, hence the pop.
    pool_size = options.pop("concurrency", None)
    executor = None if pool_size is None else ThreadPoolExecutor(pool_size)

    tag_value = ",".join(sorted(consumer_types))
    with metrics.global_tags(ingest_consumer_types=tag_value, _all_threads=True):
        consumer = get_ingest_consumer(
            consumer_types=consumer_types, executor=executor, **options
        )
        consumer.run()
def ingest_consumer(consumer_type, **options):
    """
    Start an "ingest consumer" for a single consumer type.

    Ingest consumers read the events Relay publishes to a kafka topic and
    schedule process event celery tasks for them.

    ``consumer_type`` may be one of the names ``"events"``,
    ``"transactions"`` or ``"attachments"``, which are translated to the
    matching ``ConsumerType`` constant; any other value is used as given.
    ``options`` are forwarded to ``get_ingest_consumer``.
    """
    from sentry.ingest.ingest_consumer import ConsumerType, get_ingest_consumer
    from sentry.utils import metrics

    # Name -> ConsumerType constant; compared with ``==`` in declaration order.
    known_names = (
        ("events", ConsumerType.Events),
        ("transactions", ConsumerType.Transactions),
        ("attachments", ConsumerType.Attachments),
    )
    for name, resolved in known_names:
        if consumer_type == name:
            consumer_type = resolved
            break

    with metrics.global_tags(ingest_consumer_type=consumer_type, _all_threads=True):
        consumer = get_ingest_consumer(consumer_type=consumer_type, **options)
        consumer.run()
def _do_save_event(
    cache_key=None, data=None, start_time=None, event_id=None, project_id=None, **kwargs
):
    """
    Saves an event to the database.

    :param cache_key: key of the payload in the event processing store.  Used
        to load ``data`` when it is not passed in and to clean up the
        processing/attachment caches afterwards.
    :param data: the event payload.  Loaded via ``cache_key`` when ``None``.
    :param start_time: when truthy, ``time() - start_time`` is reported as
        ``events.time-to-process``; it is also forwarded to the event manager.
    :param event_id: defaults to ``data["event_id"]`` when ``None``.
    :param project_id: defaults to ``data["project"]`` (which is removed from
        the payload) when ``None``.
    :param kwargs: accepted for call compatibility and not used.

    Returns ``None``.  When no payload is available the ``events.failed``
    counter is incremented and nothing is saved.
    """
    set_current_event_project(project_id)

    from sentry.event_manager import EventManager, HashDiscarded

    event_type = "none"

    if cache_key and data is None:
        with metrics.timer("tasks.store.do_save_event.get_cache") as metric_tags:
            data = event_processing_store.get(cache_key)
            if data is not None:
                metric_tags["event_type"] = event_type = data.get("type") or "none"

    with metrics.global_tags(event_type=event_type):
        if data is not None:
            data = CanonicalKeyDict(data)

        if event_id is None and data is not None:
            event_id = data["event_id"]

        # only when we come from reprocessing we get a project_id sent into
        # the task.
        if project_id is None:
            if not data:
                # The payload is missing and the caller did not tell us the
                # project, so there is nothing to derive it from.  Calling
                # ``data.pop`` here would blow up on ``None`` before the
                # missing-payload handling below is reached; report the
                # failure the same way that handling does and stop.
                metrics.incr(
                    "events.failed",
                    tags={"reason": "cache", "stage": "post"},
                    skip_internal=False,
                )
                return
            project_id = data.pop("project")
            set_current_event_project(project_id)

        # We only need to delete raw events for events that support
        # reprocessing.  If the data cannot be found we want to assume
        # that we need to delete the raw event.
        if not data or reprocessing.event_supports_reprocessing(data):
            with metrics.timer("tasks.store.do_save_event.delete_raw_event"):
                delete_raw_event(project_id, event_id, allow_hint_clear=True)

        # This covers two cases: where data is None because we did not manage
        # to fetch it from the default cache or the empty dictionary was
        # stored in the default cache.  The former happens if the event
        # expired while being on the queue, the second happens on reprocessing
        # if the raw event was deleted concurrently while we held on to
        # it.  This causes the node store to delete the data and we end up
        # fetching an empty dict.  We could in theory not invoke `save_event`
        # in those cases but it's important that we always clean up the
        # reprocessing reports correctly or they will screw up the UI.  So
        # to future proof this correctly we just handle this case here.
        if not data:
            metrics.incr(
                "events.failed", tags={"reason": "cache", "stage": "post"}, skip_internal=False
            )
            return

        try:
            with metrics.timer("tasks.store.do_save_event.event_manager.save"):
                manager = EventManager(data)
                # event.project.organization is populated after this statement.
                manager.save(
                    project_id, assume_normalized=True, start_time=start_time, cache_key=cache_key
                )
                # Put the updated event back into the cache so that post_process
                # has the most recent data.
                data = manager.get_data()
                if isinstance(data, CANONICAL_TYPES):
                    data = dict(data.items())
                with metrics.timer("tasks.store.do_save_event.write_processing_cache"):
                    event_processing_store.store(data)
        except HashDiscarded:
            # Delete the event payload from cache since it won't show up in post-processing.
            if cache_key:
                with metrics.timer("tasks.store.do_save_event.delete_cache"):
                    event_processing_store.delete_by_key(cache_key)
        finally:
            # Runs for success, HashDiscarded and unexpected errors alike.
            reprocessing2.mark_event_reprocessed(data)
            if cache_key:
                with metrics.timer("tasks.store.do_save_event.delete_attachment_cache"):
                    attachment_cache.delete(cache_key)

            if start_time:
                metrics.timing(
                    "events.time-to-process", time() - start_time, instance=data["platform"]
                )

            time_synthetic_monitoring_event(data, project_id, start_time)
def _do_save_event(
    cache_key=None, data=None, start_time=None, event_id=None, project_id=None, **kwargs
):
    """
    Saves an event to the database.

    :param cache_key: key of the payload in the default cache.  Used to load
        ``data`` when it is not passed in and to clean up the event and
        attachment caches afterwards.
    :param data: the event payload.  Loaded via ``cache_key`` when ``None``.
    :param start_time: when not ``None`` it is converted with ``to_datetime``
        and used as the outcome timestamp; when truthy,
        ``time() - start_time`` is reported as ``events.time-to-process``.
    :param event_id: defaults to ``data["event_id"]`` when ``None``.
    :param project_id: defaults to ``data["project"]`` (which is removed from
        the payload) when ``None``.
    :param kwargs: accepted for call compatibility and not used.

    Returns ``None``.  An ``ACCEPTED`` outcome is tracked after a successful
    save; on ``HashDiscarded`` the quota is refunded and a ``FILTERED``
    outcome is tracked instead.  When no payload is available the
    ``events.failed`` counter is incremented and nothing is saved.
    """
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas
    from sentry.models import ProjectKey
    from sentry.utils.outcomes import Outcome, track_outcome
    from sentry.ingest.outcomes_consumer import mark_signal_sent

    event_type = "none"

    if cache_key and data is None:
        with metrics.timer("tasks.store.do_save_event.get_cache") as metric_tags:
            data = default_cache.get(cache_key)
            if data is not None:
                metric_tags["event_type"] = event_type = data.get("type") or "none"

    with metrics.global_tags(event_type=event_type):
        if data is not None:
            data = CanonicalKeyDict(data)

        if event_id is None and data is not None:
            event_id = data["event_id"]

        # only when we come from reprocessing we get a project_id sent into
        # the task.
        if project_id is None:
            if not data:
                # The payload is missing and the caller did not tell us the
                # project, so there is nothing to derive it from.  Calling
                # ``data.pop`` here would blow up on ``None`` before the
                # missing-payload handling below is reached; report the
                # failure the same way that handling does and stop.
                metrics.incr(
                    "events.failed",
                    tags={"reason": "cache", "stage": "post"},
                    skip_internal=False,
                )
                return
            project_id = data.pop("project")

        key_id = None if data is None else data.get("key_id")
        if key_id is not None:
            key_id = int(key_id)
        timestamp = to_datetime(start_time) if start_time is not None else None

        # We only need to delete raw events for events that support
        # reprocessing.  If the data cannot be found we want to assume
        # that we need to delete the raw event.
        if not data or reprocessing.event_supports_reprocessing(data):
            with metrics.timer("tasks.store.do_save_event.delete_raw_event"):
                delete_raw_event(project_id, event_id, allow_hint_clear=True)

        # This covers two cases: where data is None because we did not manage
        # to fetch it from the default cache or the empty dictionary was
        # stored in the default cache.  The former happens if the event
        # expired while being on the queue, the second happens on reprocessing
        # if the raw event was deleted concurrently while we held on to
        # it.  This causes the node store to delete the data and we end up
        # fetching an empty dict.  We could in theory not invoke `save_event`
        # in those cases but it's important that we always clean up the
        # reprocessing reports correctly or they will screw up the UI.  So
        # to future proof this correctly we just handle this case here.
        if not data:
            metrics.incr(
                "events.failed", tags={"reason": "cache", "stage": "post"}, skip_internal=False
            )
            return

        with configure_scope() as scope:
            scope.set_tag("project", project_id)

        # Stays None if saving fails; the cleanup in ``finally`` relies on it.
        event = None
        try:
            with metrics.timer("tasks.store.do_save_event.event_manager.save"):
                manager = EventManager(data)
                # event.project.organization is populated after this statement.
                event = manager.save(project_id, assume_normalized=True, cache_key=cache_key)

            with metrics.timer("tasks.store.do_save_event.track_outcome"):
                # This is where we can finally say that we have accepted the event.
                track_outcome(
                    event.project.organization_id,
                    event.project.id,
                    key_id,
                    Outcome.ACCEPTED,
                    None,
                    timestamp,
                    event_id,
                )

        except HashDiscarded:
            project = Project.objects.get_from_cache(id=project_id)
            reason = FilterStatKeys.DISCARDED_HASH
            project_key = None
            try:
                if key_id is not None:
                    project_key = ProjectKey.objects.get_from_cache(id=key_id)
            except ProjectKey.DoesNotExist:
                # Refund without a key when the key no longer exists.
                pass

            quotas.refund(project, key=project_key, timestamp=start_time)
            # There is no signal supposed to be sent for this particular
            # outcome-reason combination. Prevent the outcome consumer from
            # emitting it for now.
            #
            # XXX(markus): Revisit decision about signals once outcomes consumer is stable.
            mark_signal_sent(project_id, event_id)
            track_outcome(
                project.organization_id,
                project_id,
                key_id,
                Outcome.FILTERED,
                reason,
                timestamp,
                event_id,
            )

        finally:
            if cache_key:
                with metrics.timer("tasks.store.do_save_event.delete_cache"):
                    default_cache.delete(cache_key)

                with metrics.timer("tasks.store.do_save_event.delete_attachment_cache"):
                    # For the unlikely case that we did not manage to persist the
                    # event we also delete the key always.
                    if event is None or features.has(
                        "organizations:event-attachments", event.project.organization, actor=None
                    ):
                        attachment_cache.delete(cache_key)

            if start_time:
                metrics.timing(
                    "events.time-to-process", time() - start_time, instance=data["platform"]
                )
def _do_save_event(
    cache_key=None, data=None, start_time=None, event_id=None, project_id=None, **kwargs
):
    """
    Saves an event to the database.

    :param cache_key: key of the payload in the default cache.  Used to load
        ``data`` when it is not passed in and to clean up the event and
        attachment caches afterwards.
    :param data: the event payload.  Loaded via ``cache_key`` when ``None``.
    :param start_time: forwarded to the event manager; when truthy,
        ``time() - start_time`` is reported as ``events.time-to-process``.
    :param event_id: defaults to ``data["event_id"]`` when ``None``.
    :param project_id: defaults to ``data["project"]`` (which is removed from
        the payload) when ``None``.
    :param kwargs: accepted for call compatibility and not used.

    Returns ``None``.  ``HashDiscarded`` raised while saving is swallowed.
    When no payload is available the ``events.failed`` counter is
    incremented and nothing is saved.
    """
    from sentry.event_manager import EventManager, HashDiscarded

    event_type = "none"

    if cache_key and data is None:
        with metrics.timer("tasks.store.do_save_event.get_cache") as metric_tags:
            data = default_cache.get(cache_key)
            if data is not None:
                metric_tags["event_type"] = event_type = data.get("type") or "none"

    with metrics.global_tags(event_type=event_type):
        if data is not None:
            data = CanonicalKeyDict(data)

        if event_id is None and data is not None:
            event_id = data["event_id"]

        # only when we come from reprocessing we get a project_id sent into
        # the task.
        if project_id is None:
            if not data:
                # The payload is missing and the caller did not tell us the
                # project, so there is nothing to derive it from.  Calling
                # ``data.pop`` here would blow up on ``None`` before the
                # missing-payload handling below is reached; report the
                # failure the same way that handling does and stop.
                metrics.incr(
                    "events.failed",
                    tags={"reason": "cache", "stage": "post"},
                    skip_internal=False,
                )
                return
            project_id = data.pop("project")

        # We only need to delete raw events for events that support
        # reprocessing.  If the data cannot be found we want to assume
        # that we need to delete the raw event.
        if not data or reprocessing.event_supports_reprocessing(data):
            with metrics.timer("tasks.store.do_save_event.delete_raw_event"):
                delete_raw_event(project_id, event_id, allow_hint_clear=True)

        # This covers two cases: where data is None because we did not manage
        # to fetch it from the default cache or the empty dictionary was
        # stored in the default cache.  The former happens if the event
        # expired while being on the queue, the second happens on reprocessing
        # if the raw event was deleted concurrently while we held on to
        # it.  This causes the node store to delete the data and we end up
        # fetching an empty dict.  We could in theory not invoke `save_event`
        # in those cases but it's important that we always clean up the
        # reprocessing reports correctly or they will screw up the UI.  So
        # to future proof this correctly we just handle this case here.
        if not data:
            metrics.incr(
                "events.failed", tags={"reason": "cache", "stage": "post"}, skip_internal=False
            )
            return

        with configure_scope() as scope:
            scope.set_tag("project", project_id)

        # Stays None if saving fails; the cleanup in ``finally`` relies on it.
        event = None
        try:
            with metrics.timer("tasks.store.do_save_event.event_manager.save"):
                manager = EventManager(data)
                # event.project.organization is populated after this statement.
                event = manager.save(
                    project_id, assume_normalized=True, start_time=start_time, cache_key=cache_key
                )

        except HashDiscarded:
            # Nothing to do beyond the cache cleanup in ``finally``.
            pass

        finally:
            if cache_key:
                with metrics.timer("tasks.store.do_save_event.delete_cache"):
                    default_cache.delete(cache_key)

                with metrics.timer("tasks.store.do_save_event.delete_attachment_cache"):
                    # For the unlikely case that we did not manage to persist the
                    # event we also delete the key always.
                    if event is None or features.has(
                        "organizations:event-attachments", event.project.organization, actor=None
                    ):
                        attachment_cache.delete(cache_key)

            if start_time:
                metrics.timing(
                    "events.time-to-process", time() - start_time, instance=data["platform"]
                )