def dispatch_task(cache_key: str) -> None: if attachments: with sentry_sdk.start_span(op="ingest_consumer.set_attachment_cache"): attachment_objects = [ CachedAttachment(type=attachment.pop("attachment_type"), **attachment) for attachment in attachments ] attachment_cache.set( cache_key, attachments=attachment_objects, timeout=CACHE_TIMEOUT ) # Preprocess this event, which spawns either process_event or # save_event. Pass data explicitly to avoid fetching it again from the # cache. with sentry_sdk.start_span(op="ingest_consumer.process_event.preprocess_event"): preprocess_event( cache_key=cache_key, data=data, start_time=start_time, event_id=event_id, project=project, ) # remember for an 1 hour that we saved this event (deduplication protection) cache.set(deduplication_key, "", CACHE_TIMEOUT) # emit event_accepted once everything is done event_accepted.send_robust(ip=remote_addr, data=data, project=project, sender=process_event)
def process_message(self, message): message = msgpack.unpackb(message.value(), use_list=False) payload = message["payload"] start_time = float(message["start_time"]) event_id = message["event_id"] project_id = message["project_id"] remote_addr = message.get("remote_addr") # check that we haven't already processed this event (a previous instance of the forwarder # died before it could commit the event queue offset) deduplication_key = "ev:{}:{}".format(project_id, event_id) if cache.get(deduplication_key) is not None: logger.warning( "pre-process-forwarder detected a duplicated event" " with id:%s for project:%s.", event_id, project_id, ) return True # message already processed do not reprocess try: project = Project.objects.get_from_cache(id=project_id) except Project.DoesNotExist: logger.error("Project for ingested event does not exist: %s", project_id) return True # Parse the JSON payload. This is required to compute the cache key and # call process_event. The payload will be put into Kafka raw, to avoid # serializing it again. # XXX: Do not use CanonicalKeyDict here. This may break preprocess_event # which assumes that data passed in is a raw dictionary. data = json.loads(payload) cache_timeout = 3600 cache_key = cache_key_for_event(data) default_cache.set(cache_key, data, cache_timeout) # Preprocess this event, which spawns either process_event or # save_event. Pass data explicitly to avoid fetching it again from the # cache. preprocess_event( cache_key=cache_key, data=data, start_time=start_time, event_id=event_id, project=project, ) # remember for an 1 hour that we saved this event (deduplication protection) cache.set(deduplication_key, "", 3600) # emit event_accepted once everything is done event_accepted.send_robust( ip=remote_addr, data=data, project=project, sender=self.process_message ) # Return *something* so that it counts against batch size return True
def process_event(event_manager, project, key, remote_addr, helper, attachments, project_config): event_received.send_robust(ip=remote_addr, project=project, sender=process_event) start_time = time() data = event_manager.get_data() should_filter, filter_reason = event_manager.should_filter() del event_manager event_id = data["event_id"] if should_filter: signals_in_consumer = decide_signals_in_consumer() if not signals_in_consumer: # Mark that the event_filtered signal is sent. Do this before emitting # the outcome to avoid a potential race between OutcomesConsumer and # `event_filtered.send_robust` below. mark_signal_sent(project_config.project_id, event_id) track_outcome( project_config.organization_id, project_config.project_id, key.id, Outcome.FILTERED, filter_reason, event_id=event_id, ) metrics.incr("events.blacklisted", tags={"reason": filter_reason}, skip_internal=False) if not signals_in_consumer: event_filtered.send_robust(ip=remote_addr, project=project, sender=process_event) # relay will no longer be able to provide information about filter # status so to see the impact we're adding a way to turn on relay # like behavior here. if options.get("store.lie-about-filter-status"): return event_id raise APIForbidden("Event dropped due to filter: %s" % (filter_reason,)) # TODO: improve this API (e.g. make RateLimit act on __ne__) rate_limit = safe_execute( quotas.is_rate_limited, project=project, key=key, _with_transaction=False ) if isinstance(rate_limit, bool): rate_limit = RateLimit(is_limited=rate_limit, retry_after=None) # XXX(dcramer): when the rate limiter fails we drop events to ensure # it cannot cascade if rate_limit is None or rate_limit.is_limited: if rate_limit is None: api_logger.debug("Dropped event due to error with rate limiter") signals_in_consumer = decide_signals_in_consumer() if not signals_in_consumer: # Mark that the event_dropped signal is sent. Do this before emitting # the outcome to avoid a potential race between OutcomesConsumer and # `event_dropped.send_robust` below. mark_signal_sent(project_config.project_id, event_id) reason = rate_limit.reason_code if rate_limit else None track_outcome( project_config.organization_id, project_config.project_id, key.id, Outcome.RATE_LIMITED, reason, event_id=event_id, ) metrics.incr("events.dropped", tags={"reason": reason or "unknown"}, skip_internal=False) if not signals_in_consumer: event_dropped.send_robust( ip=remote_addr, project=project, reason_code=reason, sender=process_event ) if rate_limit is not None: raise APIRateLimited(rate_limit.retry_after) # TODO(dcramer): ideally we'd only validate this if the event_id was # supplied by the user cache_key = "ev:%s:%s" % (project_config.project_id, event_id) if cache.get(cache_key) is not None: track_outcome( project_config.organization_id, project_config.project_id, key.id, Outcome.INVALID, "duplicate", event_id=event_id, ) raise APIForbidden("An event with the same ID already exists (%s)" % (event_id,)) config = project_config.config datascrubbing_settings = config.get("datascrubbingSettings") or {} data = _scrub_event_data(data, datascrubbing_settings) # mutates data (strips a lot of context if not queued) helper.insert_data_to_database(data, start_time=start_time, attachments=attachments) cache.set(cache_key, "", 60 * 60) # Cache for 1 hour api_logger.debug("New event received (%s)", event_id) event_accepted.send_robust(ip=remote_addr, data=data, project=project, sender=process_event) return event_id
def process_event(event_manager, project, key, remote_addr, helper, attachments): event_received.send_robust(ip=remote_addr, project=project, sender=process_event) start_time = time() tsdb_start_time = to_datetime(start_time) should_filter, filter_reason = event_manager.should_filter() if should_filter: increment_list = [ (tsdb.models.project_total_received, project.id), (tsdb.models.project_total_blacklisted, project.id), (tsdb.models.organization_total_received, project.organization_id), (tsdb.models.organization_total_blacklisted, project.organization_id), (tsdb.models.key_total_received, key.id), (tsdb.models.key_total_blacklisted, key.id), ] try: increment_list.append( (FILTER_STAT_KEYS_TO_VALUES[filter_reason], project.id)) # should error when filter_reason does not match a key in FILTER_STAT_KEYS_TO_VALUES except KeyError: pass tsdb.incr_multi( increment_list, timestamp=tsdb_start_time, ) metrics.incr('events.blacklisted', tags={'reason': filter_reason}, skip_internal=False) event_filtered.send_robust( ip=remote_addr, project=project, sender=process_event, ) raise APIForbidden('Event dropped due to filter: %s' % (filter_reason, )) # TODO: improve this API (e.g. make RateLimit act on __ne__) rate_limit = safe_execute(quotas.is_rate_limited, project=project, key=key, _with_transaction=False) if isinstance(rate_limit, bool): rate_limit = RateLimit(is_limited=rate_limit, retry_after=None) # XXX(dcramer): when the rate limiter fails we drop events to ensure # it cannot cascade if rate_limit is None or rate_limit.is_limited: if rate_limit is None: api_logger.debug('Dropped event due to error with rate limiter') tsdb.incr_multi( [ (tsdb.models.project_total_received, project.id), (tsdb.models.project_total_rejected, project.id), (tsdb.models.organization_total_received, project.organization_id), (tsdb.models.organization_total_rejected, project.organization_id), (tsdb.models.key_total_received, key.id), (tsdb.models.key_total_rejected, key.id), ], timestamp=tsdb_start_time, ) metrics.incr( 'events.dropped', tags={ 'reason': rate_limit.reason_code if rate_limit else 'unknown', }, skip_internal=False, ) event_dropped.send_robust( ip=remote_addr, project=project, reason_code=rate_limit.reason_code if rate_limit else None, sender=process_event, ) if rate_limit is not None: raise APIRateLimited(rate_limit.retry_after) else: tsdb.incr_multi( [ (tsdb.models.project_total_received, project.id), (tsdb.models.organization_total_received, project.organization_id), (tsdb.models.key_total_received, key.id), ], timestamp=tsdb_start_time, ) org_options = OrganizationOption.objects.get_all_values( project.organization_id) data = event_manager.get_data() del event_manager event_id = data['event_id'] # TODO(dcramer): ideally we'd only validate this if the event_id was # supplied by the user cache_key = 'ev:%s:%s' % ( project.id, event_id, ) if cache.get(cache_key) is not None: raise APIForbidden('An event with the same ID already exists (%s)' % (event_id, )) scrub_ip_address = ( org_options.get('sentry:require_scrub_ip_address', False) or project.get_option('sentry:scrub_ip_address', False)) scrub_data = (org_options.get('sentry:require_scrub_data', False) or project.get_option('sentry:scrub_data', True)) if scrub_data: # We filter data immediately before it ever gets into the queue sensitive_fields_key = 'sentry:sensitive_fields' sensitive_fields = (org_options.get(sensitive_fields_key, []) + project.get_option(sensitive_fields_key, [])) exclude_fields_key = 'sentry:safe_fields' exclude_fields = (org_options.get(exclude_fields_key, []) + project.get_option(exclude_fields_key, [])) scrub_defaults = (org_options.get('sentry:require_scrub_defaults', False) or project.get_option('sentry:scrub_defaults', True)) SensitiveDataFilter( fields=sensitive_fields, include_defaults=scrub_defaults, exclude_fields=exclude_fields, ).apply(data) if scrub_ip_address: # We filter data immediately before it ever gets into the queue helper.ensure_does_not_have_ip(data) # mutates data (strips a lot of context if not queued) helper.insert_data_to_database(data, start_time=start_time, attachments=attachments) cache.set(cache_key, '', 60 * 5) api_logger.debug('New event received (%s)', event_id) event_accepted.send_robust( ip=remote_addr, data=data, project=project, sender=process_event, ) return event_id
def process(self, request, project, auth, helper, data, **kwargs): metrics.incr('events.total') if not data: raise APIError('No JSON data was found') remote_addr = request.META['REMOTE_ADDR'] data = LazyData( data=data, content_encoding=request.META.get('HTTP_CONTENT_ENCODING', ''), helper=helper, project=project, auth=auth, client_ip=remote_addr, ) event_received.send_robust( ip=remote_addr, sender=type(self), ) if helper.should_filter(project, data, ip_address=remote_addr): app.tsdb.incr_multi([ (app.tsdb.models.project_total_received, project.id), (app.tsdb.models.project_total_blacklisted, project.id), (app.tsdb.models.organization_total_received, project.organization_id), (app.tsdb.models.organization_total_blacklisted, project.organization_id), ]) metrics.incr('events.blacklisted') raise APIForbidden('Event dropped due to filter') # TODO: improve this API (e.g. make RateLimit act on __ne__) rate_limit = safe_execute(app.quotas.is_rate_limited, project=project, _with_transaction=False) if isinstance(rate_limit, bool): rate_limit = RateLimit(is_limited=rate_limit, retry_after=None) # XXX(dcramer): when the rate limiter fails we drop events to ensure # it cannot cascade if rate_limit is None or rate_limit.is_limited: if rate_limit is None: helper.log.debug('Dropped event due to error with rate limiter') app.tsdb.incr_multi([ (app.tsdb.models.project_total_received, project.id), (app.tsdb.models.project_total_rejected, project.id), (app.tsdb.models.organization_total_received, project.organization_id), (app.tsdb.models.organization_total_rejected, project.organization_id), ]) metrics.incr('events.dropped') if rate_limit is not None: raise APIRateLimited(rate_limit.retry_after) else: app.tsdb.incr_multi([ (app.tsdb.models.project_total_received, project.id), (app.tsdb.models.organization_total_received, project.organization_id), ]) org_options = OrganizationOption.objects.get_all_values(project.organization_id) if org_options.get('sentry:require_scrub_ip_address', False): scrub_ip_address = True else: scrub_ip_address = project.get_option('sentry:scrub_ip_address', False) event_id = data['event_id'] # TODO(dcramer): ideally we'd only validate this if the event_id was # supplied by the user cache_key = 'ev:%s:%s' % (project.id, event_id,) if cache.get(cache_key) is not None: raise APIForbidden('An event with the same ID already exists (%s)' % (event_id,)) if org_options.get('sentry:require_scrub_data', False): scrub_data = True else: scrub_data = project.get_option('sentry:scrub_data', True) if scrub_data: # We filter data immediately before it ever gets into the queue sensitive_fields_key = 'sentry:sensitive_fields' sensitive_fields = ( org_options.get(sensitive_fields_key, []) + project.get_option(sensitive_fields_key, []) ) if org_options.get('sentry:require_scrub_defaults', False): scrub_defaults = True else: scrub_defaults = project.get_option('sentry:scrub_defaults', True) inst = SensitiveDataFilter( fields=sensitive_fields, include_defaults=scrub_defaults, ) inst.apply(data) if scrub_ip_address: # We filter data immediately before it ever gets into the queue helper.ensure_does_not_have_ip(data) # mutates data (strips a lot of context if not queued) helper.insert_data_to_database(data) cache.set(cache_key, '', 60 * 5) helper.log.debug('New event received (%s)', event_id) event_accepted.send_robust( ip=remote_addr, data=data, project=project, sender=type(self), ) return event_id
def process_event(message, projects): payload = message["payload"] start_time = float(message["start_time"]) event_id = message["event_id"] project_id = int(message["project_id"]) remote_addr = message.get("remote_addr") attachments = message.get("attachments") or () # check that we haven't already processed this event (a previous instance of the forwarder # died before it could commit the event queue offset) deduplication_key = "ev:{}:{}".format(project_id, event_id) if cache.get(deduplication_key) is not None: logger.warning( "pre-process-forwarder detected a duplicated event" " with id:%s for project:%s.", event_id, project_id, ) return # message already processed do not reprocess try: project = projects[project_id] except KeyError: logger.error("Project for ingested event does not exist: %s", project_id) return # Parse the JSON payload. This is required to compute the cache key and # call process_event. The payload will be put into Kafka raw, to avoid # serializing it again. # XXX: Do not use CanonicalKeyDict here. This may break preprocess_event # which assumes that data passed in is a raw dictionary. data = json.loads(payload) cache_key = cache_key_for_event(data) default_cache.set(cache_key, data, CACHE_TIMEOUT) if attachments: attachment_objects = [ CachedAttachment(type=attachment.pop("attachment_type"), **attachment) for attachment in attachments ] attachment_cache.set(cache_key, attachments=attachment_objects, timeout=CACHE_TIMEOUT) # Preprocess this event, which spawns either process_event or # save_event. Pass data explicitly to avoid fetching it again from the # cache. preprocess_event(cache_key=cache_key, data=data, start_time=start_time, event_id=event_id, project=project) # remember for an 1 hour that we saved this event (deduplication protection) cache.set(deduplication_key, "", CACHE_TIMEOUT) # emit event_accepted once everything is done event_accepted.send_robust(ip=remote_addr, data=data, project=project, sender=process_event)
def _do_process_event(message, projects): payload = message["payload"] start_time = float(message["start_time"]) event_id = message["event_id"] project_id = int(message["project_id"]) remote_addr = message.get("remote_addr") attachments = message.get("attachments") or () # check that we haven't already processed this event (a previous instance of the forwarder # died before it could commit the event queue offset) # # XXX(markus): I believe this code is extremely broken: # # * it practically uses memcached in prod which has no consistency # guarantees (no idea how we don't run into issues there) # # * a TTL of 1h basically doesn't guarantee any deduplication at all. It # just guarantees a good error message... for one hour. # # This code has been ripped from the old python store endpoint. We're # keeping it around because it does provide some protection against # reprocessing good events if a single consumer is in a restart loop. deduplication_key = f"ev:{project_id}:{event_id}" if cache.get(deduplication_key) is not None: logger.warning( "pre-process-forwarder detected a duplicated event" " with id:%s for project:%s.", event_id, project_id, ) return # message already processed do not reprocess try: project = projects[project_id] except KeyError: logger.error("Project for ingested event does not exist: %s", project_id) return # Parse the JSON payload. This is required to compute the cache key and # call process_event. The payload will be put into Kafka raw, to avoid # serializing it again. # XXX: Do not use CanonicalKeyDict here. This may break preprocess_event # which assumes that data passed in is a raw dictionary. data = json.loads(payload) cache_key = event_processing_store.store(data) if attachments: attachment_objects = [ CachedAttachment(type=attachment.pop("attachment_type"), **attachment) for attachment in attachments ] attachment_cache.set(cache_key, attachments=attachment_objects, timeout=CACHE_TIMEOUT) # Preprocess this event, which spawns either process_event or # save_event. Pass data explicitly to avoid fetching it again from the # cache. with sentry_sdk.start_span(op="ingest_consumer.process_event.preprocess_event"): preprocess_event( cache_key=cache_key, data=data, start_time=start_time, event_id=event_id, project=project, ) # remember for an 1 hour that we saved this event (deduplication protection) cache.set(deduplication_key, "", CACHE_TIMEOUT) # emit event_accepted once everything is done event_accepted.send_robust(ip=remote_addr, data=data, project=project, sender=process_event)
def process(self, request, project, key, auth, helper, data, attachments=None, **kwargs): metrics.incr('events.total') if not data: raise APIError('No JSON data was found') remote_addr = request.META['REMOTE_ADDR'] data = LazyData( data=data, content_encoding=request.META.get('HTTP_CONTENT_ENCODING', ''), helper=helper, project=project, key=key, auth=auth, client_ip=remote_addr, ) event_received.send_robust( ip=remote_addr, project=project, sender=type(self), ) start_time = time() tsdb_start_time = to_datetime(start_time) should_filter, filter_reason = helper.should_filter( project, data, ip_address=remote_addr) if should_filter: increment_list = [ (tsdb.models.project_total_received, project.id), (tsdb.models.project_total_blacklisted, project.id), (tsdb.models.organization_total_received, project.organization_id), (tsdb.models.organization_total_blacklisted, project.organization_id), (tsdb.models.key_total_received, key.id), (tsdb.models.key_total_blacklisted, key.id), ] try: increment_list.append( (FILTER_STAT_KEYS_TO_VALUES[filter_reason], project.id)) # should error when filter_reason does not match a key in FILTER_STAT_KEYS_TO_VALUES except KeyError: pass tsdb.incr_multi( increment_list, timestamp=tsdb_start_time, ) metrics.incr('events.blacklisted', tags={ 'reason': filter_reason}) event_filtered.send_robust( ip=remote_addr, project=project, sender=type(self), ) raise APIForbidden('Event dropped due to filter: %s' % (filter_reason,)) # TODO: improve this API (e.g. make RateLimit act on __ne__) rate_limit = safe_execute( quotas.is_rate_limited, project=project, key=key, _with_transaction=False ) if isinstance(rate_limit, bool): rate_limit = RateLimit(is_limited=rate_limit, retry_after=None) # XXX(dcramer): when the rate limiter fails we drop events to ensure # it cannot cascade if rate_limit is None or rate_limit.is_limited: if rate_limit is None: helper.log.debug( 'Dropped event due to error with rate limiter') tsdb.incr_multi( [ (tsdb.models.project_total_received, project.id), (tsdb.models.project_total_rejected, project.id), (tsdb.models.organization_total_received, project.organization_id), (tsdb.models.organization_total_rejected, project.organization_id), (tsdb.models.key_total_received, key.id), (tsdb.models.key_total_rejected, key.id), ], timestamp=tsdb_start_time, ) metrics.incr( 'events.dropped', tags={ 'reason': rate_limit.reason_code if rate_limit else 'unknown', } ) event_dropped.send_robust( ip=remote_addr, project=project, sender=type(self), reason_code=rate_limit.reason_code if rate_limit else None, ) if rate_limit is not None: raise APIRateLimited(rate_limit.retry_after) else: tsdb.incr_multi( [ (tsdb.models.project_total_received, project.id), (tsdb.models.organization_total_received, project.organization_id), (tsdb.models.key_total_received, key.id), ], timestamp=tsdb_start_time, ) org_options = OrganizationOption.objects.get_all_values( project.organization_id) event_id = data['event_id'] # TODO(dcramer): ideally we'd only validate this if the event_id was # supplied by the user cache_key = 'ev:%s:%s' % (project.id, event_id, ) if cache.get(cache_key) is not None: raise APIForbidden( 'An event with the same ID already exists (%s)' % (event_id, )) scrub_ip_address = (org_options.get('sentry:require_scrub_ip_address', False) or project.get_option('sentry:scrub_ip_address', False)) scrub_data = (org_options.get('sentry:require_scrub_data', False) or project.get_option('sentry:scrub_data', True)) if scrub_data: # We filter data immediately before it ever gets into the queue sensitive_fields_key = 'sentry:sensitive_fields' sensitive_fields = ( org_options.get(sensitive_fields_key, []) + project.get_option(sensitive_fields_key, []) ) exclude_fields_key = 'sentry:safe_fields' exclude_fields = ( org_options.get(exclude_fields_key, []) + project.get_option(exclude_fields_key, []) ) scrub_defaults = (org_options.get('sentry:require_scrub_defaults', False) or project.get_option('sentry:scrub_defaults', True)) SensitiveDataFilter( fields=sensitive_fields, include_defaults=scrub_defaults, exclude_fields=exclude_fields, ).apply(data) if scrub_ip_address: # We filter data immediately before it ever gets into the queue helper.ensure_does_not_have_ip(data) # mutates data (strips a lot of context if not queued) helper.insert_data_to_database(data, start_time=start_time, attachments=attachments) cache.set(cache_key, '', 60 * 5) helper.log.debug('New event received (%s)', event_id) event_accepted.send_robust( ip=remote_addr, data=data, project=project, sender=type(self), ) return event_id
def process_event(event_manager, project, key, remote_addr, helper, attachments, relay_config): event_received.send_robust(ip=remote_addr, project=project, sender=process_event) start_time = time() data = event_manager.get_data() should_filter, filter_reason = event_manager.should_filter() del event_manager event_id = data['event_id'] if should_filter: track_outcome(relay_config.organization_id, relay_config.project_id, key.id, Outcome.FILTERED, filter_reason, event_id=event_id) metrics.incr('events.blacklisted', tags={'reason': filter_reason}, skip_internal=False) event_filtered.send_robust( ip=remote_addr, project=project, sender=process_event, ) raise APIForbidden('Event dropped due to filter: %s' % (filter_reason, )) # TODO: improve this API (e.g. make RateLimit act on __ne__) rate_limit = safe_execute(quotas.is_rate_limited, project=project, key=key, _with_transaction=False) if isinstance(rate_limit, bool): rate_limit = RateLimit(is_limited=rate_limit, retry_after=None) # XXX(dcramer): when the rate limiter fails we drop events to ensure # it cannot cascade if rate_limit is None or rate_limit.is_limited: if rate_limit is None: api_logger.debug('Dropped event due to error with rate limiter') reason = rate_limit.reason_code if rate_limit else None track_outcome(relay_config.organization_id, relay_config.project_id, key.id, Outcome.RATE_LIMITED, reason, event_id=event_id) metrics.incr( 'events.dropped', tags={ 'reason': reason or 'unknown', }, skip_internal=False, ) event_dropped.send_robust( ip=remote_addr, project=project, reason_code=reason, sender=process_event, ) if rate_limit is not None: raise APIRateLimited(rate_limit.retry_after) # TODO(dcramer): ideally we'd only validate this if the event_id was # supplied by the user cache_key = 'ev:%s:%s' % ( relay_config.project_id, event_id, ) if cache.get(cache_key) is not None: track_outcome(relay_config.organization_id, relay_config.project_id, key.id, Outcome.INVALID, 'duplicate', event_id=event_id) raise APIForbidden('An event with the same ID already exists (%s)' % (event_id, )) config = relay_config.config scrub_ip_address = config.get('scrub_ip_addresses') scrub_data = config.get('scrub_data') if scrub_data: # We filter data immediately before it ever gets into the queue sensitive_fields = config.get('sensitive_fields') exclude_fields = config.get('exclude_fields') scrub_defaults = config.get('scrub_defaults') SensitiveDataFilter( fields=sensitive_fields, include_defaults=scrub_defaults, exclude_fields=exclude_fields, ).apply(data) if scrub_ip_address: # We filter data immediately before it ever gets into the queue helper.ensure_does_not_have_ip(data) # mutates data (strips a lot of context if not queued) helper.insert_data_to_database(data, start_time=start_time, attachments=attachments) cache.set(cache_key, '', 60 * 5) api_logger.debug('New event received (%s)', event_id) event_accepted.send_robust( ip=remote_addr, data=data, project=project, sender=process_event, ) return event_id
def process_event(event_manager, project, key, remote_addr, helper, attachments, project_config): event_received.send_robust(ip=remote_addr, project=project, sender=process_event) start_time = time() data = event_manager.get_data() should_filter, filter_reason = event_manager.should_filter() del event_manager event_id = data["event_id"] data_category = DataCategory.from_event_type(data.get("type")) if should_filter: track_outcome( project_config.organization_id, project_config.project_id, key.id, Outcome.FILTERED, filter_reason, event_id=event_id, category=data_category, ) metrics.incr("events.blacklisted", tags={"reason": filter_reason}, skip_internal=False) # relay will no longer be able to provide information about filter # status so to see the impact we're adding a way to turn on relay # like behavior here. if options.get("store.lie-about-filter-status"): return event_id raise APIForbidden("Event dropped due to filter: %s" % (filter_reason, )) # TODO: improve this API (e.g. make RateLimit act on __ne__) rate_limit = safe_execute(quotas.is_rate_limited, project=project, key=key, _with_transaction=False) if isinstance(rate_limit, bool): rate_limit = RateLimit(is_limited=rate_limit, retry_after=None) # XXX(dcramer): when the rate limiter fails we drop events to ensure # it cannot cascade if rate_limit is None or rate_limit.is_limited: if rate_limit is None: api_logger.debug("Dropped event due to error with rate limiter") reason = rate_limit.reason_code if rate_limit else None track_outcome( project_config.organization_id, project_config.project_id, key.id, Outcome.RATE_LIMITED, reason, event_id=event_id, category=data_category, ) metrics.incr("events.dropped", tags={"reason": reason or "unknown"}, skip_internal=False) if rate_limit is not None: raise APIRateLimited(rate_limit.retry_after) # TODO(dcramer): ideally we'd only validate this if the event_id was # supplied by the user cache_key = "ev:%s:%s" % (project_config.project_id, event_id) # XXX(markus): I believe this code is extremely broken: # # * it practically uses memcached in prod which has no consistency # guarantees (no idea how we don't run into issues there) # # * a TTL of 1h basically doesn't guarantee any deduplication at all. It # just guarantees a good error message... for one hour. if cache.get(cache_key) is not None: track_outcome( project_config.organization_id, project_config.project_id, key.id, Outcome.INVALID, "duplicate", event_id=event_id, category=data_category, ) raise APIForbidden("An event with the same ID already exists (%s)" % (event_id, )) data = scrub_data(project_config, dict(data)) # mutates data (strips a lot of context if not queued) helper.insert_data_to_database(data, start_time=start_time, attachments=attachments) cache.set(cache_key, "", 60 * 60) # Cache for 1 hour api_logger.debug("New event received (%s)", event_id) event_accepted.send_robust(ip=remote_addr, data=data, project=project, sender=process_event) return event_id
def process(self, request, project, auth, helper, data, **kwargs): metrics.incr('events.total') remote_addr = request.META['REMOTE_ADDR'] event_received.send_robust( ip=remote_addr, sender=type(self), ) if not is_valid_ip(remote_addr, project): app.tsdb.incr_multi([ (app.tsdb.models.project_total_received, project.id), (app.tsdb.models.project_total_blacklisted, project.id), (app.tsdb.models.organization_total_received, project.organization_id), (app.tsdb.models.organization_total_blacklisted, project.organization_id), ]) metrics.incr('events.blacklisted') raise APIForbidden('Blacklisted IP address: %s' % (remote_addr, )) # TODO: improve this API (e.g. make RateLimit act on __ne__) rate_limit = safe_execute(app.quotas.is_rate_limited, project=project, _with_transaction=False) if isinstance(rate_limit, bool): rate_limit = RateLimit(is_limited=rate_limit, retry_after=None) # XXX(dcramer): when the rate limiter fails we drop events to ensure # it cannot cascade if rate_limit is None or rate_limit.is_limited: if rate_limit is None: helper.log.debug( 'Dropped event due to error with rate limiter') app.tsdb.incr_multi([ (app.tsdb.models.project_total_received, project.id), (app.tsdb.models.project_total_rejected, project.id), (app.tsdb.models.organization_total_received, project.organization_id), (app.tsdb.models.organization_total_rejected, project.organization_id), ]) metrics.incr('events.dropped') if rate_limit is not None: raise APIRateLimited(rate_limit.retry_after) else: app.tsdb.incr_multi([ (app.tsdb.models.project_total_received, project.id), (app.tsdb.models.organization_total_received, project.organization_id), ]) content_encoding = request.META.get('HTTP_CONTENT_ENCODING', '') if isinstance(data, basestring): if content_encoding == 'gzip': data = helper.decompress_gzip(data) elif content_encoding == 'deflate': data = helper.decompress_deflate(data) elif not data.startswith('{'): data = helper.decode_and_decompress_data(data) data = helper.safely_load_json_string(data) # mutates data data = helper.validate_data(project, data) if 'sdk' not in data: sdk = helper.parse_client_as_sdk(auth.client) if sdk: data['sdk'] = sdk # mutates data manager = EventManager(data, version=auth.version) data = manager.normalize() org_options = OrganizationOption.objects.get_all_values( project.organization_id) if org_options.get('sentry:require_scrub_ip_address', False): scrub_ip_address = True else: scrub_ip_address = project.get_option('sentry:scrub_ip_address', False) # insert IP address if not available if auth.is_public and not scrub_ip_address: helper.ensure_has_ip(data, remote_addr) event_id = data['event_id'] # TODO(dcramer): ideally we'd only validate this if the event_id was # supplied by the user cache_key = 'ev:%s:%s' % ( project.id, event_id, ) if cache.get(cache_key) is not None: raise APIForbidden( 'An event with the same ID already exists (%s)' % (event_id, )) if org_options.get('sentry:require_scrub_data', False): scrub_data = True else: scrub_data = project.get_option('sentry:scrub_data', True) if scrub_data: # We filter data immediately before it ever gets into the queue sensitive_fields_key = 'sentry:sensitive_fields' sensitive_fields = (org_options.get(sensitive_fields_key, []) + project.get_option(sensitive_fields_key, [])) if org_options.get('sentry:require_scrub_defaults', False): scrub_defaults = True else: scrub_defaults = project.get_option('sentry:scrub_defaults', True) inst = SensitiveDataFilter( fields=sensitive_fields, include_defaults=scrub_defaults, ) inst.apply(data) if scrub_ip_address: # We filter data immediately before it ever gets into the queue helper.ensure_does_not_have_ip(data) # mutates data (strips a lot of context if not queued) helper.insert_data_to_database(data) cache.set(cache_key, '', 60 * 5) helper.log.debug('New event received (%s)', event_id) event_accepted.send_robust( ip=remote_addr, data=data, project=project, sender=type(self), ) return event_id
def process_event(event_manager, project, key, remote_addr, helper, attachments, project_config): event_received.send_robust(ip=remote_addr, project=project, sender=process_event) start_time = time() data = event_manager.get_data() should_filter, filter_reason = event_manager.should_filter() del event_manager event_id = data["event_id"] if should_filter: # Mark that the event_filtered signal is sent. Do this before emitting # the outcome to avoid a potential race between OutcomesConsumer and # `event_filtered.send_robust` below. mark_signal_sent(project_config.project_id, event_id) track_outcome( project_config.organization_id, project_config.project_id, key.id, Outcome.FILTERED, filter_reason, event_id=event_id, ) metrics.incr("events.blacklisted", tags={"reason": filter_reason}, skip_internal=False) event_filtered.send_robust(ip=remote_addr, project=project, sender=process_event) raise APIForbidden("Event dropped due to filter: %s" % (filter_reason, )) # TODO: improve this API (e.g. make RateLimit act on __ne__) rate_limit = safe_execute(quotas.is_rate_limited, project=project, key=key, _with_transaction=False) if isinstance(rate_limit, bool): rate_limit = RateLimit(is_limited=rate_limit, retry_after=None) # XXX(dcramer): when the rate limiter fails we drop events to ensure # it cannot cascade if rate_limit is None or rate_limit.is_limited: if rate_limit is None: api_logger.debug("Dropped event due to error with rate limiter") # Mark that the event_dropped signal is sent. Do this before emitting # the outcome to avoid a potential race between OutcomesConsumer and # `event_dropped.send_robust` below. mark_signal_sent(project_config.project_id, event_id) reason = rate_limit.reason_code if rate_limit else None track_outcome( project_config.organization_id, project_config.project_id, key.id, Outcome.RATE_LIMITED, reason, event_id=event_id, ) metrics.incr("events.dropped", tags={"reason": reason or "unknown"}, skip_internal=False) event_dropped.send_robust(ip=remote_addr, project=project, reason_code=reason, sender=process_event) if rate_limit is not None: raise APIRateLimited(rate_limit.retry_after) # TODO(dcramer): ideally we'd only validate this if the event_id was # supplied by the user cache_key = "ev:%s:%s" % (project_config.project_id, event_id) if cache.get(cache_key) is not None: track_outcome( project_config.organization_id, project_config.project_id, key.id, Outcome.INVALID, "duplicate", event_id=event_id, ) raise APIForbidden("An event with the same ID already exists (%s)" % (event_id, )) config = project_config.config datascrubbing_settings = config.get("datascrubbingSettings") or {} scrub_ip_address = datascrubbing_settings.get("scrubIpAddresses") scrub_data = datascrubbing_settings.get("scrubData") if scrub_data: # We filter data immediately before it ever gets into the queue sensitive_fields = datascrubbing_settings.get("sensitiveFields") exclude_fields = datascrubbing_settings.get("excludeFields") scrub_defaults = datascrubbing_settings.get("scrubDefaults") SensitiveDataFilter(fields=sensitive_fields, include_defaults=scrub_defaults, exclude_fields=exclude_fields).apply(data) if scrub_ip_address: # We filter data immediately before it ever gets into the queue helper.ensure_does_not_have_ip(data) # mutates data (strips a lot of context if not queued) helper.insert_data_to_database(data, start_time=start_time, attachments=attachments) cache.set(cache_key, "", 60 * 60) # Cache for 1 hour api_logger.debug("New event received (%s)", event_id) event_accepted.send_robust(ip=remote_addr, data=data, project=project, sender=process_event) return event_id
def process_event(event_manager, project, key, remote_addr, helper, attachments): event_received.send_robust(ip=remote_addr, project=project, sender=process_event) start_time = time() data = event_manager.get_data() should_filter, filter_reason = event_manager.should_filter() del event_manager event_id = data['event_id'] if should_filter: track_outcome( project.organization_id, project.id, key.id, Outcome.FILTERED, filter_reason, event_id=event_id ) metrics.incr( 'events.blacklisted', tags={'reason': filter_reason}, skip_internal=False ) event_filtered.send_robust( ip=remote_addr, project=project, sender=process_event, ) raise APIForbidden('Event dropped due to filter: %s' % (filter_reason,)) # TODO: improve this API (e.g. make RateLimit act on __ne__) rate_limit = safe_execute( quotas.is_rate_limited, project=project, key=key, _with_transaction=False ) if isinstance(rate_limit, bool): rate_limit = RateLimit(is_limited=rate_limit, retry_after=None) # XXX(dcramer): when the rate limiter fails we drop events to ensure # it cannot cascade if rate_limit is None or rate_limit.is_limited: if rate_limit is None: api_logger.debug('Dropped event due to error with rate limiter') reason = rate_limit.reason_code if rate_limit else None track_outcome( project.organization_id, project.id, key.id, Outcome.RATE_LIMITED, reason, event_id=event_id ) metrics.incr( 'events.dropped', tags={ 'reason': reason or 'unknown', }, skip_internal=False, ) event_dropped.send_robust( ip=remote_addr, project=project, reason_code=reason, sender=process_event, ) if rate_limit is not None: raise APIRateLimited(rate_limit.retry_after) org_options = OrganizationOption.objects.get_all_values( project.organization_id) # TODO(dcramer): ideally we'd only validate this if the event_id was # supplied by the user cache_key = 'ev:%s:%s' % (project.id, event_id, ) if cache.get(cache_key) is not None: track_outcome( project.organization_id, project.id, key.id, Outcome.INVALID, 'duplicate', event_id=event_id ) raise APIForbidden( 'An event with the same ID already exists (%s)' % (event_id, )) scrub_ip_address = (org_options.get('sentry:require_scrub_ip_address', False) or project.get_option('sentry:scrub_ip_address', False)) scrub_data = (org_options.get('sentry:require_scrub_data', False) or project.get_option('sentry:scrub_data', True)) if scrub_data: # We filter data immediately before it ever gets into the queue sensitive_fields_key = 'sentry:sensitive_fields' sensitive_fields = ( org_options.get(sensitive_fields_key, []) + project.get_option(sensitive_fields_key, []) ) exclude_fields_key = 'sentry:safe_fields' exclude_fields = ( org_options.get(exclude_fields_key, []) + project.get_option(exclude_fields_key, []) ) scrub_defaults = (org_options.get('sentry:require_scrub_defaults', False) or project.get_option('sentry:scrub_defaults', True)) SensitiveDataFilter( fields=sensitive_fields, include_defaults=scrub_defaults, exclude_fields=exclude_fields, ).apply(data) if scrub_ip_address: # We filter data immediately before it ever gets into the queue helper.ensure_does_not_have_ip(data) # mutates data (strips a lot of context if not queued) helper.insert_data_to_database(data, start_time=start_time, attachments=attachments) cache.set(cache_key, '', 60 * 5) api_logger.debug('New event received (%s)', event_id) event_accepted.send_robust( ip=remote_addr, data=data, project=project, sender=process_event, ) return event_id
def process(self, request, project, auth, helper, data, **kwargs): metrics.incr('events.total') remote_addr = request.META['REMOTE_ADDR'] event_received.send_robust( ip=remote_addr, sender=type(self), ) if not is_valid_ip(remote_addr, project): app.tsdb.incr_multi([ (app.tsdb.models.project_total_received, project.id), (app.tsdb.models.project_total_blacklisted, project.id), (app.tsdb.models.organization_total_received, project.organization_id), (app.tsdb.models.organization_total_blacklisted, project.organization_id), ]) metrics.incr('events.blacklisted') raise APIForbidden('Blacklisted IP address: %s' % (remote_addr,)) # TODO: improve this API (e.g. make RateLimit act on __ne__) rate_limit = safe_execute(app.quotas.is_rate_limited, project=project, _with_transaction=False) if isinstance(rate_limit, bool): rate_limit = RateLimit(is_limited=rate_limit, retry_after=None) # XXX(dcramer): when the rate limiter fails we drop events to ensure # it cannot cascade if rate_limit is None or rate_limit.is_limited: if rate_limit is None: helper.log.debug('Dropped event due to error with rate limiter') app.tsdb.incr_multi([ (app.tsdb.models.project_total_received, project.id), (app.tsdb.models.project_total_rejected, project.id), (app.tsdb.models.organization_total_received, project.organization_id), (app.tsdb.models.organization_total_rejected, project.organization_id), ]) metrics.incr('events.dropped') if rate_limit is not None: raise APIRateLimited(rate_limit.retry_after) else: app.tsdb.incr_multi([ (app.tsdb.models.project_total_received, project.id), (app.tsdb.models.organization_total_received, project.organization_id), ]) content_encoding = request.META.get('HTTP_CONTENT_ENCODING', '') if isinstance(data, basestring): if content_encoding == 'gzip': data = helper.decompress_gzip(data) elif content_encoding == 'deflate': data = helper.decompress_deflate(data) elif not data.startswith('{'): data = helper.decode_and_decompress_data(data) data = helper.safely_load_json_string(data) # mutates data data = helper.validate_data(project, data) if 'sdk' not in data: sdk = helper.parse_client_as_sdk(auth.client) if sdk: data['sdk'] = sdk # mutates data manager = EventManager(data, version=auth.version) data = manager.normalize() org_options = OrganizationOption.objects.get_all_values(project.organization_id) if org_options.get('sentry:require_scrub_ip_address', False): scrub_ip_address = True else: scrub_ip_address = project.get_option('sentry:scrub_ip_address', False) # insert IP address if not available and wanted if not scrub_ip_address: helper.ensure_has_ip( data, remote_addr, set_if_missing=auth.is_public or data.get('platform') in ('javascript', 'cocoa', 'objc')) event_id = data['event_id'] # TODO(dcramer): ideally we'd only validate this if the event_id was # supplied by the user cache_key = 'ev:%s:%s' % (project.id, event_id,) if cache.get(cache_key) is not None: raise APIForbidden('An event with the same ID already exists (%s)' % (event_id,)) if org_options.get('sentry:require_scrub_data', False): scrub_data = True else: scrub_data = project.get_option('sentry:scrub_data', True) if scrub_data: # We filter data immediately before it ever gets into the queue sensitive_fields_key = 'sentry:sensitive_fields' sensitive_fields = ( org_options.get(sensitive_fields_key, []) + project.get_option(sensitive_fields_key, []) ) if org_options.get('sentry:require_scrub_defaults', False): scrub_defaults = True else: scrub_defaults = project.get_option('sentry:scrub_defaults', True) inst = SensitiveDataFilter( fields=sensitive_fields, include_defaults=scrub_defaults, ) inst.apply(data) if scrub_ip_address: # We filter data immediately before it ever gets into the queue helper.ensure_does_not_have_ip(data) # mutates data (strips a lot of context if not queued) helper.insert_data_to_database(data) cache.set(cache_key, '', 60 * 5) helper.log.debug('New event received (%s)', event_id) event_accepted.send_robust( ip=remote_addr, data=data, project=project, sender=type(self), ) return event_id