def process_event(event_manager, project, key, remote_addr, helper, attachments, project_config):
    """Filter, rate limit, de-duplicate and finally store one incoming event.

    The steps run in this order:

    1. ``event_received`` is emitted.
    2. If ``event_manager.should_filter()`` says so, a ``FILTERED`` outcome is
       tracked. The function then either returns the event id (when the
       ``store.lie-about-filter-status`` option is truthy) or raises.
    3. ``quotas.is_rate_limited`` is consulted through ``safe_execute``. A
       limit, or a ``None`` result (treated as a limiter error), is tracked
       as a ``RATE_LIMITED`` outcome.
    4. An event id that is still present in the cache is rejected as a
       duplicate.
    5. The payload is scrubbed, handed to ``helper.insert_data_to_database``,
       the event id is cached for one hour and ``event_accepted`` is emitted.

    ``event_filtered`` / ``event_dropped`` (and the matching
    ``mark_signal_sent`` call) are only issued from here when
    ``decide_signals_in_consumer()`` returns a falsy value.

    Returns:
        The ``event_id`` taken from the event payload.

    Raises:
        APIForbidden: the event was filtered, or its id is already cached.
        APIRateLimited: the rate limiter reported a limit.
    """
    event_received.send_robust(ip=remote_addr, project=project, sender=process_event)

    received_at = time()

    payload = event_manager.get_data()
    is_filtered, filter_reason = event_manager.should_filter()
    # The manager is not needed past this point; drop the local reference.
    del event_manager

    event_id = payload["event_id"]

    if is_filtered:
        emit_signal_here = not decide_signals_in_consumer()

        if emit_signal_here:
            # Record that event_filtered is being sent *before* the outcome
            # is emitted, to avoid a potential race between OutcomesConsumer
            # and the `event_filtered.send_robust` call further down.
            mark_signal_sent(project_config.project_id, event_id)

        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.FILTERED,
            filter_reason,
            event_id=event_id,
        )
        metrics.incr("events.blacklisted", tags={"reason": filter_reason}, skip_internal=False)

        if emit_signal_here:
            event_filtered.send_robust(ip=remote_addr, project=project, sender=process_event)

        # Relay will no longer be able to report filter status, so this
        # option lets us turn on relay-like behavior here to see the impact.
        if options.get("store.lie-about-filter-status"):
            return event_id

        raise APIForbidden("Event dropped due to filter: %s" % (filter_reason,))

    # TODO: improve this API (e.g. make RateLimit act on __ne__)
    limit = safe_execute(
        quotas.is_rate_limited, project=project, key=key, _with_transaction=False
    )
    if isinstance(limit, bool):
        limit = RateLimit(is_limited=limit, retry_after=None)

    # XXX(dcramer): when the rate limiter fails we drop events to ensure
    # it cannot cascade
    limiter_failed = limit is None
    if limiter_failed or limit.is_limited:
        if limiter_failed:
            api_logger.debug("Dropped event due to error with rate limiter")

        emit_signal_here = not decide_signals_in_consumer()

        if emit_signal_here:
            # Same ordering concern as for filtered events: mark the
            # event_dropped signal as sent before emitting the outcome.
            mark_signal_sent(project_config.project_id, event_id)

        if limit:
            drop_reason = limit.reason_code
        else:
            drop_reason = None

        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.RATE_LIMITED,
            drop_reason,
            event_id=event_id,
        )
        metrics.incr(
            "events.dropped", tags={"reason": drop_reason or "unknown"}, skip_internal=False
        )

        if emit_signal_here:
            event_dropped.send_robust(
                ip=remote_addr, project=project, reason_code=drop_reason, sender=process_event
            )

        # NOTE(review): when the limiter itself failed nothing is raised, so
        # execution continues into the storage path below - confirm intended.
        if not limiter_failed:
            raise APIRateLimited(limit.retry_after)

    # TODO(dcramer): ideally we'd only validate this if the event_id was
    # supplied by the user
    dedupe_key = "ev:%s:%s" % (project_config.project_id, event_id)

    if cache.get(dedupe_key) is not None:
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.INVALID,
            "duplicate",
            event_id=event_id,
        )
        raise APIForbidden("An event with the same ID already exists (%s)" % (event_id,))

    scrubbing_settings = project_config.config.get("datascrubbingSettings") or {}
    payload = _scrub_event_data(payload, scrubbing_settings)

    # mutates data (strips a lot of context if not queued)
    helper.insert_data_to_database(payload, start_time=received_at, attachments=attachments)

    one_hour = 60 * 60
    cache.set(dedupe_key, "", one_hour)

    api_logger.debug("New event received (%s)", event_id)

    event_accepted.send_robust(
        ip=remote_addr, data=payload, project=project, sender=process_event
    )

    return event_id
def process_event(event_manager, project, key, remote_addr, helper, attachments):
    """Filter, rate limit, de-duplicate and finally store one incoming event.

    Bookkeeping in this variant is done with ``tsdb.incr_multi`` counters for
    the project, its organization and the key:

    * filtered events bump the ``received`` and ``blacklisted`` counters (plus
      a per-reason counter when ``FILTER_STAT_KEYS_TO_VALUES`` has one);
    * rate limited events bump ``received`` and ``rejected``;
    * everything else bumps only ``received``.

    Events that pass those checks are rejected if their id is still in the
    cache. Otherwise they are scrubbed according to organization and project
    options, handed to ``helper.insert_data_to_database``, remembered in the
    cache for five minutes and announced via ``event_accepted``.

    Returns:
        The ``event_id`` taken from the event payload.

    Raises:
        APIForbidden: the event was filtered, or its id is already cached.
        APIRateLimited: the rate limiter reported a limit.
    """
    event_received.send_robust(ip=remote_addr, project=project, sender=process_event)

    received_at = time()
    counter_timestamp = to_datetime(received_at)

    is_filtered, filter_reason = event_manager.should_filter()
    if is_filtered:
        counters = [
            (tsdb.models.project_total_received, project.id),
            (tsdb.models.project_total_blacklisted, project.id),
            (tsdb.models.organization_total_received, project.organization_id),
            (tsdb.models.organization_total_blacklisted, project.organization_id),
            (tsdb.models.key_total_received, key.id),
            (tsdb.models.key_total_blacklisted, key.id),
        ]
        try:
            counters.append((FILTER_STAT_KEYS_TO_VALUES[filter_reason], project.id))
        except KeyError:
            # Expected when filter_reason has no entry in
            # FILTER_STAT_KEYS_TO_VALUES; no per-reason counter is bumped.
            pass

        tsdb.incr_multi(counters, timestamp=counter_timestamp)

        metrics.incr("events.blacklisted", tags={"reason": filter_reason}, skip_internal=False)

        event_filtered.send_robust(ip=remote_addr, project=project, sender=process_event)
        raise APIForbidden("Event dropped due to filter: %s" % (filter_reason,))

    # TODO: improve this API (e.g. make RateLimit act on __ne__)
    limit = safe_execute(
        quotas.is_rate_limited, project=project, key=key, _with_transaction=False
    )
    if isinstance(limit, bool):
        limit = RateLimit(is_limited=limit, retry_after=None)

    # XXX(dcramer): when the rate limiter fails we drop events to ensure
    # it cannot cascade
    limiter_failed = limit is None
    if limiter_failed or limit.is_limited:
        if limiter_failed:
            api_logger.debug("Dropped event due to error with rate limiter")

        tsdb.incr_multi(
            [
                (tsdb.models.project_total_received, project.id),
                (tsdb.models.project_total_rejected, project.id),
                (tsdb.models.organization_total_received, project.organization_id),
                (tsdb.models.organization_total_rejected, project.organization_id),
                (tsdb.models.key_total_received, key.id),
                (tsdb.models.key_total_rejected, key.id),
            ],
            timestamp=counter_timestamp,
        )

        # The metric tag falls back to 'unknown', the signal to None.
        if limit:
            metric_reason = limit.reason_code
        else:
            metric_reason = "unknown"
        metrics.incr("events.dropped", tags={"reason": metric_reason}, skip_internal=False)

        if limit:
            signal_reason = limit.reason_code
        else:
            signal_reason = None
        event_dropped.send_robust(
            ip=remote_addr,
            project=project,
            reason_code=signal_reason,
            sender=process_event,
        )

        # NOTE(review): when the limiter itself failed nothing is raised, so
        # execution continues into the storage path below - confirm intended.
        if not limiter_failed:
            raise APIRateLimited(limit.retry_after)
    else:
        tsdb.incr_multi(
            [
                (tsdb.models.project_total_received, project.id),
                (tsdb.models.organization_total_received, project.organization_id),
                (tsdb.models.key_total_received, key.id),
            ],
            timestamp=counter_timestamp,
        )

    org_options = OrganizationOption.objects.get_all_values(project.organization_id)

    payload = event_manager.get_data()
    # The manager is not needed past this point; drop the local reference.
    del event_manager

    event_id = payload["event_id"]

    # TODO(dcramer): ideally we'd only validate this if the event_id was
    # supplied by the user
    dedupe_key = "ev:%s:%s" % (project.id, event_id)

    if cache.get(dedupe_key) is not None:
        raise APIForbidden("An event with the same ID already exists (%s)" % (event_id,))

    # An organization-level "require" option wins; otherwise the project
    # option (with its default) decides.
    strip_ip = org_options.get(
        "sentry:require_scrub_ip_address", False
    ) or project.get_option("sentry:scrub_ip_address", False)
    strip_sensitive = org_options.get(
        "sentry:require_scrub_data", False
    ) or project.get_option("sentry:scrub_data", True)

    if strip_sensitive:
        # We filter data immediately before it ever gets into the queue
        sensitive_option = "sentry:sensitive_fields"
        extra_sensitive = org_options.get(sensitive_option, []) + project.get_option(
            sensitive_option, []
        )

        safe_option = "sentry:safe_fields"
        safe_fields = org_options.get(safe_option, []) + project.get_option(safe_option, [])

        use_defaults = org_options.get(
            "sentry:require_scrub_defaults", False
        ) or project.get_option("sentry:scrub_defaults", True)

        scrubber = SensitiveDataFilter(
            fields=extra_sensitive,
            include_defaults=use_defaults,
            exclude_fields=safe_fields,
        )
        scrubber.apply(payload)

    if strip_ip:
        # We filter data immediately before it ever gets into the queue
        helper.ensure_does_not_have_ip(payload)

    # mutates data (strips a lot of context if not queued)
    helper.insert_data_to_database(payload, start_time=received_at, attachments=attachments)

    five_minutes = 60 * 5
    cache.set(dedupe_key, "", five_minutes)

    api_logger.debug("New event received (%s)", event_id)

    event_accepted.send_robust(
        ip=remote_addr, data=payload, project=project, sender=process_event
    )

    return event_id
def process_event(event_manager, project, key, remote_addr, helper, attachments, relay_config):
    """Filter, rate limit, de-duplicate and finally store one incoming event.

    Outcomes are reported through ``track_outcome`` using the organization
    and project ids carried by ``relay_config``:

    * ``FILTERED`` when ``event_manager.should_filter()`` asks for it;
    * ``RATE_LIMITED`` when ``quotas.is_rate_limited`` reports a limit or the
      limiter call yields ``None`` (treated as a limiter error);
    * ``INVALID`` / ``"duplicate"`` when the event id is still in the cache.

    Surviving events are scrubbed according to ``relay_config.config``,
    handed to ``helper.insert_data_to_database``, remembered in the cache for
    five minutes and announced via ``event_accepted``.

    Returns:
        The ``event_id`` taken from the event payload.

    Raises:
        APIForbidden: the event was filtered, or its id is already cached.
        APIRateLimited: the rate limiter reported a limit.
    """
    event_received.send_robust(ip=remote_addr, project=project, sender=process_event)

    received_at = time()

    payload = event_manager.get_data()
    is_filtered, filter_reason = event_manager.should_filter()
    # The manager is not needed past this point; drop the local reference.
    del event_manager

    event_id = payload["event_id"]

    if is_filtered:
        track_outcome(
            relay_config.organization_id,
            relay_config.project_id,
            key.id,
            Outcome.FILTERED,
            filter_reason,
            event_id=event_id,
        )
        metrics.incr("events.blacklisted", tags={"reason": filter_reason}, skip_internal=False)
        event_filtered.send_robust(ip=remote_addr, project=project, sender=process_event)
        raise APIForbidden("Event dropped due to filter: %s" % (filter_reason,))

    # TODO: improve this API (e.g. make RateLimit act on __ne__)
    limit = safe_execute(
        quotas.is_rate_limited, project=project, key=key, _with_transaction=False
    )
    if isinstance(limit, bool):
        limit = RateLimit(is_limited=limit, retry_after=None)

    # XXX(dcramer): when the rate limiter fails we drop events to ensure
    # it cannot cascade
    limiter_failed = limit is None
    if limiter_failed or limit.is_limited:
        if limiter_failed:
            api_logger.debug("Dropped event due to error with rate limiter")

        if limit:
            drop_reason = limit.reason_code
        else:
            drop_reason = None

        track_outcome(
            relay_config.organization_id,
            relay_config.project_id,
            key.id,
            Outcome.RATE_LIMITED,
            drop_reason,
            event_id=event_id,
        )
        metrics.incr(
            "events.dropped", tags={"reason": drop_reason or "unknown"}, skip_internal=False
        )
        event_dropped.send_robust(
            ip=remote_addr, project=project, reason_code=drop_reason, sender=process_event
        )

        # NOTE(review): when the limiter itself failed nothing is raised, so
        # execution continues into the storage path below - confirm intended.
        if not limiter_failed:
            raise APIRateLimited(limit.retry_after)

    # TODO(dcramer): ideally we'd only validate this if the event_id was
    # supplied by the user
    dedupe_key = "ev:%s:%s" % (relay_config.project_id, event_id)

    if cache.get(dedupe_key) is not None:
        track_outcome(
            relay_config.organization_id,
            relay_config.project_id,
            key.id,
            Outcome.INVALID,
            "duplicate",
            event_id=event_id,
        )
        raise APIForbidden("An event with the same ID already exists (%s)" % (event_id,))

    settings = relay_config.config
    strip_ip = settings.get("scrub_ip_addresses")
    strip_sensitive = settings.get("scrub_data")

    if strip_sensitive:
        # We filter data immediately before it ever gets into the queue
        extra_sensitive = settings.get("sensitive_fields")
        safe_fields = settings.get("exclude_fields")
        use_defaults = settings.get("scrub_defaults")

        scrubber = SensitiveDataFilter(
            fields=extra_sensitive,
            include_defaults=use_defaults,
            exclude_fields=safe_fields,
        )
        scrubber.apply(payload)

    if strip_ip:
        # We filter data immediately before it ever gets into the queue
        helper.ensure_does_not_have_ip(payload)

    # mutates data (strips a lot of context if not queued)
    helper.insert_data_to_database(payload, start_time=received_at, attachments=attachments)

    five_minutes = 60 * 5
    cache.set(dedupe_key, "", five_minutes)

    api_logger.debug("New event received (%s)", event_id)

    event_accepted.send_robust(
        ip=remote_addr, data=payload, project=project, sender=process_event
    )

    return event_id
def process_event(event_manager, project, key, remote_addr, helper, attachments):
    """Take one incoming event through filter, quota and duplicate checks, then store it.

    ``tsdb.incr_multi`` is used to count what happened, for the project, its
    organization and the key: ``received`` + ``blacklisted`` for filtered
    events (and a per-reason counter if ``FILTER_STAT_KEYS_TO_VALUES`` knows
    the reason), ``received`` + ``rejected`` for rate limited events, and
    just ``received`` otherwise.

    After the checks the payload is scrubbed as dictated by the organization
    and project options, passed to ``helper.insert_data_to_database``, its
    id is cached for five minutes and ``event_accepted`` is emitted.

    Returns:
        The ``event_id`` taken from the event payload.

    Raises:
        APIForbidden: the event was filtered, or its id is already cached.
        APIRateLimited: the rate limiter reported a limit.
    """
    event_received.send_robust(ip=remote_addr, project=project, sender=process_event)

    started = time()
    tsdb_timestamp = to_datetime(started)

    filtered, why_filtered = event_manager.should_filter()
    if filtered:
        to_increment = [
            (tsdb.models.project_total_received, project.id),
            (tsdb.models.project_total_blacklisted, project.id),
            (tsdb.models.organization_total_received, project.organization_id),
            (tsdb.models.organization_total_blacklisted, project.organization_id),
            (tsdb.models.key_total_received, key.id),
            (tsdb.models.key_total_blacklisted, key.id),
        ]
        try:
            to_increment.append((FILTER_STAT_KEYS_TO_VALUES[why_filtered], project.id))
        except KeyError:
            # The lookup is expected to fail for reasons that have no entry
            # in FILTER_STAT_KEYS_TO_VALUES; those get no extra counter.
            pass

        tsdb.incr_multi(to_increment, timestamp=tsdb_timestamp)
        metrics.incr("events.blacklisted", tags={"reason": why_filtered}, skip_internal=False)
        event_filtered.send_robust(ip=remote_addr, project=project, sender=process_event)

        raise APIForbidden("Event dropped due to filter: %s" % (why_filtered,))

    # TODO: improve this API (e.g. make RateLimit act on __ne__)
    quota_result = safe_execute(
        quotas.is_rate_limited, project=project, key=key, _with_transaction=False
    )
    if isinstance(quota_result, bool):
        quota_result = RateLimit(is_limited=quota_result, retry_after=None)

    # XXX(dcramer): when the rate limiter fails we drop events to ensure
    # it cannot cascade
    if quota_result is not None and not quota_result.is_limited:
        tsdb.incr_multi(
            [
                (tsdb.models.project_total_received, project.id),
                (tsdb.models.organization_total_received, project.organization_id),
                (tsdb.models.key_total_received, key.id),
            ],
            timestamp=tsdb_timestamp,
        )
    else:
        if quota_result is None:
            api_logger.debug("Dropped event due to error with rate limiter")

        tsdb.incr_multi(
            [
                (tsdb.models.project_total_received, project.id),
                (tsdb.models.project_total_rejected, project.id),
                (tsdb.models.organization_total_received, project.organization_id),
                (tsdb.models.organization_total_rejected, project.organization_id),
                (tsdb.models.key_total_received, key.id),
                (tsdb.models.key_total_rejected, key.id),
            ],
            timestamp=tsdb_timestamp,
        )

        # The metric tag falls back to 'unknown', the signal to None.
        tag_reason = quota_result.reason_code if quota_result else "unknown"
        metrics.incr("events.dropped", tags={"reason": tag_reason}, skip_internal=False)

        signal_reason = quota_result.reason_code if quota_result else None
        event_dropped.send_robust(
            ip=remote_addr,
            project=project,
            reason_code=signal_reason,
            sender=process_event,
        )

        # NOTE(review): a limiter error (None) raises nothing here, so the
        # event still proceeds to the storage path below - confirm intended.
        if quota_result is not None:
            raise APIRateLimited(quota_result.retry_after)

    org_options = OrganizationOption.objects.get_all_values(project.organization_id)

    event_data = event_manager.get_data()
    # The manager is not needed past this point; drop the local reference.
    del event_manager

    event_id = event_data["event_id"]

    # TODO(dcramer): ideally we'd only validate this if the event_id was
    # supplied by the user
    seen_key = "ev:%s:%s" % (project.id, event_id)

    if cache.get(seen_key) is not None:
        raise APIForbidden("An event with the same ID already exists (%s)" % (event_id,))

    # Organization-level "require" options take precedence; the project
    # option (with its default) is only consulted when they are falsy.
    remove_ip = org_options.get("sentry:require_scrub_ip_address", False)
    if not remove_ip:
        remove_ip = project.get_option("sentry:scrub_ip_address", False)

    remove_sensitive = org_options.get("sentry:require_scrub_data", False)
    if not remove_sensitive:
        remove_sensitive = project.get_option("sentry:scrub_data", True)

    if remove_sensitive:
        # We filter data immediately before it ever gets into the queue
        sensitive_option = "sentry:sensitive_fields"
        sensitive = org_options.get(sensitive_option, []) + project.get_option(
            sensitive_option, []
        )

        safe_option = "sentry:safe_fields"
        excluded = org_options.get(safe_option, []) + project.get_option(safe_option, [])

        with_defaults = org_options.get("sentry:require_scrub_defaults", False)
        if not with_defaults:
            with_defaults = project.get_option("sentry:scrub_defaults", True)

        SensitiveDataFilter(
            fields=sensitive,
            include_defaults=with_defaults,
            exclude_fields=excluded,
        ).apply(event_data)

    if remove_ip:
        # We filter data immediately before it ever gets into the queue
        helper.ensure_does_not_have_ip(event_data)

    # mutates data (strips a lot of context if not queued)
    helper.insert_data_to_database(event_data, start_time=started, attachments=attachments)

    cache.set(seen_key, "", 60 * 5)  # remembered for five minutes

    api_logger.debug("New event received (%s)", event_id)

    event_accepted.send_robust(
        ip=remote_addr, data=event_data, project=project, sender=process_event
    )

    return event_id
def process_event(event_manager, project, key, remote_addr, helper, attachments, project_config):
    """Filter, rate limit, de-duplicate and finally store one incoming event.

    Every outcome reported through ``track_outcome`` carries the data
    category derived from the payload's ``"type"`` field:

    * ``FILTERED`` when ``event_manager.should_filter()`` asks for it. The
      function then returns the event id if the
      ``store.lie-about-filter-status`` option is truthy, otherwise raises;
    * ``RATE_LIMITED`` when ``quotas.is_rate_limited`` reports a limit or the
      limiter call yields ``None`` (treated as a limiter error);
    * ``INVALID`` / ``"duplicate"`` when the event id is still in the cache.

    Surviving events are passed through ``scrub_data`` (on a ``dict`` copy of
    the payload), handed to ``helper.insert_data_to_database``, remembered in
    the cache for one hour and announced via ``event_accepted``.

    Returns:
        The ``event_id`` taken from the event payload.

    Raises:
        APIForbidden: the event was filtered, or its id is already cached.
        APIRateLimited: the rate limiter reported a limit.
    """
    event_received.send_robust(ip=remote_addr, project=project, sender=process_event)

    received_at = time()

    payload = event_manager.get_data()
    is_filtered, filter_reason = event_manager.should_filter()
    # The manager is not needed past this point; drop the local reference.
    del event_manager

    event_id = payload["event_id"]
    category = DataCategory.from_event_type(payload.get("type"))

    if is_filtered:
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.FILTERED,
            filter_reason,
            event_id=event_id,
            category=category,
        )
        metrics.incr("events.blacklisted", tags={"reason": filter_reason}, skip_internal=False)

        # Relay will no longer be able to report filter status, so this
        # option lets us turn on relay-like behavior here to see the impact.
        if options.get("store.lie-about-filter-status"):
            return event_id

        raise APIForbidden("Event dropped due to filter: %s" % (filter_reason,))

    # TODO: improve this API (e.g. make RateLimit act on __ne__)
    limit = safe_execute(
        quotas.is_rate_limited, project=project, key=key, _with_transaction=False
    )
    if isinstance(limit, bool):
        limit = RateLimit(is_limited=limit, retry_after=None)

    # XXX(dcramer): when the rate limiter fails we drop events to ensure
    # it cannot cascade
    limiter_failed = limit is None
    if limiter_failed or limit.is_limited:
        if limiter_failed:
            api_logger.debug("Dropped event due to error with rate limiter")

        if limit:
            drop_reason = limit.reason_code
        else:
            drop_reason = None

        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.RATE_LIMITED,
            drop_reason,
            event_id=event_id,
            category=category,
        )
        metrics.incr(
            "events.dropped", tags={"reason": drop_reason or "unknown"}, skip_internal=False
        )

        # NOTE(review): when the limiter itself failed nothing is raised, so
        # execution continues into the storage path below - confirm intended.
        if not limiter_failed:
            raise APIRateLimited(limit.retry_after)

    # TODO(dcramer): ideally we'd only validate this if the event_id was
    # supplied by the user
    dedupe_key = "ev:%s:%s" % (project_config.project_id, event_id)

    # XXX(markus): I believe this duplicate check is extremely broken:
    #
    # * in prod it is practically backed by memcached, which gives no
    #   consistency guarantees (no idea how we don't run into issues there)
    #
    # * a TTL of 1h basically doesn't guarantee any deduplication at all. It
    #   just guarantees a good error message... for one hour.
    if cache.get(dedupe_key) is not None:
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.INVALID,
            "duplicate",
            event_id=event_id,
            category=category,
        )
        raise APIForbidden("An event with the same ID already exists (%s)" % (event_id,))

    payload = scrub_data(project_config, dict(payload))

    # mutates data (strips a lot of context if not queued)
    helper.insert_data_to_database(payload, start_time=received_at, attachments=attachments)

    one_hour = 60 * 60
    cache.set(dedupe_key, "", one_hour)

    api_logger.debug("New event received (%s)", event_id)

    event_accepted.send_robust(
        ip=remote_addr, data=payload, project=project, sender=process_event
    )

    return event_id
def process_event(event_manager, project, key, remote_addr, helper, attachments, project_config):
    """Filter, rate limit, de-duplicate and finally store one incoming event.

    Outcomes are reported through ``track_outcome`` using the organization
    and project ids carried by ``project_config``:

    * ``FILTERED`` when ``event_manager.should_filter()`` asks for it;
    * ``RATE_LIMITED`` when ``quotas.is_rate_limited`` reports a limit or the
      limiter call yields ``None`` (treated as a limiter error);
    * ``INVALID`` / ``"duplicate"`` when the event id is still in the cache.

    For filtered and dropped events ``mark_signal_sent`` is called before the
    outcome is tracked, and the matching signal is sent afterwards.

    Surviving events are scrubbed according to the ``datascrubbingSettings``
    entry of ``project_config.config``, handed to
    ``helper.insert_data_to_database``, remembered in the cache for one hour
    and announced via ``event_accepted``.

    Returns:
        The ``event_id`` taken from the event payload.

    Raises:
        APIForbidden: the event was filtered, or its id is already cached.
        APIRateLimited: the rate limiter reported a limit.
    """
    event_received.send_robust(ip=remote_addr, project=project, sender=process_event)

    received_at = time()

    payload = event_manager.get_data()
    is_filtered, filter_reason = event_manager.should_filter()
    # The manager is not needed past this point; drop the local reference.
    del event_manager

    event_id = payload["event_id"]

    if is_filtered:
        # Record that event_filtered is being sent *before* the outcome is
        # emitted, to avoid a potential race between OutcomesConsumer and
        # the `event_filtered.send_robust` call further down.
        mark_signal_sent(project_config.project_id, event_id)
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.FILTERED,
            filter_reason,
            event_id=event_id,
        )
        metrics.incr("events.blacklisted", tags={"reason": filter_reason}, skip_internal=False)
        event_filtered.send_robust(ip=remote_addr, project=project, sender=process_event)
        raise APIForbidden("Event dropped due to filter: %s" % (filter_reason,))

    # TODO: improve this API (e.g. make RateLimit act on __ne__)
    limit = safe_execute(
        quotas.is_rate_limited, project=project, key=key, _with_transaction=False
    )
    if isinstance(limit, bool):
        limit = RateLimit(is_limited=limit, retry_after=None)

    # XXX(dcramer): when the rate limiter fails we drop events to ensure
    # it cannot cascade
    limiter_failed = limit is None
    if limiter_failed or limit.is_limited:
        if limiter_failed:
            api_logger.debug("Dropped event due to error with rate limiter")

        # Same ordering concern as for filtered events: mark the
        # event_dropped signal as sent before emitting the outcome.
        mark_signal_sent(project_config.project_id, event_id)

        if limit:
            drop_reason = limit.reason_code
        else:
            drop_reason = None

        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.RATE_LIMITED,
            drop_reason,
            event_id=event_id,
        )
        metrics.incr(
            "events.dropped", tags={"reason": drop_reason or "unknown"}, skip_internal=False
        )
        event_dropped.send_robust(
            ip=remote_addr, project=project, reason_code=drop_reason, sender=process_event
        )

        # NOTE(review): when the limiter itself failed nothing is raised, so
        # execution continues into the storage path below - confirm intended.
        if not limiter_failed:
            raise APIRateLimited(limit.retry_after)

    # TODO(dcramer): ideally we'd only validate this if the event_id was
    # supplied by the user
    dedupe_key = "ev:%s:%s" % (project_config.project_id, event_id)

    if cache.get(dedupe_key) is not None:
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.INVALID,
            "duplicate",
            event_id=event_id,
        )
        raise APIForbidden("An event with the same ID already exists (%s)" % (event_id,))

    # A missing or falsy settings entry is treated as "nothing configured".
    scrubbing = project_config.config.get("datascrubbingSettings") or {}
    strip_ip = scrubbing.get("scrubIpAddresses")
    strip_sensitive = scrubbing.get("scrubData")

    if strip_sensitive:
        # We filter data immediately before it ever gets into the queue
        extra_sensitive = scrubbing.get("sensitiveFields")
        safe_fields = scrubbing.get("excludeFields")
        use_defaults = scrubbing.get("scrubDefaults")

        scrubber = SensitiveDataFilter(
            fields=extra_sensitive,
            include_defaults=use_defaults,
            exclude_fields=safe_fields,
        )
        scrubber.apply(payload)

    if strip_ip:
        # We filter data immediately before it ever gets into the queue
        helper.ensure_does_not_have_ip(payload)

    # mutates data (strips a lot of context if not queued)
    helper.insert_data_to_database(payload, start_time=received_at, attachments=attachments)

    one_hour = 60 * 60
    cache.set(dedupe_key, "", one_hour)

    api_logger.debug("New event received (%s)", event_id)

    event_accepted.send_robust(
        ip=remote_addr, data=payload, project=project, sender=process_event
    )

    return event_id