def test_scrub_data_in_processing(field):
    project_config = ProjectConfig(
        None,
        config={
            "datascrubbingSettings": {
                "excludeFields": [],
                "scrubData": True,
                "scrubIpAddresses": False,
                "sensitiveFields": ["a"],
                "scrubDefaults": False,
            },
            "piiConfig": {},
        },
    )

    new_field = u"new_{}".format(field)
    old_event = {"extra": {field: "do not remove"}}
    event = {"extra": {field: "do not remove", new_field: "do remove"}}

    new_event = scrub_data(project_config, event, in_processing=True, old_event=old_event)

    assert new_event == {
        u"_meta": {
            u"extra": {new_field: {u"": {u"len": 9, u"rem": [[u"strip-fields", u"s", 0, 10]]}}}
        },
        u"extra": {field: u"do not remove", new_field: u"[Filtered]"},
    }

def test_scrub_data(field, default_project):
    project = default_project
    organization = project.organization

    organization.update_option(
        "sentry:relay_pii_config",
        """
        {
            "applications": {
                "debug_meta.images.*.code_file": ["@userpath:replace"],
                "debug_meta.images.*.debug_file": ["@userpath:replace"]
            }
        }
        """,
    )
    organization.update_option("sentry:safe_fields", [])
    organization.update_option("sentry:sensitive_fields", ["a"])
    organization.update_option("sentry:scrub_ip_address", False)
    organization.update_option("sentry:require_scrub_data", True)

    event = {
        "extra": {field: "pls remove"},
        "debug_meta": {
            "images": [
                {"type": "symbolic", "debug_file": "/Users/foo/bar", "code_file": "/Users/foo/bar"}
            ]
        },
    }

    new_event = scrub_data(project, event)

    assert new_event == (
        {
            "_meta": {
                "debug_meta": {
                    "images": {
                        "0": {
                            "code_file": {
                                "": {"len": 10, "rem": [["@userpath:replace", "s", 7, 13]]}
                            },
                            "debug_file": {
                                "": {"len": 10, "rem": [["@userpath:replace", "s", 7, 13]]}
                            },
                        }
                    }
                },
                "extra": {field: {"": {"len": 10, "rem": [["strip-fields", "s", 0, 10]]}}},
            },
            "debug_meta": {
                "images": [
                    {
                        "code_file": "/Users/[user]/bar",
                        "debug_file": "/Users/[user]/bar",
                        "type": "symbolic",
                    }
                ]
            },
            "extra": {field: "[Filtered]"},
        }
    )

def test_scrub_data(field):
    project_config = ProjectConfig(
        None,
        config={
            "datascrubbingSettings": {
                "excludeFields": [],
                "scrubData": True,
                "scrubIpAddresses": False,
                "sensitiveFields": ["a"],
                "scrubDefaults": False,
            },
            "piiConfig": {
                "applications": {
                    "debug_meta.images.*.code_file": ["@userpath:replace"],
                    "debug_meta.images.*.debug_file": ["@userpath:replace"],
                }
            },
        },
    )

    event = {
        "extra": {field: "pls remove"},
        "debug_meta": {
            "images": [
                {
                    "type": "symbolic",
                    "debug_file": "/Users/foo/bar",
                    "code_file": "/Users/foo/bar",
                }
            ]
        },
    }

    new_event = scrub_data(project_config, event)

    assert new_event == (
        {
            u"_meta": {
                u"debug_meta": {
                    u"images": {
                        u"0": {
                            u"code_file": {
                                u"": {u"len": 10, u"rem": [[u"@userpath:replace", u"s", 7, 13]]}
                            },
                            u"debug_file": {
                                u"": {u"len": 10, u"rem": [[u"@userpath:replace", u"s", 7, 13]]}
                            },
                        }
                    }
                },
                u"extra": {field: {u"": {u"len": 10, u"rem": [[u"strip-fields", u"s", 0, 10]]}}},
            },
            u"debug_meta": {
                u"images": [
                    {
                        u"code_file": u"/Users/[user]/bar",
                        u"debug_file": u"/Users/[user]/bar",
                        u"type": u"symbolic",
                    }
                ]
            },
            u"extra": {field: u"[Filtered]"},
        }
    )

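# The scrub_data tests above take a `field` argument supplied by pytest, but the
# excerpt does not show where it comes from. Below is a minimal sketch, assuming
# a fixture named `field`; the fixture name and its parameter list are
# illustrative only, not the values used in the original suite.
import pytest


@pytest.fixture(params=["a"])
def field(request):
    # "a" is the only value guaranteed to match the "sensitiveFields": ["a"]
    # setting configured in the tests; the real suite may parametrize more names.
    return request.param
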
def process_event(event_manager, project, key, remote_addr, helper, attachments, project_config):
    event_received.send_robust(ip=remote_addr, project=project, sender=process_event)

    start_time = time()

    data = event_manager.get_data()
    should_filter, filter_reason = event_manager.should_filter()
    del event_manager

    event_id = data["event_id"]
    data_category = DataCategory.from_event_type(data.get("type"))

    if should_filter:
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.FILTERED,
            filter_reason,
            event_id=event_id,
            category=data_category,
        )
        metrics.incr("events.blacklisted", tags={"reason": filter_reason}, skip_internal=False)

        # relay will no longer be able to provide information about filter
        # status so to see the impact we're adding a way to turn on relay
        # like behavior here.
        if options.get("store.lie-about-filter-status"):
            return event_id

        raise APIForbidden("Event dropped due to filter: %s" % (filter_reason,))

    # TODO: improve this API (e.g. make RateLimit act on __ne__)
    rate_limit = safe_execute(
        quotas.is_rate_limited, project=project, key=key, _with_transaction=False
    )

    if isinstance(rate_limit, bool):
        rate_limit = RateLimit(is_limited=rate_limit, retry_after=None)

    # XXX(dcramer): when the rate limiter fails we drop events to ensure
    # it cannot cascade
    if rate_limit is None or rate_limit.is_limited:
        if rate_limit is None:
            api_logger.debug("Dropped event due to error with rate limiter")

        reason = rate_limit.reason_code if rate_limit else None
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.RATE_LIMITED,
            reason,
            event_id=event_id,
            category=data_category,
        )
        metrics.incr("events.dropped", tags={"reason": reason or "unknown"}, skip_internal=False)

        if rate_limit is not None:
            raise APIRateLimited(rate_limit.retry_after)

    # TODO(dcramer): ideally we'd only validate this if the event_id was
    # supplied by the user
    cache_key = "ev:%s:%s" % (project_config.project_id, event_id)

    # XXX(markus): I believe this code is extremely broken:
    #
    # * it practically uses memcached in prod which has no consistency
    #   guarantees (no idea how we don't run into issues there)
    #
    # * a TTL of 1h basically doesn't guarantee any deduplication at all. It
    #   just guarantees a good error message... for one hour.
    if cache.get(cache_key) is not None:
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.INVALID,
            "duplicate",
            event_id=event_id,
            category=data_category,
        )
        raise APIForbidden("An event with the same ID already exists (%s)" % (event_id,))

    data = scrub_data(project_config, dict(data))

    # mutates data (strips a lot of context if not queued)
    helper.insert_data_to_database(data, start_time=start_time, attachments=attachments)

    cache.set(cache_key, "", 60 * 60)  # Cache for 1 hour

    api_logger.debug("New event received (%s)", event_id)

    event_accepted.send_robust(ip=remote_addr, data=data, project=project, sender=process_event)

    return event_id

def process_event(event_manager, project, key, remote_addr, helper, attachments, project_config):
    event_received.send_robust(ip=remote_addr, project=project, sender=process_event)

    start_time = time()

    data = event_manager.get_data()
    should_filter, filter_reason = event_manager.should_filter()
    del event_manager

    event_id = data["event_id"]

    if should_filter:
        signals_in_consumer = decide_signals_in_consumer()

        if not signals_in_consumer:
            # Mark that the event_filtered signal is sent. Do this before emitting
            # the outcome to avoid a potential race between OutcomesConsumer and
            # `event_filtered.send_robust` below.
            mark_signal_sent(project_config.project_id, event_id)

        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.FILTERED,
            filter_reason,
            event_id=event_id,
        )
        metrics.incr("events.blacklisted", tags={"reason": filter_reason}, skip_internal=False)

        if not signals_in_consumer:
            event_filtered.send_robust(ip=remote_addr, project=project, sender=process_event)

        # relay will no longer be able to provide information about filter
        # status so to see the impact we're adding a way to turn on relay
        # like behavior here.
        if options.get("store.lie-about-filter-status"):
            return event_id

        raise APIForbidden("Event dropped due to filter: %s" % (filter_reason,))

    # TODO: improve this API (e.g. make RateLimit act on __ne__)
    rate_limit = safe_execute(
        quotas.is_rate_limited, project=project, key=key, _with_transaction=False
    )

    if isinstance(rate_limit, bool):
        rate_limit = RateLimit(is_limited=rate_limit, retry_after=None)

    # XXX(dcramer): when the rate limiter fails we drop events to ensure
    # it cannot cascade
    if rate_limit is None or rate_limit.is_limited:
        if rate_limit is None:
            api_logger.debug("Dropped event due to error with rate limiter")

        signals_in_consumer = decide_signals_in_consumer()

        if not signals_in_consumer:
            # Mark that the event_dropped signal is sent. Do this before emitting
            # the outcome to avoid a potential race between OutcomesConsumer and
            # `event_dropped.send_robust` below.
            mark_signal_sent(project_config.project_id, event_id)

        reason = rate_limit.reason_code if rate_limit else None
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.RATE_LIMITED,
            reason,
            event_id=event_id,
        )
        metrics.incr("events.dropped", tags={"reason": reason or "unknown"}, skip_internal=False)

        if not signals_in_consumer:
            event_dropped.send_robust(
                ip=remote_addr, project=project, reason_code=reason, sender=process_event
            )

        if rate_limit is not None:
            raise APIRateLimited(rate_limit.retry_after)

    # TODO(dcramer): ideally we'd only validate this if the event_id was
    # supplied by the user
    cache_key = "ev:%s:%s" % (project_config.project_id, event_id)

    if cache.get(cache_key) is not None:
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.INVALID,
            "duplicate",
            event_id=event_id,
        )
        raise APIForbidden("An event with the same ID already exists (%s)" % (event_id,))

    data = scrub_data(project_config, dict(data))

    # mutates data (strips a lot of context if not queued)
    helper.insert_data_to_database(data, start_time=start_time, attachments=attachments)

    cache.set(cache_key, "", 60 * 60)  # Cache for 1 hour

    api_logger.debug("New event received (%s)", event_id)

    event_accepted.send_robust(ip=remote_addr, data=data, project=project, sender=process_event)

    return event_id