class EventManager(object):
    """
    Handles normalization in both the store endpoint and the save task. The
    intention is to swap this class out with a reimplementation in Rust.
    """

    def __init__(
        self,
        data,
        version="5",
        project=None,
        grouping_config=None,
        client_ip=None,
        user_agent=None,
        auth=None,
        key=None,
        content_encoding=None,
        is_renormalize=False,
        remove_other=None,
        project_config=None,
        sent_at=None,
    ):
        self._data = _decode_event(data, content_encoding=content_encoding)
        self.version = version
        self._project = project
        # if not explicitly specified try to get the grouping from project_config
        if grouping_config is None and project_config is not None:
            config = project_config.config
            grouping_config = config.get("grouping_config")
        # if we still don't have a grouping also try the project
        if grouping_config is None and project is not None:
            grouping_config = get_grouping_config_dict_for_project(self._project)
        self._grouping_config = grouping_config
        self._client_ip = client_ip
        self._user_agent = user_agent
        self._auth = auth
        self._key = key
        self._is_renormalize = is_renormalize
        self._remove_other = remove_other
        self._normalized = False
        self.project_config = project_config
        self.sent_at = sent_at

    def process_csp_report(self):
        """Only called from the CSP report endpoint."""
        data = self._data

        try:
            interface = get_interface(data.pop("interface"))
            report = data.pop("report")
        except KeyError:
            raise APIForbidden("No report or interface data")

        # To support testing, we can either accept a built interface instance, or the raw data in
        # which case we build the instance ourselves
        try:
            instance = report if isinstance(report, interface) else interface.from_raw(report)
        except jsonschema.ValidationError as e:
            raise APIError("Invalid security report: %s" % str(e).splitlines()[0])

        def clean(d):
            return dict([x for x in d.items() if x[1]])

        data.update(
            {
                "logger": "csp",
                "message": instance.get_message(),
                "culprit": instance.get_culprit(),
                instance.path: instance.to_json(),
                "tags": instance.get_tags(),
                "errors": [],
                "user": {"ip_address": self._client_ip},
                # Construct a faux Http interface based on the little information we have
                # This is a bit weird, since we don't have nearly enough
                # information to create an Http interface, but
                # this automatically will pick up tags for the User-Agent
                # which is actually important here for CSP
                "request": {
                    "url": instance.get_origin(),
                    "headers": clean(
                        {"User-Agent": self._user_agent, "Referer": instance.get_referrer()}
                    ),
                },
            }
        )

        self._data = data

    def normalize(self, project_id=None):
        with metrics.timer("events.store.normalize.duration"):
            self._normalize_impl(project_id=project_id)

    def _normalize_impl(self, project_id=None):
        if self._project and project_id and project_id != self._project.id:
            raise RuntimeError(
                "Initialized EventManager with one project ID and called save() with another one"
            )

        if self._normalized:
            raise RuntimeError("Already normalized")

        self._normalized = True

        from sentry_relay.processing import StoreNormalizer

        rust_normalizer = StoreNormalizer(
            project_id=self._project.id if self._project else project_id,
            client_ip=self._client_ip,
            client=self._auth.client if self._auth else None,
            key_id=six.text_type(self._key.id) if self._key else None,
            grouping_config=self._grouping_config,
            protocol_version=six.text_type(self.version) if self.version is not None else None,
            is_renormalize=self._is_renormalize,
            remove_other=self._remove_other,
            normalize_user_agent=True,
            sent_at=self.sent_at.isoformat() if self.sent_at is not None else None,
            **DEFAULT_STORE_NORMALIZER_ARGS
        )
        self._data = CanonicalKeyDict(rust_normalizer.normalize_event(dict(self._data)))

    def should_filter(self):
        """
        returns (result: bool, reason: string or None)
        Result is True if an event should be filtered
        The reason for filtering is passed along as a string
        so that we can store it in metrics
        """
        for name in SECURITY_REPORT_INTERFACES:
            if name in self._data:
                interface = get_interface(name)
                if interface.to_python(self._data[name]).should_filter(self._project):
                    return (True, FilterStatKeys.INVALID_CSP)

        if self._client_ip and not is_valid_ip(self.project_config, self._client_ip):
            return (True, FilterStatKeys.IP_ADDRESS)

        release = self._data.get("release")
        if release and not is_valid_release(self.project_config, release):
            return (True, FilterStatKeys.RELEASE_VERSION)

        error_message = (
            get_path(self._data, "logentry", "formatted")
            or get_path(self._data, "logentry", "message")
            or ""
        )
        if error_message and not is_valid_error_message(self.project_config, error_message):
            return (True, FilterStatKeys.ERROR_MESSAGE)

        for exc in get_path(self._data, "exception", "values", filter=True, default=[]):
            message = u": ".join([_f for _f in map(exc.get, ["type", "value"]) if _f])
            if message and not is_valid_error_message(self.project_config, message):
                return (True, FilterStatKeys.ERROR_MESSAGE)

        return should_filter_event(self.project_config, self._data)

    def get_data(self):
        return self._data

    @metrics.wraps("event_manager.save")
    def save(self, project_id, raw=False, assume_normalized=False, cache_key=None):
        """
        After normalizing and processing an event, save adjacent models such as
        releases and environments to postgres and write the event into
        eventstream. From there it will be picked up by Snuba and
        post-processing.

        We re-insert events with duplicate IDs into Snuba, which is responsible
        for deduplicating events. Since deduplication in Snuba is on the primary
        key (based on event ID, project ID and day), events with same IDs are only
        deduplicated if their timestamps fall on the same day. The latest event
        always wins and overwrites the value of events received earlier in that
        day.

        Since we increment counters and frequencies here before events get inserted
        to eventstream these numbers may be larger than the total number of
        events if we receive duplicate event IDs that fall on the same day
        (that do not hit cache first).
        """

        # Normalize if needed
        if not self._normalized:
            if not assume_normalized:
                self.normalize(project_id=project_id)
            self._normalized = True

        with metrics.timer("event_manager.save.project.get_from_cache"):
            project = Project.objects.get_from_cache(id=project_id)

        projects = {project.id: project}

        if self._data.get("type") == "transaction":
            self._data["project"] = int(project_id)
            jobs = save_transaction_events([self._data], projects)
            return jobs[0]["event"]

        with metrics.timer("event_manager.save.organization.get_from_cache"):
            project._organization_cache = Organization.objects.get_from_cache(
                id=project.organization_id
            )

        job = {"data": self._data, "project_id": project_id, "raw": raw}
        jobs = [job]

        _pull_out_data(jobs, projects)
        _get_or_create_release_many(jobs, projects)
        _get_event_user_many(jobs, projects)

        with metrics.timer("event_manager.load_grouping_config"):
            # At this point we want to normalize the in_app values in case the
            # clients did not set this appropriately so far.
            grouping_config = load_grouping_config(
                get_grouping_config_dict_for_event_data(job["data"], project)
            )

        with metrics.timer("event_manager.normalize_stacktraces_for_grouping"):
            normalize_stacktraces_for_grouping(job["data"], grouping_config)

        _derive_plugin_tags_many(jobs, projects)
        _derive_interface_tags_many(jobs)

        with metrics.timer("event_manager.apply_server_fingerprinting"):
            # The active grouping config was put into the event in the
            # normalize step before. We now also make sure that the
            # fingerprint was set to `'{{ default }}'` just in case someone
            # removed it from the payload. The call to get_hashes will then
            # look at `grouping_config` to pick the right parameters.
            job["data"]["fingerprint"] = job["data"].get("fingerprint") or ["{{ default }}"]
            apply_server_fingerprinting(job["data"], get_fingerprinting_config_for_project(project))

        with metrics.timer("event_manager.event.get_hashes"):
            # Here we try to use the grouping config that was requested in the
            # event. If that config has since been deleted (because it was an
            # experimental grouping config) we fall back to the default.
            try:
                hashes = job["event"].get_hashes()
            except GroupingConfigNotFound:
                job["data"]["grouping_config"] = get_grouping_config_dict_for_project(project)
                hashes = job["event"].get_hashes()

        job["data"]["hashes"] = hashes

        _materialize_metadata_many(jobs)

        # The group gets the same metadata as the event when it's flushed but
        # additionally the `last_received` key is set. This key is used by
        # _save_aggregate.
        group_metadata = dict(job["materialized_metadata"])
        group_metadata["last_received"] = job["received_timestamp"]
        kwargs = {
            "platform": job["platform"],
            "message": job["event"].search_message,
            "culprit": job["culprit"],
            "logger": job["logger_name"],
            "level": LOG_LEVELS_MAP.get(job["level"]),
            "last_seen": job["event"].datetime,
            "first_seen": job["event"].datetime,
            "active_at": job["event"].datetime,
            "data": group_metadata,
        }

        if job["release"]:
            kwargs["first_release"] = job["release"]

        try:
            job["group"], job["is_new"], job["is_regression"] = _save_aggregate(
                event=job["event"], hashes=hashes, release=job["release"], **kwargs
            )
        except HashDiscarded:
            event_discarded.send_robust(project=project, sender=EventManager)
            metrics.incr(
                "events.discarded",
                skip_internal=True,
                tags={"organization_id": project.organization_id, "platform": job["platform"]},
            )
            raise

        job["event"].group = job["group"]

        _send_event_saved_signal_many(jobs, projects)

        # store a reference to the group id to guarantee validation of isolation
        # XXX(markus): No clue what this does
        job["event"].data.bind_ref(job["event"])

        _get_or_create_environment_many(jobs, projects)

        if job["group"]:
            group_environment, job["is_new_group_environment"] = GroupEnvironment.get_or_create(
                group_id=job["group"].id,
                environment_id=job["environment"].id,
                defaults={"first_release": job["release"] or None},
            )
        else:
            job["is_new_group_environment"] = False

        _get_or_create_release_associated_models(jobs, projects)

        if job["release"] and job["group"]:
            job["grouprelease"] = GroupRelease.get_or_create(
                group=job["group"],
                release=job["release"],
                environment=job["environment"],
                datetime=job["event"].datetime,
            )

        _tsdb_record_all_metrics(jobs)

        if job["group"]:
            UserReport.objects.filter(project=project, event_id=job["event"].event_id).update(
                group=job["group"], environment=job["environment"]
            )

        _materialize_event_metrics(jobs)

        # Load attachments first, but persist them at the very last after
        # posting to eventstream to make sure all counters and eventstream are
        # incremented for sure.
        attachments = get_attachments(cache_key, job["event"])
        for attachment in attachments:
            key = "bytes.stored.%s" % (attachment.type,)
            job["event_metrics"][key] = (job["event_metrics"].get(key) or 0) + len(attachment.data)

        _nodestore_save_many(jobs)

        if job["release"]:
            if job["is_new"]:
                buffer.incr(
                    ReleaseProject,
                    {"new_groups": 1},
                    {"release_id": job["release"].id, "project_id": project.id},
                )
            if job["is_new_group_environment"]:
                buffer.incr(
                    ReleaseProjectEnvironment,
                    {"new_issues_count": 1},
                    {
                        "project_id": project.id,
                        "release_id": job["release"].id,
                        "environment_id": job["environment"].id,
                    },
                )

        if not raw:
            if not project.first_event:
                project.update(first_event=job["event"].datetime)
                first_event_received.send_robust(
                    project=project, event=job["event"], sender=Project
                )

        _eventstream_insert_many(jobs)

        # Do this last to ensure signals get emitted even if connection to the
        # file store breaks temporarily.
        save_attachments(attachments, job["event"])

        metric_tags = {"from_relay": "_relay_processed" in job["data"]}

        metrics.timing(
            "events.latency",
            job["received_timestamp"] - job["recorded_timestamp"],
            tags=metric_tags,
        )
        metrics.timing("events.size.data.post_save", job["event"].size, tags=metric_tags)
        metrics.incr(
            "events.post_save.normalize.errors",
            amount=len(job["data"].get("errors") or ()),
            tags=metric_tags,
        )

        self._data = job["event"].data.data
        return job["event"]

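
# Illustrative sketch, not part of the original module: the minimal way a
# caller drives EventManager. The payload and project id below are
# hypothetical; in production the ingest tasks (see _do_save_event) run these
# steps against data pulled from the processing cache.
def _example_save_flow(project_id, raw_payload):
    manager = EventManager(raw_payload)
    manager.normalize(project_id=project_id)  # runs the Rust StoreNormalizer
    # save() persists adjacent models and pushes the event into eventstream
    return manager.save(project_id, assume_normalized=True)
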
def _do_save_event(
    cache_key=None, data=None, start_time=None, event_id=None, project_id=None, **kwargs
):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas
    from sentry.models import ProjectKey
    from sentry.utils.outcomes import Outcome, track_outcome
    from sentry.ingest.outcomes_consumer import mark_signal_sent

    if cache_key and data is None:
        data = default_cache.get(cache_key)

    if data is not None:
        data = CanonicalKeyDict(data)

    if event_id is None and data is not None:
        event_id = data["event_id"]

    # only when we come from reprocessing we get a project_id sent into
    # the task.
    if project_id is None:
        project_id = data.pop("project")

    key_id = None if data is None else data.get("key_id")
    if key_id is not None:
        key_id = int(key_id)
    timestamp = to_datetime(start_time) if start_time is not None else None

    # We only need to delete raw events for events that support
    # reprocessing. If the data cannot be found we want to assume
    # that we need to delete the raw event.
    if not data or reprocessing.event_supports_reprocessing(data):
        delete_raw_event(project_id, event_id, allow_hint_clear=True)

    # This covers two cases: where data is None because we did not manage
    # to fetch it from the default cache or the empty dictionary was
    # stored in the default cache. The former happens if the event
    # expired while being on the queue, the second happens on reprocessing
    # if the raw event was deleted concurrently while we held on to
    # it. This causes the node store to delete the data and we end up
    # fetching an empty dict. We could in theory not invoke `save_event`
    # in those cases but it's important that we always clean up the
    # reprocessing reports correctly or they will screw up the UI. So
    # to future proof this correctly we just handle this case here.
    if not data:
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "post"}, skip_internal=False
        )
        return

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    event = None
    try:
        manager = EventManager(data)
        # event.project.organization is populated after this statement.
        event = manager.save(project_id, assume_normalized=True, cache_key=cache_key)

        # This is where we can finally say that we have accepted the event.
        track_outcome(
            event.project.organization_id,
            event.project.id,
            key_id,
            Outcome.ACCEPTED,
            None,
            timestamp,
            event_id,
        )

    except HashDiscarded:
        project = Project.objects.get_from_cache(id=project_id)
        reason = FilterStatKeys.DISCARDED_HASH
        project_key = None
        try:
            if key_id is not None:
                project_key = ProjectKey.objects.get_from_cache(id=key_id)
        except ProjectKey.DoesNotExist:
            pass

        quotas.refund(project, key=project_key, timestamp=start_time)
        # There is no signal supposed to be sent for this particular
        # outcome-reason combination. Prevent the outcome consumer from
        # emitting it for now.
        #
        # XXX(markus): Revisit decision about signals once outcomes consumer is stable.
        mark_signal_sent(project_id, event_id)
        track_outcome(
            project.organization_id,
            project_id,
            key_id,
            Outcome.FILTERED,
            reason,
            timestamp,
            event_id,
        )

    finally:
        if cache_key:
            default_cache.delete(cache_key)

            # For the unlikely case that we did not manage to persist the
            # event we also delete the key always.
            if event is None or features.has(
                "organizations:event-attachments", event.project.organization, actor=None
            ):
                attachment_cache.delete(cache_key)

        if start_time:
            metrics.timing(
                "events.time-to-process", time() - start_time, instance=data["platform"]
            )

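
# Illustrative sketch, not part of the original module: the two outcome paths
# _do_save_event records above, isolated. Argument order mirrors the
# track_outcome calls in that function; all ids here are hypothetical.
def _example_track_outcomes(project, key_id, timestamp, event_id, event=None):
    from sentry.utils.outcomes import Outcome, track_outcome

    if event is not None:
        # the event was persisted; count it as accepted
        track_outcome(
            event.project.organization_id, event.project.id, key_id,
            Outcome.ACCEPTED, None, timestamp, event_id,
        )
    else:
        # the event matched a discarded group hash; count it as filtered
        track_outcome(
            project.organization_id, project.id, key_id,
            Outcome.FILTERED, FilterStatKeys.DISCARDED_HASH, timestamp, event_id,
        )
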
def _do_process_event(
    cache_key,
    start_time,
    event_id,
    process_task,
    data=None,
    data_has_changed=None,
    from_symbolicate=False,
):
    from sentry.plugins.base import plugins

    if data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "process"}, skip_internal=False
        )
        error_logger.error("process.failed.empty", extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    set_current_project(project_id)

    event_id = data["event_id"]

    with sentry_sdk.start_span(op="tasks.store.process_event.get_project_from_cache"):
        project = Project.objects.get_from_cache(id=project_id)

    has_changed = bool(data_has_changed)

    with sentry_sdk.start_span(op="tasks.store.process_event.get_reprocessing_revision"):
        # Fetch the reprocessing revision
        reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    # Stacktrace based event processors.
    with sentry_sdk.start_span(op="task.store.process_event.stacktraces"):
        with metrics.timer(
            "tasks.store.process_event.stacktraces", tags={"from_symbolicate": from_symbolicate}
        ):
            new_data = process_stacktraces(data)

    if new_data is not None:
        has_changed = True
        data = new_data

    # Second round of datascrubbing after stacktrace and language-specific
    # processing. First round happened as part of ingest.
    #
    # *Right now* the only sensitive data that is added in stacktrace
    # processing are usernames in filepaths, so we run directly after
    # stacktrace processors.
    #
    # We do not yet want to deal with context data produced by plugins like
    # sessionstack or fullstory (which are in `get_event_preprocessors`), as
    # this data is very unlikely to be sensitive data. This is why scrubbing
    # happens somewhere in the middle of the pipeline.
    #
    # On the other hand, Javascript event error translation is happening after
    # this block because it uses `get_event_preprocessors` instead of
    # `get_event_enhancers`.
    #
    # We are fairly confident, however, that this should run *before*
    # re-normalization as it is hard to find sensitive data in partially
    # trimmed strings.
    if (
        has_changed
        and options.get("processing.can-use-scrubbers")
        and features.has("organizations:datascrubbers-v2", project.organization, actor=None)
    ):
        with sentry_sdk.start_span(op="task.store.datascrubbers.scrub"):
            with metrics.timer(
                "tasks.store.datascrubbers.scrub", tags={"from_symbolicate": from_symbolicate}
            ):
                project_config = get_project_config(project)

                new_data = safe_execute(scrub_data, project_config=project_config, event=data.data)

                # XXX(markus): When datascrubbing is finally "totally stable", we might want
                # to drop the event if it crashes to avoid saving PII
                if new_data is not None:
                    data.data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        with sentry_sdk.start_span(op="task.store.process_event.preprocessors") as span:
            span.set_data("plugin", plugin.slug)
            span.set_data("from_symbolicate", from_symbolicate)
            with metrics.timer(
                "tasks.store.process_event.preprocessors",
                tags={"plugin": plugin.slug, "from_symbolicate": from_symbolicate},
            ):
                processors = safe_execute(
                    plugin.get_event_preprocessors, data=data, _with_transaction=False
                )
                for processor in processors or ():
                    result = safe_execute(processor, data)
                    if result:
                        data = result
                        has_changed = True

    assert data["project"] == project_id, "Project cannot be mutated by plugins"

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        # Run some of normalization again such that we don't:
        # - persist e.g. incredibly large stacktraces from minidumps
        # - store event timestamps that are older than our retention window
        #   (also happening with minidumps)
        normalizer = StoreNormalizer(
            remove_other=False, is_renormalize=True, **DEFAULT_STORE_NORMALIZER_ARGS
        )
        data = normalizer.normalize_event(dict(data))

        issues = data.get("processing_issues")

        try:
            if issues and create_failed_event(
                cache_key,
                data,
                project_id,
                list(issues.values()),
                event_id=event_id,
                start_time=start_time,
                reprocessing_rev=reprocessing_rev,
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again. This happens when the reprocessing
            # revision changed while we were processing.
            _do_preprocess_event(cache_key, data, start_time, event_id, process_task, project)
            return

    default_cache.set(cache_key, data, 3600)

    submit_save_event(project, cache_key, event_id, start_time, data)

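
# Illustrative sketch, not part of the original module: the renormalization
# step above in isolation. `is_renormalize=True` re-applies trimming (e.g. for
# oversized minidump stacktraces) without treating the payload as a brand-new
# event; passing DEFAULT_STORE_NORMALIZER_ARGS as the code above does is
# assumed here.
def _example_renormalize(data):
    normalizer = StoreNormalizer(
        remove_other=False, is_renormalize=True, **DEFAULT_STORE_NORMALIZER_ARGS
    )
    # normalize_event accepts and returns a plain dict
    return normalizer.normalize_event(dict(data))
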
def save_event(
    cache_key=None, data=None, start_time=None, event_id=None, project_id=None, **kwargs
):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas, tsdb
    from sentry.models import ProjectKey

    if cache_key:
        data = default_cache.get(cache_key)

    if data is not None:
        data = CanonicalKeyDict(data)

    if event_id is None and data is not None:
        event_id = data['event_id']

    # only when we come from reprocessing we get a project_id sent into
    # the task.
    if project_id is None:
        project_id = data.pop('project')

    delete_raw_event(project_id, event_id, allow_hint_clear=True)

    # This covers two cases: where data is None because we did not manage
    # to fetch it from the default cache or the empty dictionary was
    # stored in the default cache. The former happens if the event
    # expired while being on the queue, the second happens on reprocessing
    # if the raw event was deleted concurrently while we held on to
    # it. This causes the node store to delete the data and we end up
    # fetching an empty dict. We could in theory not invoke `save_event`
    # in those cases but it's important that we always clean up the
    # reprocessing reports correctly or they will screw up the UI. So
    # to future proof this correctly we just handle this case here.
    if not data:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'post'})
        return

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    try:
        manager = EventManager(data)
        event = manager.save(project_id)

        # Always load attachments from the cache so we can later prune them.
        # Only save them if the event-attachments feature is active, though.
        if features.has('organizations:event-attachments', event.project.organization, actor=None):
            attachments = attachment_cache.get(cache_key) or []
            for attachment in attachments:
                save_attachment(event, attachment)

    except HashDiscarded:
        increment_list = [
            (tsdb.models.project_total_received_discarded, project_id),
        ]

        try:
            project = Project.objects.get_from_cache(id=project_id)
        except Project.DoesNotExist:
            pass
        else:
            increment_list.extend([
                (tsdb.models.project_total_blacklisted, project.id),
                (tsdb.models.organization_total_blacklisted, project.organization_id),
            ])

            project_key = None
            if data.get('key_id') is not None:
                try:
                    project_key = ProjectKey.objects.get_from_cache(id=data['key_id'])
                except ProjectKey.DoesNotExist:
                    pass
                else:
                    increment_list.append((tsdb.models.key_total_blacklisted, project_key.id))

            quotas.refund(
                project,
                key=project_key,
                timestamp=start_time,
            )

        tsdb.incr_multi(
            increment_list,
            timestamp=to_datetime(start_time) if start_time is not None else None,
        )

    finally:
        if cache_key:
            default_cache.delete(cache_key)
            attachment_cache.delete(cache_key)

        if start_time:
            metrics.timing('events.time-to-process', time() - start_time, instance=data['platform'])

def _do_process_event(cache_key, start_time, event_id, process_task, data=None):
    from sentry.plugins.base import plugins

    if data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "process"}, skip_internal=False
        )
        error_logger.error("process.failed.empty", extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)
    project_id = data["project"]

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    try:
        # Event enhancers. These run before anything else.
        for plugin in plugins.all(version=2):
            enhancers = safe_execute(plugin.get_event_enhancers, data=data)
            for enhancer in enhancers or ():
                enhanced = safe_execute(enhancer, data, _passthrough_errors=(RetrySymbolication,))
                if enhanced:
                    data = enhanced
                    has_changed = True

        # Stacktrace based event processors.
        new_data = process_stacktraces(data)
        if new_data is not None:
            has_changed = True
            data = new_data
    except RetrySymbolication as e:
        if start_time and (time() - start_time) > 3600:
            # Do not drop event but actually continue with rest of pipeline
            # (persisting unsymbolicated event)
            error_logger.exception("process.failed.infinite_retry")
        else:
            retry_process_event.apply_async(
                args=(),
                kwargs={
                    "process_task_name": process_task.__name__,
                    "task_kwargs": {
                        "cache_key": cache_key,
                        "event_id": event_id,
                        "start_time": start_time,
                    },
                },
                countdown=e.retry_after,
            )
            return

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(
            plugin.get_event_preprocessors, data=data, _with_transaction=False
        )
        for processor in processors or ():
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data["project"] == project_id, "Project cannot be mutated by preprocessor"

    project = Project.objects.get_from_cache(id=project_id)

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        # Run some of normalization again such that we don't:
        # - persist e.g. incredibly large stacktraces from minidumps
        # - store event timestamps that are older than our retention window
        #   (also happening with minidumps)
        normalizer = StoreNormalizer(
            remove_other=False, is_renormalize=True, **DEFAULT_STORE_NORMALIZER_ARGS
        )
        data = normalizer.normalize_event(dict(data))

        issues = data.get("processing_issues")
        try:
            if issues and create_failed_event(
                cache_key,
                data,
                project_id,
                list(issues.values()),
                event_id=event_id,
                start_time=start_time,
                reprocessing_rev=reprocessing_rev,
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again. This happens when the reprocessing
            # revision changed while we were processing.
            from_reprocessing = process_task is process_event_from_reprocessing
            submit_process(project, from_reprocessing, cache_key, event_id, start_time, data)
            process_task.delay(cache_key, start_time=start_time, event_id=event_id)
            return

    default_cache.set(cache_key, data, 3600)

    submit_save_event(project, cache_key, event_id, start_time, data)

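
# Illustrative sketch, not part of the original module: the retry decision the
# symbolication handler above makes. After an hour of retries the event is
# persisted unsymbolicated; otherwise the task re-enqueues itself with Celery's
# `countdown`, delaying by the server-suggested `retry_after` seconds.
def _example_should_retry(start_time, retry_after):
    from time import time

    if start_time and (time() - start_time) > 3600:
        return None  # give up retrying; continue the pipeline unsymbolicated
    return {"countdown": retry_after}  # kwargs for apply_async
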
def load_data(
    platform,
    default=None,
    sample_name=None,
    timestamp=None,
    start_timestamp=None,
    trace=None,
    span=None,
):
    # NOTE: Before editing this data, make sure you understand the context
    # in which it's being used. It is NOT only used for local development and
    # has production consequences.
    #   * bin/load-mocks to generate fake data for local testing
    #   * When a new project is created, a fake event is generated as a "starter"
    #     event so it's not an empty project.
    #   * When a user clicks Test Configuration from notification plugin settings page,
    #     a fake event is generated to go through the pipeline.
    data = None
    language = None
    platform_data = INTEGRATION_ID_TO_PLATFORM_DATA.get(platform)

    if platform_data is not None and platform_data["type"] != "language":
        language = platform_data["language"]

    samples_root = os.path.join(DATA_ROOT, "samples")
    all_samples = set(f for f in os.listdir(samples_root) if f.endswith(".json"))

    for platform in (platform, language, default):
        if not platform:
            continue

        # Verify by checking if the file is within our folder explicitly;
        # this avoids being able to have a name that invokes traversing directories.
        json_path = "{}.json".format(platform)
        if json_path not in all_samples:
            continue

        if not sample_name:
            try:
                sample_name = INTEGRATION_ID_TO_PLATFORM_DATA[platform]["name"]
            except KeyError:
                pass

        # XXX: At this point, it's assumed that `json_path` was safely found
        # within `samples_root` due to the check above and cannot traverse
        # into paths.
        with open(os.path.join(samples_root, json_path)) as fp:
            data = json.load(fp)
            break

    if data is None:
        return

    data = CanonicalKeyDict(data)
    if platform in ("csp", "hkpk", "expectct", "expectstaple"):
        return data

    # Generate a timestamp in the present.
    if timestamp is None:
        timestamp = timezone.now()
    else:
        timestamp = timestamp.replace(tzinfo=pytz.utc)
    data.setdefault("timestamp", to_timestamp(timestamp))

    if data.get("type") == "transaction":
        if start_timestamp is None:
            start_timestamp = timestamp - timedelta(seconds=3)
        else:
            start_timestamp = start_timestamp.replace(tzinfo=pytz.utc)
        data["start_timestamp"] = to_timestamp(start_timestamp)

        if trace is None:
            trace = uuid4().hex
        if span is None:
            span = uuid4().hex[:16]

        for tag in data["tags"]:
            if tag[0] == "trace":
                tag[1] = trace
            elif tag[0] == "trace.span":
                tag[1] = span
        data["contexts"]["trace"]["trace_id"] = trace
        data["contexts"]["trace"]["span_id"] = span

        for span in data.get("spans", []):
            # Use data to generate span timestamps consistently and based
            # on event timestamp
            duration = span.get("data", {}).get("duration", 10.0)
            offset = span.get("data", {}).get("offset", 0)
            span_start = data["start_timestamp"] + offset
            span["trace_id"] = trace
            span.setdefault("start_timestamp", span_start)
            span.setdefault("timestamp", span_start + duration)

        measurements = data.get("measurements")

        if measurements:
            measurement_markers = {}
            for key, entry in measurements.items():
                if key in ["fp", "fcp", "lcp", "fid"]:
                    measurement_markers["mark.{}".format(key)] = {
                        "value": round(data["start_timestamp"] + entry["value"] / 1000, 3)
                    }
            measurements.update(measurement_markers)

    data["platform"] = platform
    # XXX: Message is a legacy alias for logentry. Do not overwrite if set.
if "message" not in data: data["message"] = "This is an example %s exception" % (sample_name or platform, ) data.setdefault( "user", generate_user(ip_address="127.0.0.1", username="******", id=1, email="*****@*****.**"), ) data.setdefault( "extra", { "session": { "foo": "bar" }, "results": [1, 2, 3, 4, 5], "emptyList": [], "emptyMap": {}, "length": 10837790, "unauthorized": False, "url": "http://example.org/foo/bar/", }, ) data.setdefault("modules", {"my.package": "1.0.0"}) data.setdefault( "request", { "cookies": "foo=bar;biz=baz", "url": "http://example.com/foo", "headers": { "Referer": "http://example.com", "Content-Type": "application/json", "User-Agent": "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36", }, "env": { "ENV": "prod" }, "query_string": "foo=bar", "data": '{"hello": "world"}', "method": "GET", }, ) return data
def _do_process_event(cache_key, start_time, event_id, process_task, data=None):
    from sentry.plugins import plugins

    if data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr(
            'events.failed',
            tags={'reason': 'cache', 'stage': 'process'},
            skip_internal=False)
        error_logger.error('process.failed.empty', extra={'cache_key': cache_key})
        return

    data = CanonicalKeyDict(data)
    project_id = data['project']

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    try:
        # Event enhancers. These run before anything else.
        for plugin in plugins.all(version=2):
            enhancers = safe_execute(plugin.get_event_enhancers, data=data)
            for enhancer in (enhancers or ()):
                enhanced = safe_execute(enhancer, data, _passthrough_errors=(RetrySymbolication,))
                if enhanced:
                    data = enhanced
                    has_changed = True

        # Stacktrace based event processors.
        new_data = process_stacktraces(data)
        if new_data is not None:
            has_changed = True
            data = new_data
    except RetrySymbolication as e:
        if start_time and (time() - start_time) > 3600:
            raise RuntimeError('Event spent one hour in processing')

        retry_process_event.apply_async(
            args=(),
            kwargs={
                'process_task_name': process_task.__name__,
                'task_kwargs': {
                    'cache_key': cache_key,
                    'event_id': event_id,
                    'start_time': start_time,
                }
            },
            countdown=e.retry_after
        )
        return

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(
            plugin.get_event_preprocessors, data=data, _with_transaction=False
        )
        for processor in (processors or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project_id, 'Project cannot be mutated by preprocessor'

    project = Project.objects.get_from_cache(id=project_id)

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        issues = data.get('processing_issues')
        try:
            if issues and create_failed_event(
                cache_key, project_id, list(issues.values()), event_id=event_id,
                start_time=start_time, reprocessing_rev=reprocessing_rev
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again. This happens when the reprocessing
            # revision changed while we were processing.
            from_reprocessing = process_task is process_event_from_reprocessing
            submit_process(project, from_reprocessing, cache_key, event_id, start_time, data)
            process_task.delay(cache_key, start_time=start_time, event_id=event_id)
            return

    default_cache.set(cache_key, data, 3600)

    submit_save_event(project, cache_key, event_id, start_time, data)

class EventManager(object):
    """
    Handles normalization in both the store endpoint and the save task. The
    intention is to swap this class out with a reimplementation in Rust.
    """

    def __init__(
        self,
        data,
        version="5",
        project=None,
        grouping_config=None,
        client_ip=None,
        user_agent=None,
        auth=None,
        key=None,
        content_encoding=None,
        is_renormalize=False,
        remove_other=None,
        project_config=None,
    ):
        self._data = _decode_event(data, content_encoding=content_encoding)
        self.version = version
        self._project = project
        # if not explicitly specified try to get the grouping from project_config
        if grouping_config is None and project_config is not None:
            config = project_config.config
            grouping_config = config.get("grouping_config")
        # if we still don't have a grouping also try the project
        if grouping_config is None and project is not None:
            grouping_config = get_grouping_config_dict_for_project(self._project)
        self._grouping_config = grouping_config
        self._client_ip = client_ip
        self._user_agent = user_agent
        self._auth = auth
        self._key = key
        self._is_renormalize = is_renormalize
        self._remove_other = remove_other
        self._normalized = False
        self.project_config = project_config

    def process_csp_report(self):
        """Only called from the CSP report endpoint."""
        data = self._data

        try:
            interface = get_interface(data.pop("interface"))
            report = data.pop("report")
        except KeyError:
            raise APIForbidden("No report or interface data")

        # To support testing, we can either accept a built interface instance, or the raw data in
        # which case we build the instance ourselves
        try:
            instance = report if isinstance(report, interface) else interface.from_raw(report)
        except jsonschema.ValidationError as e:
            raise APIError("Invalid security report: %s" % str(e).splitlines()[0])

        def clean(d):
            return dict(filter(lambda x: x[1], d.items()))

        data.update(
            {
                "logger": "csp",
                "message": instance.get_message(),
                "culprit": instance.get_culprit(),
                instance.path: instance.to_json(),
                "tags": instance.get_tags(),
                "errors": [],
                "user": {"ip_address": self._client_ip},
                # Construct a faux Http interface based on the little information we have
                # This is a bit weird, since we don't have nearly enough
                # information to create an Http interface, but
                # this automatically will pick up tags for the User-Agent
                # which is actually important here for CSP
                "request": {
                    "url": instance.get_origin(),
                    "headers": clean(
                        {"User-Agent": self._user_agent, "Referer": instance.get_referrer()}
                    ),
                },
            }
        )

        self._data = data

    def normalize(self):
        with metrics.timer("events.store.normalize.duration"):
            self._normalize_impl()

    def _normalize_impl(self):
        if self._normalized:
            raise RuntimeError("Already normalized")
        self._normalized = True

        from semaphore.processing import StoreNormalizer

        rust_normalizer = StoreNormalizer(
            project_id=self._project.id if self._project else None,
            client_ip=self._client_ip,
            client=self._auth.client if self._auth else None,
            key_id=six.text_type(self._key.id) if self._key else None,
            grouping_config=self._grouping_config,
            protocol_version=six.text_type(self.version) if self.version is not None else None,
            is_renormalize=self._is_renormalize,
            remove_other=self._remove_other,
            normalize_user_agent=True,
            **DEFAULT_STORE_NORMALIZER_ARGS
        )

        self._data = CanonicalKeyDict(rust_normalizer.normalize_event(dict(self._data)))

    def should_filter(self):
        """
        returns (result: bool, reason: string or None)
        Result is True if an event should be filtered
        The reason for filtering is passed along as a string
        so that we can store it in metrics
        """
        for name in SECURITY_REPORT_INTERFACES:
            if name in self._data:
                interface = get_interface(name)
                if interface.to_python(self._data[name]).should_filter(self._project):
                    return (True, FilterStatKeys.INVALID_CSP)

        if self._client_ip and not is_valid_ip(self.project_config, self._client_ip):
            return (True, FilterStatKeys.IP_ADDRESS)

        release = self._data.get("release")
        if release and not is_valid_release(self.project_config, release):
            return (True, FilterStatKeys.RELEASE_VERSION)

        error_message = (
            get_path(self._data, "logentry", "formatted")
            or get_path(self._data, "logentry", "message")
            or ""
        )
        if error_message and not is_valid_error_message(self.project_config, error_message):
            return (True, FilterStatKeys.ERROR_MESSAGE)

        for exc in get_path(self._data, "exception", "values", filter=True, default=[]):
            message = u": ".join(filter(None, map(exc.get, ["type", "value"])))
            if message and not is_valid_error_message(self.project_config, message):
                return (True, FilterStatKeys.ERROR_MESSAGE)

        return should_filter_event(self.project_config, self._data)

    def get_data(self):
        return self._data

    def _get_event_instance(self, project_id=None):
        if options.get("store.use-django-event"):
            data = self._data
            event_id = data.get("event_id")
            platform = data.get("platform")
            recorded_timestamp = data.get("timestamp")

            date = datetime.fromtimestamp(recorded_timestamp)
            date = date.replace(tzinfo=timezone.utc)
            time_spent = data.get("time_spent")

            data["node_id"] = Event.generate_node_id(project_id, event_id)

            return Event(
                project_id=project_id or self._project.id,
                event_id=event_id,
                data=EventDict(data, skip_renormalization=True),
                time_spent=time_spent,
                datetime=date,
                platform=platform,
            )
        else:
            data = self._data
            event_id = data.get("event_id")

            return eventstore.create_event(
                project_id=project_id or self._project.id,
                event_id=event_id,
                group_id=None,
                data=EventDict(data, skip_renormalization=True),
            )

    def get_culprit(self):
        """Helper to calculate the default culprit"""
        return force_text(
            self._data.get("culprit")
            or self._data.get("transaction")
            or generate_culprit(self._data)
            or ""
        )

    def get_event_type(self):
        """Returns the event type."""
        return eventtypes.get(self._data.get("type", "default"))()

    def materialize_metadata(self):
        """Returns the materialized metadata to be merged with group or
        event data. This currently produces the keys `type`, `metadata`,
        `title` and `location`. This should most likely also produce
        `culprit` here.
        """
        event_type = self.get_event_type()
        event_metadata = event_type.get_metadata(self._data)
        return {
            "type": event_type.key,
            "metadata": event_metadata,
            "title": event_type.get_title(event_metadata),
            "location": event_type.get_location(event_metadata),
        }

    def get_search_message(self, event_metadata=None, culprit=None):
        """This generates the internal event.message attribute which is used
        for search purposes. It adds a bunch of data from the metadata and
        the culprit.
""" if event_metadata is None: event_metadata = self.get_event_type().get_metadata(self._data) if culprit is None: culprit = self.get_culprit() data = self._data message = "" if data.get("logentry"): message += data["logentry"].get( "formatted") or data["logentry"].get("message") or "" if event_metadata: for value in six.itervalues(event_metadata): value_u = force_text(value, errors="replace") if value_u not in message: message = u"{} {}".format(message, value_u) if culprit and culprit not in message: culprit_u = force_text(culprit, errors="replace") message = u"{} {}".format(message, culprit_u) return trim(message.strip(), settings.SENTRY_MAX_MESSAGE_LENGTH) def save(self, project_id, raw=False, assume_normalized=False): """ We re-insert events with duplicate IDs into Snuba, which is responsible for deduplicating events. Since deduplication in Snuba is on the primary key (based on event ID, project ID and day), events with same IDs are only deduplicated if their timestamps fall on the same day. The latest event always wins and overwrites the value of events received earlier in that day. Since we increment counters and frequencies here before events get inserted to eventstream these numbers may be larger than the total number of events if we receive duplicate event IDs that fall on the same day (that do not hit cache first). """ # Normalize if needed if not self._normalized: if not assume_normalized: self.normalize() self._normalized = True data = self._data project = Project.objects.get_from_cache(id=project_id) project._organization_cache = Organization.objects.get_from_cache( id=project.organization_id) # Pull out the culprit culprit = self.get_culprit() # Pull the toplevel data we're interested in level = data.get("level") # TODO(mitsuhiko): this code path should be gone by July 2018. # This is going to be fine because no code actually still depends # on integers here. When we need an integer it will be converted # into one later. Old workers used to send integers here. if level is not None and isinstance(level, six.integer_types): level = LOG_LEVELS[level] transaction_name = data.get("transaction") logger_name = data.get("logger") release = data.get("release") dist = data.get("dist") environment = data.get("environment") recorded_timestamp = data.get("timestamp") # We need to swap out the data with the one internal to the newly # created event object event = self._get_event_instance(project_id=project_id) self._data = data = event.data.data event._project_cache = project date = event.datetime platform = event.platform event_id = event.event_id if transaction_name: transaction_name = force_text(transaction_name) # Right now the event type is the signal to skip the group. This # is going to change a lot. if event.get_event_type() == "transaction": issueless_event = True else: issueless_event = False # Some of the data that are toplevel attributes are duplicated # into tags (logger, level, environment, transaction). These are # different from legacy attributes which are normalized into tags # ahead of time (site, server_name). 
        setdefault_path(data, "tags", value=[])
        set_tag(data, "level", level)
        if logger_name:
            set_tag(data, "logger", logger_name)
        if environment:
            set_tag(data, "environment", environment)
        if transaction_name:
            set_tag(data, "transaction", transaction_name)

        if release:
            # dont allow a conflicting 'release' tag
            pop_tag(data, "release")
            release = Release.get_or_create(project=project, version=release, date_added=date)
            set_tag(data, "sentry:release", release.version)

        if dist and release:
            dist = release.add_dist(dist, date)
            # dont allow a conflicting 'dist' tag
            pop_tag(data, "dist")
            set_tag(data, "sentry:dist", dist.name)
        else:
            dist = None

        event_user = self._get_event_user(project, data)
        if event_user:
            # dont allow a conflicting 'user' tag
            pop_tag(data, "user")
            set_tag(data, "sentry:user", event_user.tag_value)

        # At this point we want to normalize the in_app values in case the
        # clients did not set this appropriately so far.
        grouping_config = load_grouping_config(
            get_grouping_config_dict_for_event_data(data, project)
        )
        normalize_stacktraces_for_grouping(data, grouping_config)

        for plugin in plugins.for_project(project, version=None):
            added_tags = safe_execute(plugin.get_tags, event, _with_transaction=False)
            if added_tags:
                # plugins should not override user provided tags
                for key, value in added_tags:
                    if get_tag(data, key) is None:
                        set_tag(data, key, value)

        for path, iface in six.iteritems(event.interfaces):
            for k, v in iface.iter_tags():
                set_tag(data, k, v)
            # Get rid of ephemeral interface data
            if iface.ephemeral:
                data.pop(iface.path, None)

        # The active grouping config was put into the event in the
        # normalize step before. We now also make sure that the
        # fingerprint was set to `'{{ default }}'` just in case someone
        # removed it from the payload. The call to get_hashes will then
        # look at `grouping_config` to pick the right parameters.
        data["fingerprint"] = data.get("fingerprint") or ["{{ default }}"]
        apply_server_fingerprinting(data, get_fingerprinting_config_for_project(project))

        # Here we try to use the grouping config that was requested in the
        # event. If that config has since been deleted (because it was an
        # experimental grouping config) we fall back to the default.
        try:
            hashes = event.get_hashes()
        except GroupingConfigNotFound:
            data["grouping_config"] = get_grouping_config_dict_for_project(project)
            hashes = event.get_hashes()

        data["hashes"] = hashes

        # we want to freeze not just the metadata and type in but also the
        # derived attributes. The reason for this is that we push this
        # data into kafka for snuba processing and our postprocessing
        # picks up the data right from the snuba topic. For most usage
        # however the data is dynamically overridden by Event.title and
        # Event.location (See Event.as_dict)
        materialized_metadata = self.materialize_metadata()
        event_metadata = materialized_metadata["metadata"]
        data.update(materialized_metadata)
        data["culprit"] = culprit

        # index components into ``Event.message``
        # See GH-3248
        event.message = self.get_search_message(event_metadata, culprit)
        received_timestamp = event.data.get("received") or float(event.datetime.strftime("%s"))

        if not issueless_event:
            # The group gets the same metadata as the event when it's flushed but
            # additionally the `last_received` key is set. This key is used by
            # _save_aggregate.
            group_metadata = dict(materialized_metadata)
            group_metadata["last_received"] = received_timestamp
            kwargs = {
                "platform": platform,
                "message": event.message,
                "culprit": culprit,
                "logger": logger_name,
                "level": LOG_LEVELS_MAP.get(level),
                "last_seen": date,
                "first_seen": date,
                "active_at": date,
                "data": group_metadata,
            }

            if release:
                kwargs["first_release"] = release

            try:
                group, is_new, is_regression = self._save_aggregate(
                    event=event, hashes=hashes, release=release, **kwargs
                )
            except HashDiscarded:
                event_discarded.send_robust(project=project, sender=EventManager)
                metrics.incr(
                    "events.discarded",
                    skip_internal=True,
                    tags={"organization_id": project.organization_id, "platform": platform},
                )
                raise
            else:
                event_saved.send_robust(
                    project=project, event_size=event.size, sender=EventManager
                )
            event.group = group
        else:
            group = None
            is_new = False
            is_regression = False
            event_saved.send_robust(project=project, event_size=event.size, sender=EventManager)

        # store a reference to the group id to guarantee validation of isolation
        event.data.bind_ref(event)

        environment = Environment.get_or_create(project=project, name=environment)

        if group:
            group_environment, is_new_group_environment = GroupEnvironment.get_or_create(
                group_id=group.id,
                environment_id=environment.id,
                defaults={"first_release": release if release else None},
            )
        else:
            is_new_group_environment = False

        if release:
            ReleaseEnvironment.get_or_create(
                project=project, release=release, environment=environment, datetime=date
            )

            ReleaseProjectEnvironment.get_or_create(
                project=project, release=release, environment=environment, datetime=date
            )

            if group:
                grouprelease = GroupRelease.get_or_create(
                    group=group, release=release, environment=environment, datetime=date
                )

        counters = [(tsdb.models.project, project.id)]

        if group:
            counters.append((tsdb.models.group, group.id))

        if release:
            counters.append((tsdb.models.release, release.id))

        tsdb.incr_multi(counters, timestamp=event.datetime, environment_id=environment.id)

        frequencies = []

        if group:
            frequencies.append(
                (tsdb.models.frequent_environments_by_group, {group.id: {environment.id: 1}})
            )

            if release:
                frequencies.append(
                    (tsdb.models.frequent_releases_by_group, {group.id: {grouprelease.id: 1}})
                )

        if frequencies:
            tsdb.record_frequency_multi(frequencies, timestamp=event.datetime)

        if group:
            UserReport.objects.filter(project=project, event_id=event_id).update(
                group=group, environment=environment
            )

        # Write the event to Nodestore
        event.data.save()

        if event_user:
            counters = [
                (tsdb.models.users_affected_by_project, project.id, (event_user.tag_value,))
            ]

            if group:
                counters.append(
                    (tsdb.models.users_affected_by_group, group.id, (event_user.tag_value,))
                )

            tsdb.record_multi(counters, timestamp=event.datetime, environment_id=environment.id)

        if release:
            if is_new:
                buffer.incr(
                    ReleaseProject,
                    {"new_groups": 1},
                    {"release_id": release.id, "project_id": project.id},
                )
            if is_new_group_environment:
                buffer.incr(
                    ReleaseProjectEnvironment,
                    {"new_issues_count": 1},
                    {
                        "project_id": project.id,
                        "release_id": release.id,
                        "environment_id": environment.id,
                    },
                )

        if not raw:
            if not project.first_event:
                project.update(first_event=date)
                first_event_received.send_robust(project=project, event=event, sender=Project)

        eventstream.insert(
            group=group,
            event=event,
            is_new=is_new,
            is_regression=is_regression,
            is_new_group_environment=is_new_group_environment,
            primary_hash=hashes[0],
            # We are choosing to skip consuming the event back
            # in the eventstream if it's flagged as raw.
            # This means that we want to publish the event
            # through the event stream, but we don't care
            # about post processing and handling the commit.
            skip_consume=raw,
        )

        metric_tags = {"from_relay": "_relay_processed" in self._data}

        metrics.timing("events.latency", received_timestamp - recorded_timestamp, tags=metric_tags)

        metrics.timing("events.size.data.post_save", event.size, tags=metric_tags)

        metrics.incr(
            "events.post_save.normalize.errors",
            amount=len(self._data.get("errors") or ()),
            tags=metric_tags,
        )

        return event

    def _get_event_user(self, project, data):
        user_data = data.get("user")
        if not user_data:
            return

        ip_address = user_data.get("ip_address")

        if ip_address:
            try:
                ipaddress.ip_address(six.text_type(ip_address))
            except ValueError:
                ip_address = None

        euser = EventUser(
            project_id=project.id,
            ident=user_data.get("id"),
            email=user_data.get("email"),
            username=user_data.get("username"),
            ip_address=ip_address,
            name=user_data.get("name"),
        )
        euser.set_hash()
        if not euser.hash:
            return

        cache_key = u"euserid:1:{}:{}".format(project.id, euser.hash)
        euser_id = cache.get(cache_key)
        if euser_id is None:
            try:
                with transaction.atomic(using=router.db_for_write(EventUser)):
                    euser.save()
            except IntegrityError:
                try:
                    euser = EventUser.objects.get(project_id=project.id, hash=euser.hash)
                except EventUser.DoesNotExist:
                    # why???
                    e_userid = -1
                else:
                    if euser.name != (user_data.get("name") or euser.name):
                        euser.update(name=user_data["name"])
                    e_userid = euser.id
                cache.set(cache_key, e_userid, 3600)
        return euser

    def _find_hashes(self, project, hash_list):
        return map(
            lambda hash: GroupHash.objects.get_or_create(project=project, hash=hash)[0], hash_list
        )

    def _save_aggregate(self, event, hashes, release, **kwargs):
        project = event.project

        # attempt to find a matching hash
        all_hashes = self._find_hashes(project, hashes)

        existing_group_id = None
        for h in all_hashes:
            if h.group_id is not None:
                existing_group_id = h.group_id
                break
            if h.group_tombstone_id is not None:
                raise HashDiscarded("Matches group tombstone %s" % h.group_tombstone_id)

        # XXX(dcramer): this has the opportunity to create duplicate groups
        # it should be resolved by the hash merging function later but this
        # should be better tested/reviewed
        if existing_group_id is None:
            # it's possible the release was deleted between
            # when we queried for the release and now, so
            # make sure it still exists
            first_release = kwargs.pop("first_release", None)

            with transaction.atomic():
                short_id = project.next_short_id()
                group, group_is_new = (
                    Group.objects.create(
                        project=project,
                        short_id=short_id,
                        first_release_id=Release.objects.filter(id=first_release.id)
                        .values_list("id", flat=True)
                        .first()
                        if first_release
                        else None,
                        **kwargs
                    ),
                    True,
                )

            metrics.incr(
                "group.created",
                skip_internal=True,
                tags={"platform": event.platform or "unknown"},
            )
        else:
            group = Group.objects.get(id=existing_group_id)

            group_is_new = False

        group._project_cache = project

        # If all hashes are brand new we treat this event as new
        is_new = False
        new_hashes = [h for h in all_hashes if h.group_id is None]
        if new_hashes:
            # XXX: There is a race condition here wherein another process could
            # create a new group that is associated with one of the new hashes,
            # add some event(s) to it, and then subsequently have the hash
            # "stolen" by this process. This then "orphans" those events from
            # their "siblings" in the group we've created here. We don't have a
            # way to fix this, since we can't update the group on those hashes
            # without filtering on `group_id` (which we can't do due to query
            # planner weirdness.)
            # For more context, see 84c6f75a and d0e22787, as well as GH-5085.
            GroupHash.objects.filter(id__in=[h.id for h in new_hashes]).exclude(
                state=GroupHash.State.LOCKED_IN_MIGRATION
            ).update(group=group)

            if group_is_new and len(new_hashes) == len(all_hashes):
                is_new = True

        if not is_new:
            is_regression = self._process_existing_aggregate(
                group=group, event=event, data=kwargs, release=release
            )
        else:
            is_regression = False

        return group, is_new, is_regression

    def _handle_regression(self, group, event, release):
        if not group.is_resolved():
            return

        # we only mark it as a regression if the event's release is newer than
        # the release which we originally marked this as resolved
        elif GroupResolution.has_resolution(group, release):
            return

        elif has_pending_commit_resolution(group):
            return

        if not plugin_is_regression(group, event):
            return

        # we now think it's a regression, rely on the database to validate that
        # no one beat us to this
        date = max(event.datetime, group.last_seen)
        is_regression = bool(
            Group.objects.filter(
                id=group.id,
                # ensure we cant update things if the status has been set to
                # ignored
                status__in=[GroupStatus.RESOLVED, GroupStatus.UNRESOLVED],
            )
            .exclude(
                # add to the regression window to account for races here
                active_at__gte=date - timedelta(seconds=5)
            )
            .update(
                active_at=date,
                # explicitly set last_seen here as ``is_resolved()`` looks
                # at the value
                last_seen=date,
                status=GroupStatus.UNRESOLVED,
            )
        )

        group.active_at = date
        group.status = GroupStatus.UNRESOLVED

        if is_regression and release:
            # resolutions are only valid if the state of the group is still
            # resolved -- if it were to change the resolution should get removed
            try:
                resolution = GroupResolution.objects.get(group=group)
            except GroupResolution.DoesNotExist:
                affected = False
            else:
                cursor = connection.cursor()
                # delete() API does not return affected rows
                cursor.execute("DELETE FROM sentry_groupresolution WHERE id = %s", [resolution.id])
                affected = cursor.rowcount > 0

            if affected:
                # if we had to remove the GroupResolution (i.e. we beat the
                # queue to handling this) then we need to also record
                # the corresponding event
                try:
                    activity = Activity.objects.filter(
                        group=group, type=Activity.SET_RESOLVED_IN_RELEASE, ident=resolution.id
                    ).order_by("-datetime")[0]
                except IndexError:
                    # XXX: handle missing data, as it's not overly important
                    pass
                else:
                    activity.update(data={"version": release.version})

        if is_regression:
            activity = Activity.objects.create(
                project_id=group.project_id,
                group=group,
                type=Activity.SET_REGRESSION,
                data={"version": release.version if release else ""},
            )
            activity.send_notification()

            kick_off_status_syncs.apply_async(
                kwargs={"project_id": group.project_id, "group_id": group.id}
            )

        return is_regression

    def _process_existing_aggregate(self, group, event, data, release):
        date = max(event.datetime, group.last_seen)
        extra = {"last_seen": date, "score": ScoreClause(group), "data": data["data"]}
        if event.message and event.message != group.message:
            extra["message"] = event.message
        if group.level != data["level"]:
            extra["level"] = data["level"]
        if group.culprit != data["culprit"]:
            extra["culprit"] = data["culprit"]

        is_regression = self._handle_regression(group, event, release)

        group.last_seen = extra["last_seen"]

        update_kwargs = {"times_seen": 1}

        buffer.incr(Group, update_kwargs, {"id": group.id}, extra)

        return is_regression

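
# Illustrative sketch, not part of the original module: the grouping decision
# at the heart of _save_aggregate above, stripped of Django. The first hash
# that already belongs to a group wins; a tombstoned hash discards the event;
# otherwise a new group claims every unclaimed hash. `create_group` is a
# hypothetical callable standing in for Group.objects.create.
def _example_resolve_group(all_hashes, create_group):
    for h in all_hashes:
        if h.group_id is not None:
            return h.group_id, False  # joined an existing group; not new
        if h.group_tombstone_id is not None:
            raise HashDiscarded("Matches group tombstone %s" % h.group_tombstone_id)

    group_id = create_group()
    for h in all_hashes:
        h.group_id = group_id  # claim all new hashes for the new group
    # every hash was new, so the event is treated as the start of a new group
    return group_id, True
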
def _do_process_event(cache_key, start_time, event_id, process_task):
    from sentry.plugins import plugins

    data = default_cache.get(cache_key)

    if data is None:
        metrics.incr(
            'events.failed',
            tags={'reason': 'cache', 'stage': 'process'},
            skip_internal=False)
        error_logger.error('process.failed.empty', extra={'cache_key': cache_key})
        return

    data = CanonicalKeyDict(data)
    project = data['project']

    with configure_scope() as scope:
        scope.set_tag("project", project)

    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project)

    # Event enhancers. These run before anything else.
    for plugin in plugins.all(version=2):
        enhancers = safe_execute(plugin.get_event_enhancers, data=data)
        for enhancer in (enhancers or ()):
            enhanced = safe_execute(enhancer, data)
            if enhanced:
                data = enhanced
                has_changed = True

    # Stacktrace based event processors.
    new_data = process_stacktraces(data)
    if new_data is not None:
        has_changed = True
        data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(
            plugin.get_event_preprocessors, data=data, _with_transaction=False
        )
        for processor in (processors or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project, 'Project cannot be mutated by preprocessor'

    if has_changed:
        issues = data.get('processing_issues')
        try:
            if issues and create_failed_event(
                cache_key, project, list(issues.values()), event_id=event_id,
                start_time=start_time, reprocessing_rev=reprocessing_rev
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again. This happens when the reprocessing
            # revision changed while we were processing.
            process_task.delay(cache_key, start_time=start_time, event_id=event_id)
            return

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    default_cache.set(cache_key, data, 3600)

    save_event.delay(
        cache_key=cache_key,
        data=None,
        start_time=start_time,
        event_id=event_id,
        project_id=project,
    )

def _do_save_event( cache_key=None, data=None, start_time=None, event_id=None, project_id=None, **kwargs ): """ Saves an event to the database. """ set_current_event_project(project_id) from sentry.event_manager import EventManager, HashDiscarded event_type = "none" if cache_key and data is None: with metrics.timer("tasks.store.do_save_event.get_cache") as metric_tags: data = event_processing_store.get(cache_key) if data is not None: metric_tags["event_type"] = event_type = data.get("type") or "none" with metrics.global_tags(event_type=event_type): if data is not None: data = CanonicalKeyDict(data) if event_id is None and data is not None: event_id = data["event_id"] # only when we come from reprocessing do we get a project_id sent into # the task. if project_id is None: project_id = data.pop("project") set_current_event_project(project_id) # We only need to delete raw events for events that support # reprocessing. If the data cannot be found we want to assume # that we need to delete the raw event. if not data or reprocessing.event_supports_reprocessing(data): with metrics.timer("tasks.store.do_save_event.delete_raw_event"): delete_raw_event(project_id, event_id, allow_hint_clear=True) # This covers two cases: where data is None because we did not manage # to fetch it from the default cache or the empty dictionary was # stored in the default cache. The former happens if the event # expired while being on the queue, the second happens on reprocessing # if the raw event was deleted concurrently while we held on to # it. This causes the node store to delete the data and we end up # fetching an empty dict. We could in theory not invoke `save_event` # in those cases but it's important that we always clean up the # reprocessing reports correctly or they will screw up the UI. So # to future-proof this correctly we just handle this case here. if not data: metrics.incr( "events.failed", tags={"reason": "cache", "stage": "post"}, skip_internal=False ) return try: if killswitch_matches_context( "store.load-shed-save-event-projects", { "project_id": project_id, "event_type": event_type, "platform": data.get("platform") or "none", }, ): raise HashDiscarded("Load shedding save_event") with metrics.timer("tasks.store.do_save_event.event_manager.save"): manager = EventManager(data) # event.project.organization is populated after this statement. manager.save( project_id, assume_normalized=True, start_time=start_time, cache_key=cache_key ) # Put the updated event back into the cache so that post_process # has the most recent data. data = manager.get_data() if isinstance(data, CANONICAL_TYPES): data = dict(data.items()) with metrics.timer("tasks.store.do_save_event.write_processing_cache"): event_processing_store.store(data) except HashDiscarded: # Delete the event payload from cache since it won't show up in post-processing. if cache_key: with metrics.timer("tasks.store.do_save_event.delete_cache"): event_processing_store.delete_by_key(cache_key) finally: reprocessing2.mark_event_reprocessed(data) if cache_key: with metrics.timer("tasks.store.do_save_event.delete_attachment_cache"): attachment_cache.delete(cache_key) if start_time: metrics.timing( "events.time-to-process", time() - start_time, instance=data["platform"], tags={ "is_reprocessing2": "true" if reprocessing2.is_reprocessed_event(data) else "false", }, ) time_synthetic_monitoring_event(data, project_id, start_time)
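# Illustrative sketch: `killswitch_matches_context` above load-sheds events
# whose context matches an operator-configured entry. This is a rough
# approximation of that matching and an assumption about the option format
# (the real implementation is richer):
def matches_context(configured_entries, context):
    # An entry matches if every field it names equals the context value;
    # None acts as a wildcard for that field.
    for entry in configured_entries:
        if all(v is None or context.get(k) == v for k, v in entry.items()):
            return True
    return False

entries = [{"project_id": 42, "event_type": None}]
print(matches_context(entries, {"project_id": 42, "event_type": "error"}))  # True
print(matches_context(entries, {"project_id": 7, "event_type": "error"}))   # False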
class EventManager(object): """ Handles normalization in both the store endpoint and the save task. The intention is to swap this class out with a reimplementation in Rust. """ def __init__( self, data, version="5", project=None, grouping_config=None, client_ip=None, user_agent=None, auth=None, key=None, content_encoding=None, is_renormalize=False, remove_other=None, project_config=None, sent_at=None, ): self._data = CanonicalKeyDict(data) self.version = version self._project = project # if not explicitly specified try to get the grouping from project_config if grouping_config is None and project_config is not None: config = project_config.config grouping_config = config.get("grouping_config") # if we still don't have a grouping also try the project if grouping_config is None and project is not None: grouping_config = get_grouping_config_dict_for_project(self._project) self._grouping_config = grouping_config self._client_ip = client_ip self._user_agent = user_agent self._auth = auth self._key = key self._is_renormalize = is_renormalize self._remove_other = remove_other self._normalized = False self.project_config = project_config self.sent_at = sent_at def normalize(self, project_id=None): with metrics.timer("events.store.normalize.duration"): self._normalize_impl(project_id=project_id) def _normalize_impl(self, project_id=None): if self._project and project_id and project_id != self._project.id: raise RuntimeError( "Initialized EventManager with one project ID and called save() with another one" ) if self._normalized: raise RuntimeError("Already normalized") self._normalized = True from sentry_relay.processing import StoreNormalizer rust_normalizer = StoreNormalizer( project_id=self._project.id if self._project else project_id, client_ip=self._client_ip, client=self._auth.client if self._auth else None, key_id=six.text_type(self._key.id) if self._key else None, grouping_config=self._grouping_config, protocol_version=six.text_type(self.version) if self.version is not None else None, is_renormalize=self._is_renormalize, remove_other=self._remove_other, normalize_user_agent=True, sent_at=self.sent_at.isoformat() if self.sent_at is not None else None, **DEFAULT_STORE_NORMALIZER_ARGS ) self._data = CanonicalKeyDict(rust_normalizer.normalize_event(dict(self._data))) def get_data(self): return self._data @metrics.wraps("event_manager.save") def save(self, project_id, raw=False, assume_normalized=False, start_time=None, cache_key=None): """ After normalizing and processing an event, save adjacent models such as releases and environments to postgres and write the event into eventstream. From there it will be picked up by Snuba and post-processing. We re-insert events with duplicate IDs into Snuba, which is responsible for deduplicating events. Since deduplication in Snuba is on the primary key (based on event ID, project ID and day), events with same IDs are only deduplicated if their timestamps fall on the same day. The latest event always wins and overwrites the value of events received earlier in that day. Since we increment counters and frequencies here before events get inserted to eventstream these numbers may be larger than the total number of events if we receive duplicate event IDs that fall on the same day (that do not hit cache first). 
""" # Normalize if needed if not self._normalized: if not assume_normalized: self.normalize(project_id=project_id) self._normalized = True with metrics.timer("event_manager.save.project.get_from_cache"): project = Project.objects.get_from_cache(id=project_id) projects = {project.id: project} if self._data.get("type") == "transaction": self._data["project"] = int(project_id) job = {"data": self._data, "start_time": start_time} jobs = save_transaction_events([job], projects) return jobs[0]["event"] with metrics.timer("event_manager.save.organization.get_from_cache"): project._organization_cache = Organization.objects.get_from_cache( id=project.organization_id ) job = {"data": self._data, "project_id": project_id, "raw": raw, "start_time": start_time} jobs = [job] _pull_out_data(jobs, projects) _get_or_create_release_many(jobs, projects) _get_event_user_many(jobs, projects) job["project_key"] = None if job["key_id"] is not None: with metrics.timer("event_manager.load_project_key"): try: job["project_key"] = ProjectKey.objects.get_from_cache(id=job["key_id"]) except ProjectKey.DoesNotExist: pass with metrics.timer("event_manager.load_grouping_config"): # At this point we want to normalize the in_app values in case the # clients did not set this appropriately so far. grouping_config = load_grouping_config( get_grouping_config_dict_for_event_data(job["data"], project) ) with metrics.timer("event_manager.normalize_stacktraces_for_grouping"): normalize_stacktraces_for_grouping(job["data"], grouping_config) _derive_plugin_tags_many(jobs, projects) _derive_interface_tags_many(jobs) with metrics.timer("event_manager.apply_server_fingerprinting"): # The active grouping config was put into the event in the # normalize step before. We now also make sure that the # fingerprint was set to `'{{ default }}' just in case someone # removed it from the payload. The call to get_hashes will then # look at `grouping_config` to pick the right parameters. job["data"]["fingerprint"] = job["data"].get("fingerprint") or ["{{ default }}"] apply_server_fingerprinting(job["data"], get_fingerprinting_config_for_project(project)) with metrics.timer("event_manager.event.get_hashes"): # Here we try to use the grouping config that was requested in the # event. If that config has since been deleted (because it was an # experimental grouping config) we fall back to the default. try: hashes = job["event"].get_hashes() except GroupingConfigNotFound: job["data"]["grouping_config"] = get_grouping_config_dict_for_project(project) hashes = job["event"].get_hashes() job["data"]["hashes"] = hashes _materialize_metadata_many(jobs) # The group gets the same metadata as the event when it's flushed but # additionally the `last_received` key is set. This key is used by # _save_aggregate. group_metadata = dict(job["materialized_metadata"]) group_metadata["last_received"] = job["received_timestamp"] kwargs = { "platform": job["platform"], "message": job["event"].search_message, "culprit": job["culprit"], "logger": job["logger_name"], "level": LOG_LEVELS_MAP.get(job["level"]), "last_seen": job["event"].datetime, "first_seen": job["event"].datetime, "active_at": job["event"].datetime, "data": group_metadata, } if job["release"]: kwargs["first_release"] = job["release"] # Load attachments first, but persist them at the very last after # posting to eventstream to make sure all counters and eventstream are # incremented for sure. Also wait for grouping to remove attachments # based on the group counter. 
with metrics.timer("event_manager.get_attachments"): attachments = get_attachments(cache_key, job) try: job["group"], job["is_new"], job["is_regression"] = _save_aggregate( event=job["event"], hashes=hashes, release=job["release"], **kwargs ) except HashDiscarded: discard_event(job, attachments) raise job["event"].group = job["group"] # store a reference to the group id to guarantee validation of isolation # XXX(markus): No clue what this does job["event"].data.bind_ref(job["event"]) _get_or_create_environment_many(jobs, projects) if job["group"]: group_environment, job["is_new_group_environment"] = GroupEnvironment.get_or_create( group_id=job["group"].id, environment_id=job["environment"].id, defaults={"first_release": job["release"] or None}, ) else: job["is_new_group_environment"] = False _get_or_create_release_associated_models(jobs, projects) if job["release"] and job["group"]: job["grouprelease"] = GroupRelease.get_or_create( group=job["group"], release=job["release"], environment=job["environment"], datetime=job["event"].datetime, ) _tsdb_record_all_metrics(jobs) if job["group"]: UserReport.objects.filter(project=project, event_id=job["event"].event_id).update( group=job["group"], environment=job["environment"] ) with metrics.timer("event_manager.filter_attachments_for_group"): attachments = filter_attachments_for_group(attachments, job) # XXX: DO NOT MUTATE THE EVENT PAYLOAD AFTER THIS POINT _materialize_event_metrics(jobs) for attachment in attachments: key = "bytes.stored.%s" % (attachment.type,) old_bytes = job["event_metrics"].get(key) or 0 job["event_metrics"][key] = old_bytes + attachment.size _nodestore_save_many(jobs) save_unprocessed_event(project, event_id=job["event"].event_id) if job["release"]: if job["is_new"]: buffer.incr( ReleaseProject, {"new_groups": 1}, {"release_id": job["release"].id, "project_id": project.id}, ) if job["is_new_group_environment"]: buffer.incr( ReleaseProjectEnvironment, {"new_issues_count": 1}, { "project_id": project.id, "release_id": job["release"].id, "environment_id": job["environment"].id, }, ) if not raw: if not project.first_event: project.update(first_event=job["event"].datetime) first_event_received.send_robust( project=project, event=job["event"], sender=Project ) _eventstream_insert_many(jobs) # Do this last to ensure signals get emitted even if connection to the # file store breaks temporarily. with metrics.timer("event_manager.save_attachments"): save_attachments(cache_key, attachments, job) metric_tags = {"from_relay": "_relay_processed" in job["data"]} metrics.timing( "events.latency", job["received_timestamp"] - job["recorded_timestamp"], tags=metric_tags, ) metrics.timing("events.size.data.post_save", job["event"].size, tags=metric_tags) metrics.incr( "events.post_save.normalize.errors", amount=len(job["data"].get("errors") or ()), tags=metric_tags, ) _track_outcome_accepted_many(jobs) self._data = job["event"].data.data return job["event"]
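# Illustrative sketch: save() above threads a list of `job` dicts through many
# small `_*_many(jobs, projects)` stages so the same code paths can later be
# reused for batches of events. A toy version of that staged-pipeline shape;
# the stage names here are invented, not Sentry's:
def _materialize_platform_many(jobs, projects):
    for job in jobs:
        job["platform"] = job["data"].get("platform") or "other"

def _count_errors_many(jobs, projects):
    for job in jobs:
        job["error_count"] = len(job["data"].get("errors") or ())

def run_pipeline(jobs, projects, stages):
    for stage in stages:
        stage(jobs, projects)  # each stage mutates the job dicts in place
    return jobs

jobs = [{"data": {"platform": "python", "errors": [{}]}}]
run_pipeline(jobs, {}, [_materialize_platform_many, _count_errors_many])
print(jobs[0]["platform"], jobs[0]["error_count"])  # python 1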
def _do_symbolicate_event( cache_key, start_time, event_id, symbolicate_task, data=None, queue_switches=0 ): from sentry.lang.native.processing import get_symbolication_function if data is None: data = event_processing_store.get(cache_key) if data is None: metrics.incr( "events.failed", tags={"reason": "cache", "stage": "symbolicate"}, skip_internal=False ) error_logger.error("symbolicate.failed.empty", extra={"cache_key": cache_key}) return data = CanonicalKeyDict(data) project_id = data["project"] set_current_event_project(project_id) event_id = data["event_id"] from_reprocessing = ( symbolicate_task is symbolicate_event_from_reprocessing or symbolicate_task is symbolicate_event_from_reprocessing_low_priority ) # check whether the event is in the wrong queue and if so, move it to the other one. # we do this at most SYMBOLICATOR_MAX_QUEUE_SWITCHES times. if queue_switches >= SYMBOLICATOR_MAX_QUEUE_SWITCHES: metrics.gauge("tasks.store.symbolicate_event.low_priority.max_queue_switches", 1) else: is_low_priority = symbolicate_task in [ symbolicate_event_low_priority, symbolicate_event_from_reprocessing_low_priority, ] should_be_low_priority = should_demote_symbolication(project_id) if is_low_priority != should_be_low_priority: metrics.gauge("tasks.store.symbolicate_event.low_priority.wrong_queue", 1) submit_symbolicate( should_be_low_priority, from_reprocessing, cache_key, event_id, start_time, data, queue_switches + 1, ) return def _continue_to_process_event(): process_task = process_event_from_reprocessing if from_reprocessing else process_event _do_process_event( cache_key=cache_key, start_time=start_time, event_id=event_id, process_task=process_task, data=data, data_has_changed=has_changed, from_symbolicate=True, ) symbolication_function = get_symbolication_function(data) symbolication_function_name = getattr(symbolication_function, "__name__", "none") if killswitch_matches_context( "store.load-shed-symbolicate-event-projects", { "project_id": project_id, "event_id": event_id, "platform": data.get("platform") or "null", "symbolication_function": symbolication_function_name, }, ): return _continue_to_process_event() has_changed = False symbolication_start_time = time() submission_ratio = options.get("symbolicate-event.low-priority.metrics.submission-rate") submit_realtime_metrics = not from_reprocessing and random.random() < submission_ratio if submit_realtime_metrics: with sentry_sdk.start_span(op="tasks.store.symbolicate_event.low_priority.metrics.counter"): timestamp = int(symbolication_start_time) try: realtime_metrics.increment_project_event_counter(project_id, timestamp) except Exception as e: sentry_sdk.capture_exception(e) with sentry_sdk.start_span(op="tasks.store.symbolicate_event.symbolication") as span: span.set_data("symbolication_function", symbolication_function_name) with metrics.timer( "tasks.store.symbolicate_event.symbolication", tags={"symbolication_function": symbolication_function_name}, ): while True: try: with sentry_sdk.start_span( op="tasks.store.symbolicate_event.%s" % symbolication_function_name ) as span: symbolicated_data = symbolication_function(data) span.set_data("symbolicated_data", bool(symbolicated_data)) if symbolicated_data: data = symbolicated_data has_changed = True break except RetrySymbolication as e: if ( time() - symbolication_start_time ) > settings.SYMBOLICATOR_PROCESS_EVENT_WARN_TIMEOUT: error_logger.warning( "symbolicate.slow", extra={"project_id": project_id, "event_id": event_id}, ) if ( time() - symbolication_start_time ) > 
settings.SYMBOLICATOR_PROCESS_EVENT_HARD_TIMEOUT: # Do not drop event but actually continue with rest of pipeline # (persisting unsymbolicated event) metrics.incr( "tasks.store.symbolicate_event.fatal", tags={ "reason": "timeout", "symbolication_function": symbolication_function_name, }, ) error_logger.exception( "symbolicate.failed.infinite_retry", extra={"project_id": project_id, "event_id": event_id}, ) data.setdefault("_metrics", {})["flag.processing.error"] = True data.setdefault("_metrics", {})["flag.processing.fatal"] = True has_changed = True break else: # sleep for `retry_after` but max 5 seconds and try again metrics.incr( "tasks.store.symbolicate_event.retry", tags={"symbolication_function": symbolication_function_name}, ) sleep(min(e.retry_after, SYMBOLICATOR_MAX_RETRY_AFTER)) continue except Exception: metrics.incr( "tasks.store.symbolicate_event.fatal", tags={ "reason": "error", "symbolication_function": symbolication_function_name, }, ) error_logger.exception("tasks.store.symbolicate_event.symbolication") data.setdefault("_metrics", {})["flag.processing.error"] = True data.setdefault("_metrics", {})["flag.processing.fatal"] = True has_changed = True break if submit_realtime_metrics: with sentry_sdk.start_span( op="tasks.store.symbolicate_event.low_priority.metrics.histogram" ): symbolication_duration = int(time() - symbolication_start_time) try: realtime_metrics.increment_project_duration_counter( project_id, timestamp, symbolication_duration ) except Exception as e: sentry_sdk.capture_exception(e) # We cannot persist canonical types in the cache, so we need to # downgrade this. if isinstance(data, CANONICAL_TYPES): data = dict(data.items()) if has_changed: cache_key = event_processing_store.store(data) return _continue_to_process_event()
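# Illustrative sketch: the symbolication loop above retries on a "try again
# later" signal, warns after a soft timeout, and gives up (while keeping the
# event) after a hard timeout, sleeping at most a capped interval between
# attempts. A self-contained approximation with toy constants:
from time import monotonic, sleep

class RetryLater(Exception):
    def __init__(self, retry_after):
        self.retry_after = retry_after

def run_with_retries(fn, soft_timeout=0.05, hard_timeout=0.1, max_sleep=0.02):
    start = monotonic()
    while True:
        try:
            return fn()
        except RetryLater as e:
            elapsed = monotonic() - start
            if elapsed > soft_timeout:
                print("warning: slow")  # stand-in for error_logger.warning
            if elapsed > hard_timeout:
                print("giving up, continuing pipeline")
                return None  # the event continues unsymbolicated
            sleep(min(e.retry_after, max_sleep))  # capped backoff

attempts = []
def flaky():
    attempts.append(1)
    if len(attempts) < 3:
        raise RetryLater(retry_after=0.01)
    return "ok"

print(run_with_retries(flaky))  # retries twice, then returns "ok"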
class EventManager(object): """ Handles normalization in both the store endpoint and the save task. The intention is to swap this class out with a reimplementation in Rust. """ def __init__( self, data, version='5', project=None, grouping_config=None, client_ip=None, user_agent=None, auth=None, key=None, content_encoding=None, is_renormalize=False, remove_other=None ): self._data = _decode_event(data, content_encoding=content_encoding) self.version = version self._project = project if grouping_config is None and project is not None: grouping_config = get_grouping_config_dict_for_project(self._project) self._grouping_config = grouping_config self._client_ip = client_ip self._user_agent = user_agent self._auth = auth self._key = key self._is_renormalize = is_renormalize self._remove_other = remove_other self._normalized = False def process_csp_report(self): """Only called from the CSP report endpoint.""" data = self._data try: interface = get_interface(data.pop('interface')) report = data.pop('report') except KeyError: raise APIForbidden('No report or interface data') # To support testing, we can either accept a built interface instance, or the raw data in # which case we build the instance ourselves try: instance = ( report if isinstance(report, interface) else interface.from_raw(report) ) except jsonschema.ValidationError as e: raise APIError('Invalid security report: %s' % str(e).splitlines()[0]) def clean(d): return dict(filter(lambda x: x[1], d.items())) data.update( { 'logger': 'csp', 'message': instance.get_message(), 'culprit': instance.get_culprit(), instance.path: instance.to_json(), 'tags': instance.get_tags(), 'errors': [], 'user': {'ip_address': self._client_ip}, # Construct a faux Http interface based on the little information we have # This is a bit weird, since we don't have nearly enough # information to create an Http interface, but # this automatically will pick up tags for the User-Agent # which is actually important here for CSP 'request': { 'url': instance.get_origin(), 'headers': clean( { 'User-Agent': self._user_agent, 'Referer': instance.get_referrer(), } ), }, } ) self._data = data def normalize(self): with metrics.timer('events.store.normalize.duration'): self._normalize_impl() metrics.timing( 'events.store.normalize.errors', len(self._data.get("errors") or ()), ) def _normalize_impl(self): if self._normalized: raise RuntimeError('Already normalized') self._normalized = True from semaphore.processing import StoreNormalizer rust_normalizer = StoreNormalizer( geoip_lookup=rust_geoip, project_id=self._project.id if self._project else None, client_ip=self._client_ip, client=self._auth.client if self._auth else None, key_id=six.text_type(self._key.id) if self._key else None, grouping_config=self._grouping_config, protocol_version=six.text_type(self.version) if self.version is not None else None, stacktrace_frames_hard_limit=settings.SENTRY_STACKTRACE_FRAMES_HARD_LIMIT, max_stacktrace_frames=settings.SENTRY_MAX_STACKTRACE_FRAMES, valid_platforms=list(VALID_PLATFORMS), max_secs_in_future=MAX_SECS_IN_FUTURE, max_secs_in_past=MAX_SECS_IN_PAST, enable_trimming=True, is_renormalize=self._is_renormalize, remove_other=self._remove_other, ) self._data = CanonicalKeyDict( rust_normalizer.normalize_event(dict(self._data)) ) normalize_user_agent(self._data) def should_filter(self): ''' returns (result: bool, reason: string or None) Result is True if an event should be filtered The reason for filtering is passed along as a string so that we can store it in metrics ''' for name in 
SECURITY_REPORT_INTERFACES: if name in self._data: interface = get_interface(name) if interface.to_python(self._data[name]).should_filter(self._project): return (True, FilterStatKeys.INVALID_CSP) if self._client_ip and not is_valid_ip(self._project, self._client_ip): return (True, FilterStatKeys.IP_ADDRESS) release = self._data.get('release') if release and not is_valid_release(self._project, release): return (True, FilterStatKeys.RELEASE_VERSION) error_message = get_path(self._data, 'logentry', 'formatted') \ or get_path(self._data, 'logentry', 'message') \ or '' if error_message and not is_valid_error_message(self._project, error_message): return (True, FilterStatKeys.ERROR_MESSAGE) for exc in get_path(self._data, 'exception', 'values', filter=True, default=[]): message = u': '.join( filter(None, map(exc.get, ['type', 'value'])) ) if message and not is_valid_error_message(self._project, message): return (True, FilterStatKeys.ERROR_MESSAGE) for filter_cls in filters.all(): filter_obj = filter_cls(self._project) if filter_obj.is_enabled() and filter_obj.test(self._data): return (True, six.text_type(filter_obj.id)) return (False, None) def get_data(self): return self._data def _get_event_instance(self, project_id=None): data = self._data event_id = data.get('event_id') platform = data.get('platform') recorded_timestamp = data.get('timestamp') date = datetime.fromtimestamp(recorded_timestamp) date = date.replace(tzinfo=timezone.utc) time_spent = data.get('time_spent') data['node_id'] = Event.generate_node_id(project_id, event_id) return Event( project_id=project_id or self._project.id, event_id=event_id, data=EventDict(data, skip_renormalization=True), time_spent=time_spent, datetime=date, platform=platform ) def get_culprit(self): """Helper to calculate the default culprit""" return force_text( self._data.get('culprit') or self._data.get('transaction') or generate_culprit(self._data) or '' ) def get_event_type(self): """Returns the event type.""" return eventtypes.get(self._data.get('type', 'default'))() def materialize_metadata(self): """Returns the materialized metadata to be merged with group or event data. This currently produces the keys `type`, `metadata`, `title` and `location`. This should most likely also produce `culprit` here. """ event_type = self.get_event_type() event_metadata = event_type.get_metadata(self._data) return { 'type': event_type.key, 'metadata': event_metadata, 'title': event_type.get_title(event_metadata), 'location': event_type.get_location(event_metadata), } def get_search_message(self, event_metadata=None, culprit=None): """This generates the internal event.message attribute which is used for search purposes. It adds a bunch of data from the metadata and the culprit. 
""" if event_metadata is None: event_metadata = self.get_event_type().get_metadata(self._data) if culprit is None: culprit = self.get_culprit() data = self._data message = '' if data.get('logentry'): message += (data['logentry'].get('formatted') or data['logentry'].get('message') or '') if event_metadata: for value in six.itervalues(event_metadata): value_u = force_text(value, errors='replace') if value_u not in message: message = u'{} {}'.format(message, value_u) if culprit and culprit not in message: culprit_u = force_text(culprit, errors='replace') message = u'{} {}'.format(message, culprit_u) return trim(message.strip(), settings.SENTRY_MAX_MESSAGE_LENGTH) def save(self, project_id, raw=False, assume_normalized=False): # Normalize if needed if not self._normalized: if not assume_normalized: self.normalize() self._normalized = True data = self._data project = Project.objects.get_from_cache(id=project_id) project._organization_cache = Organization.objects.get_from_cache( id=project.organization_id) # Check to make sure we're not about to do a bunch of work that's # already been done if we've processed an event with this ID. (This # isn't a perfect solution -- this doesn't handle ``EventMapping`` and # there's a race condition between here and when the event is actually # saved, but it's an improvement. See GH-7677.) try: event = Event.objects.get( project_id=project.id, event_id=data['event_id'], ) except Event.DoesNotExist: pass else: # Make sure we cache on the project before returning event._project_cache = project logger.info( 'duplicate.found', exc_info=True, extra={ 'event_uuid': data['event_id'], 'project_id': project.id, 'model': Event.__name__, } ) return event # Pull out the culprit culprit = self.get_culprit() # Pull the toplevel data we're interested in level = data.get('level') # TODO(mitsuhiko): this code path should be gone by July 2018. # This is going to be fine because no code actually still depends # on integers here. When we need an integer it will be converted # into one later. Old workers used to send integers here. if level is not None and isinstance(level, six.integer_types): level = LOG_LEVELS[level] transaction_name = data.get('transaction') logger_name = data.get('logger') release = data.get('release') dist = data.get('dist') environment = data.get('environment') recorded_timestamp = data.get('timestamp') # We need to swap out the data with the one internal to the newly # created event object event = self._get_event_instance(project_id=project_id) self._data = data = event.data.data event._project_cache = project date = event.datetime platform = event.platform event_id = event.event_id if transaction_name: transaction_name = force_text(transaction_name) # Some of the data that are toplevel attributes are duplicated # into tags (logger, level, environment, transaction). These are # different from legacy attributes which are normalized into tags # ahead of time (site, server_name). 
setdefault_path(data, 'tags', value=[]) set_tag(data, 'level', level) if logger_name: set_tag(data, 'logger', logger_name) if environment: set_tag(data, 'environment', environment) if transaction_name: set_tag(data, 'transaction', transaction_name) if release: # don't allow a conflicting 'release' tag pop_tag(data, 'release') release = Release.get_or_create( project=project, version=release, date_added=date, ) set_tag(data, 'sentry:release', release.version) if dist and release: dist = release.add_dist(dist, date) # don't allow a conflicting 'dist' tag pop_tag(data, 'dist') set_tag(data, 'sentry:dist', dist.name) else: dist = None event_user = self._get_event_user(project, data) if event_user: # don't allow a conflicting 'user' tag pop_tag(data, 'user') set_tag(data, 'sentry:user', event_user.tag_value) # At this point we want to normalize the in_app values in case the # clients did not set this appropriately so far. grouping_config = load_grouping_config( get_grouping_config_dict_for_event_data(data, project)) normalize_stacktraces_for_grouping(data, grouping_config) for plugin in plugins.for_project(project, version=None): added_tags = safe_execute(plugin.get_tags, event, _with_transaction=False) if added_tags: # plugins should not override user provided tags for key, value in added_tags: if get_tag(data, key) is None: set_tag(data, key, value) for path, iface in six.iteritems(event.interfaces): for k, v in iface.iter_tags(): set_tag(data, k, v) # Get rid of ephemeral interface data if iface.ephemeral: data.pop(iface.path, None) # The active grouping config was put into the event in the # normalize step before. We now also make sure that the # fingerprint was set to `'{{ default }}'` just in case someone # removed it from the payload. The call to get_hashes will then # look at `grouping_config` to pick the right parameters. data['fingerprint'] = data.get('fingerprint') or ['{{ default }}'] apply_server_fingerprinting(data, get_fingerprinting_config_for_project(project)) hashes = event.get_hashes() data['hashes'] = hashes # we want to freeze not just the metadata and type but also the # derived attributes. The reason for this is that we push this # data into kafka for snuba processing and our postprocessing # picks up the data right from the snuba topic. For most usage # however the data is dynamically overridden by Event.title and # Event.location (See Event.as_dict) materialized_metadata = self.materialize_metadata() event_metadata = materialized_metadata['metadata'] data.update(materialized_metadata) data['culprit'] = culprit # index components into ``Event.message`` # See GH-3248 event.message = self.get_search_message(event_metadata, culprit) received_timestamp = event.data.get('received') or float(event.datetime.strftime('%s')) # The group gets the same metadata as the event when it's flushed but # additionally the `last_received` key is set. This key is used by # _save_aggregate.
group_metadata = dict(materialized_metadata) group_metadata['last_received'] = received_timestamp kwargs = { 'platform': platform, 'message': event.message, 'culprit': culprit, 'logger': logger_name, 'level': LOG_LEVELS_MAP.get(level), 'last_seen': date, 'first_seen': date, 'active_at': date, 'data': group_metadata, } if release: kwargs['first_release'] = release try: group, is_new, is_regression, is_sample = self._save_aggregate( event=event, hashes=hashes, release=release, **kwargs ) except HashDiscarded: event_discarded.send_robust( project=project, sender=EventManager, ) metrics.incr( 'events.discarded', skip_internal=True, tags={ 'organization_id': project.organization_id, 'platform': platform, }, ) raise else: event_saved.send_robust( project=project, event_size=event.size, sender=EventManager, ) event.group = group # store a reference to the group id to guarantee validation of isolation event.data.bind_ref(event) # When an event was sampled, the canonical source of truth # is the EventMapping table since we aren't going to be writing out an actual # Event row. Otherwise, if the Event isn't being sampled, we can safely # rely on the Event table itself as the source of truth and ignore # EventMapping since it's redundant information. if is_sample: try: with transaction.atomic(using=router.db_for_write(EventMapping)): EventMapping.objects.create(project=project, group=group, event_id=event_id) except IntegrityError: logger.info( 'duplicate.found', exc_info=True, extra={ 'event_uuid': event_id, 'project_id': project.id, 'group_id': group.id, 'model': EventMapping.__name__, } ) return event environment = Environment.get_or_create( project=project, name=environment, ) group_environment, is_new_group_environment = GroupEnvironment.get_or_create( group_id=group.id, environment_id=environment.id, defaults={ 'first_release': release if release else None, }, ) if release: ReleaseEnvironment.get_or_create( project=project, release=release, environment=environment, datetime=date, ) ReleaseProjectEnvironment.get_or_create( project=project, release=release, environment=environment, datetime=date, ) grouprelease = GroupRelease.get_or_create( group=group, release=release, environment=environment, datetime=date, ) counters = [ (tsdb.models.group, group.id), (tsdb.models.project, project.id), ] if release: counters.append((tsdb.models.release, release.id)) tsdb.incr_multi(counters, timestamp=event.datetime, environment_id=environment.id) frequencies = [ # (tsdb.models.frequent_projects_by_organization, { # project.organization_id: { # project.id: 1, # }, # }), # (tsdb.models.frequent_issues_by_project, { # project.id: { # group.id: 1, # }, # }) (tsdb.models.frequent_environments_by_group, { group.id: { environment.id: 1, }, }) ] if release: frequencies.append( (tsdb.models.frequent_releases_by_group, { group.id: { grouprelease.id: 1, }, }) ) tsdb.record_frequency_multi(frequencies, timestamp=event.datetime) UserReport.objects.filter( project=project, event_id=event_id, ).update( group=group, environment=environment, ) # save the event unless it's been sampled if not is_sample: try: with transaction.atomic(using=router.db_for_write(Event)): event.save() except IntegrityError: logger.info( 'duplicate.found', exc_info=True, extra={ 'event_uuid': event_id, 'project_id': project.id, 'group_id': group.id, 'model': Event.__name__, } ) return event tagstore.delay_index_event_tags( organization_id=project.organization_id, project_id=project.id, group_id=group.id, environment_id=environment.id,
event_id=event.id, tags=event.tags, date_added=event.datetime, ) if event_user: tsdb.record_multi( ( (tsdb.models.users_affected_by_group, group.id, (event_user.tag_value, )), (tsdb.models.users_affected_by_project, project.id, (event_user.tag_value, )), ), timestamp=event.datetime, environment_id=environment.id, ) if release: if is_new: buffer.incr( ReleaseProject, {'new_groups': 1}, { 'release_id': release.id, 'project_id': project.id, } ) if is_new_group_environment: buffer.incr( ReleaseProjectEnvironment, {'new_issues_count': 1}, { 'project_id': project.id, 'release_id': release.id, 'environment_id': environment.id, } ) safe_execute( Group.objects.add_tags, group, environment, event.get_tags(), _with_transaction=False) if not raw: if not project.first_event: project.update(first_event=date) first_event_received.send_robust(project=project, group=group, sender=Project) eventstream.insert( group=group, event=event, is_new=is_new, is_sample=is_sample, is_regression=is_regression, is_new_group_environment=is_new_group_environment, primary_hash=hashes[0], # We are choosing to skip consuming the event back # in the eventstream if it's flagged as raw. # This means that we want to publish the event # through the event stream, but we don't care # about post processing and handling the commit. skip_consume=raw, ) metrics.timing( 'events.latency', received_timestamp - recorded_timestamp, tags={ 'project_id': project.id, }, ) metrics.timing( 'events.size.data.post_save', event.size, tags={'project_id': project.id} ) return event def _get_event_user(self, project, data): user_data = data.get('user') if not user_data: return euser = EventUser( project_id=project.id, ident=user_data.get('id'), email=user_data.get('email'), username=user_data.get('username'), ip_address=user_data.get('ip_address'), name=user_data.get('name'), ) euser.set_hash() if not euser.hash: return cache_key = u'euserid:1:{}:{}'.format( project.id, euser.hash, ) euser_id = default_cache.get(cache_key) if euser_id is None: try: with transaction.atomic(using=router.db_for_write(EventUser)): euser.save() except IntegrityError: try: euser = EventUser.objects.get( project_id=project.id, hash=euser.hash, ) except EventUser.DoesNotExist: # why??? 
e_userid = -1 else: if euser.name != (user_data.get('name') or euser.name): euser.update( name=user_data['name'], ) e_userid = euser.id default_cache.set(cache_key, e_userid, 3600) return euser def _find_hashes(self, project, hash_list): return map( lambda hash: GroupHash.objects.get_or_create( project=project, hash=hash, )[0], hash_list, ) def _save_aggregate(self, event, hashes, release, **kwargs): project = event.project # attempt to find a matching hash all_hashes = self._find_hashes(project, hashes) existing_group_id = None for h in all_hashes: if h.group_id is not None: existing_group_id = h.group_id break if h.group_tombstone_id is not None: raise HashDiscarded('Matches group tombstone %s' % h.group_tombstone_id) # XXX(dcramer): this has the opportunity to create duplicate groups # it should be resolved by the hash merging function later but this # should be better tested/reviewed if existing_group_id is None: # it's possible the release was deleted between # when we queried for the release and now, so # make sure it still exists first_release = kwargs.pop('first_release', None) with transaction.atomic(): short_id = project.next_short_id() group, group_is_new = Group.objects.create( project=project, short_id=short_id, first_release_id=Release.objects.filter( id=first_release.id, ).values_list('id', flat=True).first() if first_release else None, **kwargs ), True metrics.incr( 'group.created', skip_internal=True, tags={'platform': event.platform or 'unknown'} ) else: group = Group.objects.get(id=existing_group_id) group_is_new = False # If all hashes are brand new we treat this event as new is_new = False new_hashes = [h for h in all_hashes if h.group_id is None] if new_hashes: # XXX: There is a race condition here wherein another process could # create a new group that is associated with one of the new hashes, # add some event(s) to it, and then subsequently have the hash # "stolen" by this process. This then "orphans" those events from # their "siblings" in the group we've created here. We don't have a # way to fix this, since we can't update the group on those hashes # without filtering on `group_id` (which we can't do due to query # planner weirdness.) For more context, see 84c6f75a and d0e22787, # as well as GH-5085. 
GroupHash.objects.filter( id__in=[h.id for h in new_hashes], ).exclude( state=GroupHash.State.LOCKED_IN_MIGRATION, ).update(group=group) if group_is_new and len(new_hashes) == len(all_hashes): is_new = True # XXX(dcramer): it's important this gets called **before** the aggregate # is processed as otherwise values like last_seen will get mutated can_sample = ( features.has('projects:sample-events', project=project) and should_sample( event.data.get('received') or float(event.datetime.strftime('%s')), group.data.get('last_received') or float(group.last_seen.strftime('%s')), group.times_seen, ) ) if not is_new: is_regression = self._process_existing_aggregate( group=group, event=event, data=kwargs, release=release, ) else: is_regression = False # Determine if we've sampled enough data to store this event if is_new or is_regression: is_sample = False else: is_sample = can_sample if not is_sample: GroupHash.record_last_processed_event_id( all_hashes[0].id, event.event_id, ) return group, is_new, is_regression, is_sample def _handle_regression(self, group, event, release): if not group.is_resolved(): return # we only mark it as a regression if the event's release is newer than # the release in which we originally marked this as resolved elif GroupResolution.has_resolution(group, release): return elif has_pending_commit_resolution(group): return if not plugin_is_regression(group, event): return # we now think it's a regression, rely on the database to validate that # no one beat us to this date = max(event.datetime, group.last_seen) is_regression = bool( Group.objects.filter( id=group.id, # ensure we can't update things if the status has been set to # ignored status__in=[GroupStatus.RESOLVED, GroupStatus.UNRESOLVED], ).exclude( # add to the regression window to account for races here active_at__gte=date - timedelta(seconds=5), ).update( active_at=date, # explicitly set last_seen here as ``is_resolved()`` looks # at the value last_seen=date, status=GroupStatus.UNRESOLVED ) ) group.active_at = date group.status = GroupStatus.UNRESOLVED if is_regression and release: # resolutions are only valid if the state of the group is still # resolved -- if it were to change the resolution should get removed try: resolution = GroupResolution.objects.get( group=group, ) except GroupResolution.DoesNotExist: affected = False else: cursor = connection.cursor() # delete() API does not return affected rows cursor.execute("DELETE FROM sentry_groupresolution WHERE id = %s", [resolution.id]) affected = cursor.rowcount > 0 if affected: # if we had to remove the GroupResolution (i.e. we beat the # queue to handling this) then we need to also record # the corresponding event try: activity = Activity.objects.filter( group=group, type=Activity.SET_RESOLVED_IN_RELEASE, ident=resolution.id, ).order_by('-datetime')[0] except IndexError: # XXX: handle missing data, as it's not overly important pass else: activity.update(data={ 'version': release.version, }) if is_regression: activity = Activity.objects.create( project=group.project, group=group, type=Activity.SET_REGRESSION, data={ 'version': release.version if release else '', } ) activity.send_notification() kick_off_status_syncs.apply_async(kwargs={ 'project_id': group.project_id, 'group_id': group.id, }) return is_regression def _process_existing_aggregate(self, group, event, data, release): date = max(event.datetime, group.last_seen) extra = { 'last_seen': date, 'score': ScoreClause(group), 'data': data['data'], } if event.message and event.message != group.message: extra['message'] = event.message if group.level != data['level']: extra['level'] = data['level'] if group.culprit != data['culprit']: extra['culprit'] = data['culprit'] is_regression = self._handle_regression(group, event, release) group.last_seen = extra['last_seen'] update_kwargs = { 'times_seen': 1, } buffer.incr(Group, update_kwargs, { 'id': group.id, }, extra) return is_regression
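# Illustrative sketch: `buffer.incr(Group, {'times_seen': 1}, {'id': group.id},
# extra)` above defers hot counter writes instead of issuing one UPDATE per
# event. A toy in-memory analogue (not Sentry's buffer) that coalesces
# increments per row and keeps only the latest value for the "extra" columns:
class ToyBuffer:
    def __init__(self):
        self.pending = {}  # (model, row filters) -> [counter deltas, extras]

    def incr(self, model, columns, filters, extra=None):
        key = (model, tuple(sorted(filters.items())))
        deltas, extras = self.pending.setdefault(key, [{}, {}])
        for col, delta in columns.items():
            deltas[col] = deltas.get(col, 0) + delta
        extras.update(extra or {})

    def flush(self):
        # In the real system this would become one UPDATE per distinct row.
        for (model, filters), (deltas, extras) in self.pending.items():
            print(model, dict(filters), deltas, extras)
        self.pending.clear()

buf = ToyBuffer()
buf.incr("Group", {"times_seen": 1}, {"id": 1}, {"last_seen": "t1"})
buf.incr("Group", {"times_seen": 1}, {"id": 1}, {"last_seen": "t2"})
buf.flush()  # Group {'id': 1} {'times_seen': 2} {'last_seen': 't2'}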
def load_data(platform, default=None, timestamp=None, sample_name=None): # NOTE: Before editing this data, make sure you understand the context # in which it's being used. It is NOT only used for local development and # has production consequences. # * bin/load-mocks to generate fake data for local testing # * When a new project is created, a fake event is generated as a "starter" # event so it's not an empty project. # * When a user clicks Test Configuration from notification plugin settings page, # a fake event is generated to go through the pipeline. data = None language = None platform_data = INTEGRATION_ID_TO_PLATFORM_DATA.get(platform) if platform_data is not None and platform_data['type'] != 'language': language = platform_data['language'] for platform in (platform, language, default): if not platform: continue json_path = os.path.join(DATA_ROOT, 'samples', '%s.json' % (platform.encode('utf-8'), )) if not os.path.exists(json_path): continue if not sample_name: try: sample_name = INTEGRATION_ID_TO_PLATFORM_DATA[platform]['name'] except KeyError: pass with open(json_path) as fp: data = json.loads(fp.read()) break if data is None: return data = CanonicalKeyDict(data) if platform in ('csp', 'hkpk', 'expectct', 'expectstaple'): return data data['platform'] = platform data['message'] = 'This is an example %s exception' % (sample_name or platform, ) data['sentry.interfaces.User'] = generate_user( ip_address='127.0.0.1', username='******', id=1, email='*****@*****.**', ) data['extra'] = { 'session': { 'foo': 'bar', }, 'results': [1, 2, 3, 4, 5], 'emptyList': [], 'emptyMap': {}, 'length': 10837790, 'unauthorized': False, 'url': 'http://example.org/foo/bar/', } data['modules'] = { 'my.package': '1.0.0', } data['sentry.interfaces.Http'] = { "cookies": 'foo=bar;biz=baz', "url": "http://example.com/foo", "headers": { "Referer": "http://example.com", "Content-Type": "application/json", "User-Agent": "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36" }, "env": { 'ENV': 'prod', }, "query_string": "foo=bar", "data": '{"hello": "world"}', "method": "GET" } start = datetime.utcnow() if timestamp: try: start = datetime.utcfromtimestamp(timestamp) except TypeError: pass # Make breadcrumb timestamps relative to right now so they make sense breadcrumbs = data.get('sentry.interfaces.Breadcrumbs') if breadcrumbs is not None: duration = 1000 # At this point, breadcrumbs are not normalized. They can either be a # direct list or a values object containing a list. values = isinstance(breadcrumbs, list) and breadcrumbs or breadcrumbs['values'] for value in reversed(values): value['timestamp'] = milliseconds_ago(start, duration) # Every breadcrumb is 1s apart duration += 1000 return data
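# Illustrative sketch: load_data() above resolves a sample payload by trying
# the concrete platform first, then the platform's language, then a default.
# The same first-match fallback chain, reduced to its core:
def resolve_sample(platform, language, default, available):
    for candidate in (platform, language, default):
        if candidate and candidate in available:
            return candidate
    return None

available = {"javascript", "python"}
print(resolve_sample("react", "javascript", "python", available))  # javascript
print(resolve_sample("go", None, "python", available))             # python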
def _do_process_event(cache_key, start_time, event_id, process_task, data=None): from sentry.plugins import plugins if data is None: data = default_cache.get(cache_key) if data is None: metrics.incr( 'events.failed', tags={ 'reason': 'cache', 'stage': 'process'}, skip_internal=False) error_logger.error('process.failed.empty', extra={'cache_key': cache_key}) return data = CanonicalKeyDict(data) project_id = data['project'] with configure_scope() as scope: scope.set_tag("project", project_id) has_changed = False # Fetch the reprocessing revision reprocessing_rev = reprocessing.get_reprocessing_revision(project_id) # Event enhancers. These run before anything else. for plugin in plugins.all(version=2): enhancers = safe_execute(plugin.get_event_enhancers, data=data) for enhancer in (enhancers or ()): enhanced = safe_execute(enhancer, data) if enhanced: data = enhanced has_changed = True try: # Stacktrace based event processors. new_data = process_stacktraces(data) if new_data is not None: has_changed = True data = new_data except RetrySymbolication as e: if start_time and (time() - start_time) > 3600: raise RuntimeError('Event spent one hour in processing') retry_process_event.apply_async( args=(), kwargs={ 'process_task_name': process_task.__name__, 'task_kwargs': { 'cache_key': cache_key, 'event_id': event_id, 'start_time': start_time, } }, countdown=e.retry_after ) return # TODO(dcramer): ideally we would know if data changed by default # Default event processors. for plugin in plugins.all(version=2): processors = safe_execute( plugin.get_event_preprocessors, data=data, _with_transaction=False ) for processor in (processors or ()): result = safe_execute(processor, data) if result: data = result has_changed = True assert data['project'] == project_id, 'Project cannot be mutated by preprocessor' project = Project.objects.get_from_cache(id=project_id) # We cannot persist canonical types in the cache, so we need to # downgrade this. if isinstance(data, CANONICAL_TYPES): data = dict(data.items()) if has_changed: issues = data.get('processing_issues') try: if issues and create_failed_event( cache_key, project_id, list(issues.values()), event_id=event_id, start_time=start_time, reprocessing_rev=reprocessing_rev ): return except RetryProcessing: # If `create_failed_event` indicates that we need to retry we # invoke ourselves again. This happens when the reprocessing # revision changed while we were processing. from_reprocessing = process_task is process_event_from_reprocessing submit_process(project, from_reprocessing, cache_key, event_id, start_time, data) process_task.delay(cache_key, start_time=start_time, event_id=event_id) return default_cache.set(cache_key, data, 3600) submit_save_event(project, cache_key, event_id, start_time, data)
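# Illustrative sketch: plugin hooks above are always invoked through
# safe_execute so a misbehaving plugin cannot take down ingestion. A rough
# approximation of that wrapper (the real one also reports to Sentry itself):
import logging

toy_logger = logging.getLogger("toy.safe_execute")

def toy_safe_execute(func, *args, **kwargs):
    try:
        return func(*args, **kwargs)
    except Exception:
        # Swallow the error, record it, and let the caller treat the hook
        # as if it produced nothing.
        toy_logger.exception("plugin hook %s failed", getattr(func, "__name__", func))
        return None

def bad_hook(data):
    raise ValueError("boom")

print(toy_safe_execute(bad_hook, {}))  # None; the pipeline keeps going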
def _do_process_event(cache_key, start_time, event_id, process_task, data=None): from sentry.plugins.base import plugins if data is None: data = default_cache.get(cache_key) if data is None: metrics.incr("events.failed", tags={ "reason": "cache", "stage": "process" }, skip_internal=False) error_logger.error("process.failed.empty", extra={"cache_key": cache_key}) return data = CanonicalKeyDict(data) project_id = data["project"] event_id = data["event_id"] project = Project.objects.get_from_cache(id=project_id) with configure_scope() as scope: scope.set_tag("project", project_id) has_changed = False # Fetch the reprocessing revision reprocessing_rev = reprocessing.get_reprocessing_revision(project_id) try: # Event enhancers. These run before anything else. for plugin in plugins.all(version=2): enhancers = safe_execute(plugin.get_event_enhancers, data=data) for enhancer in enhancers or (): enhanced = safe_execute( enhancer, data, _passthrough_errors=(RetrySymbolication, )) if enhanced: data = enhanced has_changed = True # Stacktrace based event processors. new_data = process_stacktraces(data) if new_data is not None: has_changed = True data = new_data except RetrySymbolication as e: if start_time and ( time() - start_time) > settings.SYMBOLICATOR_PROCESS_EVENT_WARN_TIMEOUT: error_logger.warning("process.slow", extra={ "project_id": project_id, "event_id": event_id }) if start_time and ( time() - start_time) > settings.SYMBOLICATOR_PROCESS_EVENT_HARD_TIMEOUT: # Do not drop event but actually continue with rest of pipeline # (persisting unsymbolicated event) error_logger.exception( "process.failed.infinite_retry", extra={ "project_id": project_id, "event_id": event_id }, ) else: retry_process_event.apply_async( args=(), kwargs={ "process_task_name": process_task.__name__, "task_kwargs": { "cache_key": cache_key, "event_id": event_id, "start_time": start_time, }, }, countdown=e.retry_after, ) return # Second round of datascrubbing after stacktrace and language-specific # processing. First round happened as part of ingest. # # We assume that all potential PII is produced as part of stacktrace # processors and event enhancers. # # We assume that plugins for eg sessionstack (running via # `plugin.get_event_preprocessors`) are not producing data that should be # PII-stripped, ever. # # XXX(markus): Javascript event error translation is happening after this block # because it uses `get_event_preprocessors` instead of # `get_event_enhancers`, possibly move? if has_changed and features.has("organizations:datascrubbers-v2", project.organization, actor=None): with metrics.timer("tasks.store.datascrubbers.scrub"): project_config = get_project_config(project) new_data = safe_execute(scrub_data, project_config=project_config, event=data.data) # XXX(markus): When datascrubbing is finally "totally stable", we might want # to drop the event if it crashes to avoid saving PII if new_data is not None: data.data = new_data # TODO(dcramer): ideally we would know if data changed by default # Default event processors. for plugin in plugins.all(version=2): processors = safe_execute(plugin.get_event_preprocessors, data=data, _with_transaction=False) for processor in processors or (): result = safe_execute(processor, data) if result: data = result has_changed = True assert data[ "project"] == project_id, "Project cannot be mutated by plugins" # We cannot persist canonical types in the cache, so we need to # downgrade this. 
if isinstance(data, CANONICAL_TYPES): data = dict(data.items()) if has_changed: # Run some of normalization again such that we don't: # - persist e.g. incredibly large stacktraces from minidumps # - store event timestamps that are older than our retention window # (also happening with minidumps) normalizer = StoreNormalizer(remove_other=False, is_renormalize=True, **DEFAULT_STORE_NORMALIZER_ARGS) data = normalizer.normalize_event(dict(data)) issues = data.get("processing_issues") try: if issues and create_failed_event( cache_key, data, project_id, list(issues.values()), event_id=event_id, start_time=start_time, reprocessing_rev=reprocessing_rev, ): return except RetryProcessing: # If `create_failed_event` indicates that we need to retry we # invoke ourselves again. This happens when the reprocessing # revision changed while we were processing. from_reprocessing = process_task is process_event_from_reprocessing submit_process(project, from_reprocessing, cache_key, event_id, start_time, data) process_task.delay(cache_key, start_time=start_time, event_id=event_id) return default_cache.set(cache_key, data, 3600) submit_save_event(project, cache_key, event_id, start_time, data)
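# Illustrative sketch: the renormalization above exists so processing cannot
# smuggle oversized or out-of-retention data into storage. A toy "trim again
# after mutation" pass with invented limits (the real limits live in
# DEFAULT_STORE_NORMALIZER_ARGS and the Rust normalizer):
from time import time as now

TOY_MAX_FRAMES = 50
TOY_MAX_SECS_IN_PAST = 30 * 24 * 3600

def toy_renormalize(data):
    for exc in data.get("exception", {}).get("values", []):
        frames = exc.get("stacktrace", {}).get("frames")
        if frames and len(frames) > TOY_MAX_FRAMES:
            # keep the outermost and innermost frames, like a hard limit would
            exc["stacktrace"]["frames"] = (
                frames[: TOY_MAX_FRAMES // 2] + frames[-(TOY_MAX_FRAMES // 2):]
            )
    ts = data.get("timestamp")
    if ts is not None and ts < now() - TOY_MAX_SECS_IN_PAST:
        data["timestamp"] = now() - TOY_MAX_SECS_IN_PAST  # clamp into retention
    return data

event = {"timestamp": 0.0, "exception": {"values": [{"stacktrace": {"frames": [{}] * 80}}]}}
toy_renormalize(event)
print(len(event["exception"]["values"][0]["stacktrace"]["frames"]))  # 50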
def _do_save_event(cache_key=None, data=None, start_time=None, event_id=None, project_id=None, **kwargs): """ Saves an event to the database. """ from sentry.event_manager import HashDiscarded, EventManager from sentry import quotas from sentry.models import ProjectKey from sentry.utils.outcomes import Outcome, track_outcome if cache_key and data is None: data = default_cache.get(cache_key) if data is not None: data = CanonicalKeyDict(data) if event_id is None and data is not None: event_id = data['event_id'] # only when we come from reprocessing do we get a project_id sent into # the task. if project_id is None: project_id = data.pop('project') key_id = None if data is None else data.get('key_id') timestamp = to_datetime(start_time) if start_time is not None else None delete_raw_event(project_id, event_id, allow_hint_clear=True) # This covers two cases: where data is None because we did not manage # to fetch it from the default cache or the empty dictionary was # stored in the default cache. The former happens if the event # expired while being on the queue, the second happens on reprocessing # if the raw event was deleted concurrently while we held on to # it. This causes the node store to delete the data and we end up # fetching an empty dict. We could in theory not invoke `save_event` # in those cases but it's important that we always clean up the # reprocessing reports correctly or they will screw up the UI. So # to future-proof this correctly we just handle this case here. if not data: metrics.incr( 'events.failed', tags={ 'reason': 'cache', 'stage': 'post'}, skip_internal=False) return with configure_scope() as scope: scope.set_tag("project", project_id) event = None try: manager = EventManager(data) event = manager.save(project_id, assume_normalized=True) # Always load attachments from the cache so we can later prune them. # Only save them if the event-attachments feature is active, though. if features.has('organizations:event-attachments', event.project.organization, actor=None): attachments = attachment_cache.get(cache_key) or [] for attachment in attachments: save_attachment(event, attachment) # This is where we can finally say that we have accepted the event. track_outcome( event.project.organization_id, event.project.id, key_id, Outcome.ACCEPTED, None, timestamp, event_id ) except HashDiscarded: project = Project.objects.get_from_cache(id=project_id) reason = FilterStatKeys.DISCARDED_HASH project_key = None try: if key_id is not None: project_key = ProjectKey.objects.get_from_cache(id=key_id) except ProjectKey.DoesNotExist: pass quotas.refund(project, key=project_key, timestamp=start_time) track_outcome( project.organization_id, project_id, key_id, Outcome.FILTERED, reason, timestamp, event_id ) finally: if cache_key: default_cache.delete(cache_key) # For the unlikely case that we did not manage to persist the # event we always delete the key as well. if event is None or \ features.has('organizations:event-attachments', event.project.organization, actor=None): attachment_cache.delete(cache_key) if start_time: metrics.timing( 'events.time-to-process', time() - start_time, instance=data['platform'])
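# Illustrative sketch: the save path above records exactly one outcome per
# event: ACCEPTED on success, or FILTERED plus a quota refund when the group's
# hash was discarded. A toy ledger (not Sentry's quotas/track_outcome API)
# showing that accept/refund bookkeeping:
class ToyQuota:
    def __init__(self, used=0):
        self.used = used

    def consume(self):
        self.used += 1

    def refund(self):
        # mirrors quotas.refund(): a discarded event should not count
        self.used -= 1

def save_with_outcomes(event, quota, outcomes, discard=False):
    quota.consume()
    if discard:
        quota.refund()
        outcomes.append(("FILTERED", "discarded-hash"))
        return None
    outcomes.append(("ACCEPTED", None))
    return event

quota, outcomes = ToyQuota(), []
save_with_outcomes({"event_id": "a" * 32}, quota, outcomes, discard=True)
save_with_outcomes({"event_id": "b" * 32}, quota, outcomes, discard=False)
print(quota.used, outcomes)  # 1 [('FILTERED', 'discarded-hash'), ('ACCEPTED', None)]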
def _do_process_event(cache_key, start_time, event_id, process_task): from sentry.plugins import plugins data = default_cache.get(cache_key) if data is None: metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'process'}) error_logger.error('process.failed.empty', extra={'cache_key': cache_key}) return data = CanonicalKeyDict(data) project = data['project'] Raven.tags_context({ 'project': project, }) has_changed = False # Fetch the reprocessing revision reprocessing_rev = reprocessing.get_reprocessing_revision(project) # Stacktrace based event processors. These run before anything else. new_data = process_stacktraces(data) if new_data is not None: has_changed = True data = new_data # TODO(dcramer): ideally we would know if data changed by default # Default event processors. for plugin in plugins.all(version=2): processors = safe_execute( plugin.get_event_preprocessors, data=data, _with_transaction=False ) for processor in (processors or ()): result = safe_execute(processor, data) if result: data = result has_changed = True assert data['project'] == project, 'Project cannot be mutated by preprocessor' if has_changed: issues = data.get('processing_issues') try: if issues and create_failed_event( cache_key, project, list(issues.values()), event_id=event_id, start_time=start_time, reprocessing_rev=reprocessing_rev ): return except RetryProcessing: # If `create_failed_event` indicates that we need to retry we # invoke ourselves again. This happens when the reprocessing # revision changed while we were processing. process_task.delay(cache_key, start_time=start_time, event_id=event_id) return # We cannot persist canonical types in the cache, so we need to # downgrade this. if isinstance(data, CANONICAL_TYPES): data = dict(data.items()) default_cache.set(cache_key, data, 3600) save_event.delay( cache_key=cache_key, data=None, start_time=start_time, event_id=event_id, project_id=project )


class EventManager(object):
    """
    Handles normalization in both the store endpoint and the save task. The
    intention is to swap this class out with a reimplementation in Rust.
    """

    def __init__(self, data, version='5', project=None, grouping_config=None,
                 client_ip=None, user_agent=None, auth=None, key=None,
                 content_encoding=None, is_renormalize=False,
                 remove_other=None, relay_config=None):
        self._data = _decode_event(data, content_encoding=content_encoding)
        self.version = version
        self._project = project
        # if not explicitly specified try to get the grouping from relay_config
        if grouping_config is None and relay_config is not None:
            config = relay_config.config
            grouping_config = config.get('grouping_config')
        # if we still don't have a grouping also try the project
        if grouping_config is None and project is not None:
            grouping_config = get_grouping_config_dict_for_project(
                self._project)
        self._grouping_config = grouping_config
        self._client_ip = client_ip
        self._user_agent = user_agent
        self._auth = auth
        self._key = key
        self._is_renormalize = is_renormalize
        self._remove_other = remove_other
        self._normalized = False

    def process_csp_report(self):
        """Only called from the CSP report endpoint."""
        data = self._data
        try:
            interface = get_interface(data.pop('interface'))
            report = data.pop('report')
        except KeyError:
            raise APIForbidden('No report or interface data')

        # To support testing, we can either accept a built interface instance,
        # or the raw data in which case we build the instance ourselves
        try:
            instance = (report if isinstance(report, interface)
                        else interface.from_raw(report))
        except jsonschema.ValidationError as e:
            raise APIError('Invalid security report: %s'
                           % str(e).splitlines()[0])

        def clean(d):
            return dict(filter(lambda x: x[1], d.items()))

        data.update({
            'logger': 'csp',
            'message': instance.get_message(),
            'culprit': instance.get_culprit(),
            instance.path: instance.to_json(),
            'tags': instance.get_tags(),
            'errors': [],
            'user': {'ip_address': self._client_ip},
            # Construct a faux Http interface based on the little information
            # we have.  This is a bit weird, since we don't have nearly enough
            # information to create an Http interface, but this automatically
            # will pick up tags for the User-Agent which is actually important
            # here for CSP
            'request': {
                'url': instance.get_origin(),
                'headers': clean({
                    'User-Agent': self._user_agent,
                    'Referer': instance.get_referrer(),
                }),
            },
        })

        self._data = data

    def normalize(self):
        with metrics.timer('events.store.normalize.duration'):
            self._normalize_impl()

        metrics.timing(
            'events.store.normalize.errors',
            len(self._data.get("errors") or ()),
        )

    def _normalize_impl(self):
        if self._normalized:
            raise RuntimeError('Already normalized')
        self._normalized = True

        from semaphore.processing import StoreNormalizer
        rust_normalizer = StoreNormalizer(
            project_id=self._project.id if self._project else None,
            client_ip=self._client_ip,
            client=self._auth.client if self._auth else None,
            key_id=six.text_type(self._key.id) if self._key else None,
            grouping_config=self._grouping_config,
            protocol_version=six.text_type(self.version)
            if self.version is not None else None,
            is_renormalize=self._is_renormalize,
            remove_other=self._remove_other,
            **DEFAULT_STORE_NORMALIZER_ARGS)

        self._data = CanonicalKeyDict(
            rust_normalizer.normalize_event(dict(self._data)))

        normalize_user_agent(self._data)

    def should_filter(self):
        """
        returns (result: bool, reason: string or None)
        Result is True if an event should be filtered
        The reason for filtering is passed along as a string
        so that we can store it in metrics
        """
        for name in SECURITY_REPORT_INTERFACES:
            if name in self._data:
                interface = get_interface(name)
                if interface.to_python(self._data[name]).should_filter(
                        self._project):
                    return (True, FilterStatKeys.INVALID_CSP)

        if self._client_ip and not is_valid_ip(self._project,
                                               self._client_ip):
            return (True, FilterStatKeys.IP_ADDRESS)

        release = self._data.get('release')
        if release and not is_valid_release(self._project, release):
            return (True, FilterStatKeys.RELEASE_VERSION)

        error_message = get_path(self._data, 'logentry', 'formatted') \
            or get_path(self._data, 'logentry', 'message') \
            or ''
        if error_message and not is_valid_error_message(
                self._project, error_message):
            return (True, FilterStatKeys.ERROR_MESSAGE)

        for exc in get_path(self._data, 'exception', 'values',
                            filter=True, default=[]):
            message = u': '.join(
                filter(None, map(exc.get, ['type', 'value'])))
            if message and not is_valid_error_message(self._project,
                                                      message):
                return (True, FilterStatKeys.ERROR_MESSAGE)

        for filter_cls in filters.all():
            filter_obj = filter_cls(self._project)
            if filter_obj.is_enabled() and filter_obj.test(self._data):
                return (True, six.text_type(filter_obj.id))

        return (False, None)

    def get_data(self):
        return self._data

    def _get_event_instance(self, project_id=None):
        data = self._data
        event_id = data.get('event_id')
        platform = data.get('platform')
        recorded_timestamp = data.get('timestamp')
        date = datetime.fromtimestamp(recorded_timestamp)
        date = date.replace(tzinfo=timezone.utc)
        time_spent = data.get('time_spent')

        data['node_id'] = Event.generate_node_id(project_id, event_id)

        return Event(
            project_id=project_id or self._project.id,
            event_id=event_id,
            data=EventDict(data, skip_renormalization=True),
            time_spent=time_spent,
            datetime=date,
            platform=platform)

    def get_culprit(self):
        """Helper to calculate the default culprit"""
        return force_text(
            self._data.get('culprit')
            or self._data.get('transaction')
            or generate_culprit(self._data)
            or '')

    def get_event_type(self):
        """Returns the event type."""
        return eventtypes.get(self._data.get('type', 'default'))()

    def materialize_metadata(self):
        """Returns the materialized metadata to be merged with group or
        event data.  This currently produces the keys `type`, `metadata`,
        `title` and `location`.  This should most likely also produce
        `culprit` here.
        """
        event_type = self.get_event_type()
        event_metadata = event_type.get_metadata(self._data)
        return {
            'type': event_type.key,
            'metadata': event_metadata,
            'title': event_type.get_title(event_metadata),
            'location': event_type.get_location(event_metadata),
        }

    def get_search_message(self, event_metadata=None, culprit=None):
        """This generates the internal event.message attribute which is
        used for search purposes.  It adds a bunch of data from the
        metadata and the culprit.
        """
""" if event_metadata is None: event_metadata = self.get_event_type().get_metadata(self._data) if culprit is None: culprit = self.get_culprit() data = self._data message = '' if data.get('logentry'): message += (data['logentry'].get('formatted') or data['logentry'].get('message') or '') if event_metadata: for value in six.itervalues(event_metadata): value_u = force_text(value, errors='replace') if value_u not in message: message = u'{} {}'.format(message, value_u) if culprit and culprit not in message: culprit_u = force_text(culprit, errors='replace') message = u'{} {}'.format(message, culprit_u) return trim(message.strip(), settings.SENTRY_MAX_MESSAGE_LENGTH) def save(self, project_id, raw=False, assume_normalized=False): # Normalize if needed if not self._normalized: if not assume_normalized: self.normalize() self._normalized = True data = self._data project = Project.objects.get_from_cache(id=project_id) project._organization_cache = Organization.objects.get_from_cache( id=project.organization_id) # Check to make sure we're not about to do a bunch of work that's # already been done if we've processed an event with this ID. (This # isn't a perfect solution -- this doesn't handle ``EventMapping`` and # there's a race condition between here and when the event is actually # saved, but it's an improvement. See GH-7677.) try: event = Event.objects.get( project_id=project.id, event_id=data['event_id'], ) except Event.DoesNotExist: pass else: # Make sure we cache on the project before returning event._project_cache = project logger.info('duplicate.found', exc_info=True, extra={ 'event_uuid': data['event_id'], 'project_id': project.id, 'model': Event.__name__, }) return event # Pull out the culprit culprit = self.get_culprit() # Pull the toplevel data we're interested in level = data.get('level') # TODO(mitsuhiko): this code path should be gone by July 2018. # This is going to be fine because no code actually still depends # on integers here. When we need an integer it will be converted # into one later. Old workers used to send integers here. if level is not None and isinstance(level, six.integer_types): level = LOG_LEVELS[level] transaction_name = data.get('transaction') logger_name = data.get('logger') release = data.get('release') dist = data.get('dist') environment = data.get('environment') recorded_timestamp = data.get('timestamp') # We need to swap out the data with the one internal to the newly # created event object event = self._get_event_instance(project_id=project_id) self._data = data = event.data.data event._project_cache = project date = event.datetime platform = event.platform event_id = event.event_id if transaction_name: transaction_name = force_text(transaction_name) # Some of the data that are toplevel attributes are duplicated # into tags (logger, level, environment, transaction). These are # different from legacy attributes which are normalized into tags # ahead of time (site, server_name). 
        setdefault_path(data, 'tags', value=[])
        set_tag(data, 'level', level)
        if logger_name:
            set_tag(data, 'logger', logger_name)
        if environment:
            set_tag(data, 'environment', environment)
        if transaction_name:
            set_tag(data, 'transaction', transaction_name)

        if release:
            # don't allow a conflicting 'release' tag
            pop_tag(data, 'release')
            release = Release.get_or_create(
                project=project,
                version=release,
                date_added=date,
            )
            set_tag(data, 'sentry:release', release.version)

        if dist and release:
            dist = release.add_dist(dist, date)
            # don't allow a conflicting 'dist' tag
            pop_tag(data, 'dist')
            set_tag(data, 'sentry:dist', dist.name)
        else:
            dist = None

        event_user = self._get_event_user(project, data)
        if event_user:
            # don't allow a conflicting 'user' tag
            pop_tag(data, 'user')
            set_tag(data, 'sentry:user', event_user.tag_value)

        # At this point we want to normalize the in_app values in case the
        # clients did not set this appropriately so far.
        grouping_config = load_grouping_config(
            get_grouping_config_dict_for_event_data(data, project))
        normalize_stacktraces_for_grouping(data, grouping_config)

        for plugin in plugins.for_project(project, version=None):
            added_tags = safe_execute(plugin.get_tags, event,
                                      _with_transaction=False)
            if added_tags:
                # plugins should not override user provided tags
                for key, value in added_tags:
                    if get_tag(data, key) is None:
                        set_tag(data, key, value)

        for path, iface in six.iteritems(event.interfaces):
            for k, v in iface.iter_tags():
                set_tag(data, k, v)
            # Get rid of ephemeral interface data
            if iface.ephemeral:
                data.pop(iface.path, None)

        # The active grouping config was put into the event in the
        # normalize step before.  We now also make sure that the
        # fingerprint was set to `['{{ default }}']` just in case someone
        # removed it from the payload.  The call to get_hashes will then
        # look at `grouping_config` to pick the right parameters.
        data['fingerprint'] = data.get('fingerprint') or ['{{ default }}']
        apply_server_fingerprinting(
            data, get_fingerprinting_config_for_project(project))

        # Here we try to use the grouping config that was requested in the
        # event.  If that config has since been deleted (because it was an
        # experimental grouping config) we fall back to the default.
        try:
            hashes = event.get_hashes()
        except GroupingConfigNotFound:
            data['grouping_config'] = get_grouping_config_dict_for_project(
                project)
            hashes = event.get_hashes()

        data['hashes'] = hashes

        # We want to freeze not just the metadata and type in but also the
        # derived attributes.  The reason for this is that we push this
        # data into kafka for snuba processing and our postprocessing
        # picks up the data right from the snuba topic.  For most usage
        # however the data is dynamically overridden by Event.title and
        # Event.location (see Event.as_dict).
        materialized_metadata = self.materialize_metadata()
        event_metadata = materialized_metadata['metadata']
        data.update(materialized_metadata)
        data['culprit'] = culprit

        # index components into ``Event.message``
        # See GH-3248
        event.message = self.get_search_message(event_metadata, culprit)

        received_timestamp = event.data.get('received') or float(
            event.datetime.strftime('%s'))

        # The group gets the same metadata as the event when it's flushed but
        # additionally the `last_received` key is set.  This key is used by
        # _save_aggregate.
        group_metadata = dict(materialized_metadata)
        group_metadata['last_received'] = received_timestamp
        kwargs = {
            'platform': platform,
            'message': event.message,
            'culprit': culprit,
            'logger': logger_name,
            'level': LOG_LEVELS_MAP.get(level),
            'last_seen': date,
            'first_seen': date,
            'active_at': date,
            'data': group_metadata,
        }

        if release:
            kwargs['first_release'] = release

        try:
            group, is_new, is_regression, is_sample = self._save_aggregate(
                event=event, hashes=hashes, release=release, **kwargs)
        except HashDiscarded:
            event_discarded.send_robust(
                project=project,
                sender=EventManager,
            )
            metrics.incr(
                'events.discarded',
                skip_internal=True,
                tags={
                    'organization_id': project.organization_id,
                    'platform': platform,
                },
            )
            raise
        else:
            event_saved.send_robust(
                project=project,
                event_size=event.size,
                sender=EventManager,
            )
        event.group = group

        # store a reference to the group id to guarantee validation of
        # isolation
        event.data.bind_ref(event)

        # When an event was sampled, the canonical source of truth is the
        # EventMapping table since we aren't going to be writing out an
        # actual Event row.  Otherwise, if the Event isn't being sampled,
        # we can safely rely on the Event table itself as the source of
        # truth and ignore EventMapping since it's redundant information.
        if is_sample:
            try:
                with transaction.atomic(
                        using=router.db_for_write(EventMapping)):
                    EventMapping.objects.create(
                        project=project, group=group, event_id=event_id)
            except IntegrityError:
                logger.info('duplicate.found', exc_info=True, extra={
                    'event_uuid': event_id,
                    'project_id': project.id,
                    'group_id': group.id,
                    'model': EventMapping.__name__,
                })
                return event

        environment = Environment.get_or_create(
            project=project,
            name=environment,
        )

        group_environment, is_new_group_environment = \
            GroupEnvironment.get_or_create(
                group_id=group.id,
                environment_id=environment.id,
                defaults={
                    'first_release': release if release else None,
                },
            )

        if release:
            ReleaseEnvironment.get_or_create(
                project=project,
                release=release,
                environment=environment,
                datetime=date,
            )

            ReleaseProjectEnvironment.get_or_create(
                project=project,
                release=release,
                environment=environment,
                datetime=date,
            )

            grouprelease = GroupRelease.get_or_create(
                group=group,
                release=release,
                environment=environment,
                datetime=date,
            )

        counters = [
            (tsdb.models.group, group.id),
            (tsdb.models.project, project.id),
        ]

        if release:
            counters.append((tsdb.models.release, release.id))

        tsdb.incr_multi(counters, timestamp=event.datetime,
                        environment_id=environment.id)

        frequencies = [
            # (tsdb.models.frequent_projects_by_organization, {
            #     project.organization_id: {
            #         project.id: 1,
            #     },
            # }),
            # (tsdb.models.frequent_issues_by_project, {
            #     project.id: {
            #         group.id: 1,
            #     },
            # })
            (tsdb.models.frequent_environments_by_group, {
                group.id: {
                    environment.id: 1,
                },
            })
        ]

        if release:
            frequencies.append(
                (tsdb.models.frequent_releases_by_group, {
                    group.id: {
                        grouprelease.id: 1,
                    },
                })
            )

        tsdb.record_frequency_multi(frequencies, timestamp=event.datetime)

        UserReport.objects.filter(
            project=project,
            event_id=event_id,
        ).update(
            group=group,
            environment=environment,
        )

        # Update any event attachment that arrived before the event group
        # was defined.
        EventAttachment.objects.filter(
            project_id=project.id,
            event_id=event_id,
        ).update(group_id=group.id)

        # save the event unless it's been sampled
        if not is_sample:
            try:
                with transaction.atomic(using=router.db_for_write(Event)):
                    event.save()
            except IntegrityError:
                logger.info('duplicate.found', exc_info=True, extra={
                    'event_uuid': event_id,
                    'project_id': project.id,
                    'group_id': group.id,
                    'model': Event.__name__,
                })
                return event

            tagstore.delay_index_event_tags(
                organization_id=project.organization_id,
                project_id=project.id,
                group_id=group.id,
                environment_id=environment.id,
                event_id=event.id,
                tags=event.tags,
                date_added=event.datetime,
            )

        if event_user:
            tsdb.record_multi(
                (
                    (tsdb.models.users_affected_by_group, group.id,
                     (event_user.tag_value, )),
                    (tsdb.models.users_affected_by_project, project.id,
                     (event_user.tag_value, )),
                ),
                timestamp=event.datetime,
                environment_id=environment.id,
            )

        if release:
            if is_new:
                buffer.incr(ReleaseProject, {'new_groups': 1}, {
                    'release_id': release.id,
                    'project_id': project.id,
                })
            if is_new_group_environment:
                buffer.incr(ReleaseProjectEnvironment,
                            {'new_issues_count': 1}, {
                                'project_id': project.id,
                                'release_id': release.id,
                                'environment_id': environment.id,
                            })

        safe_execute(Group.objects.add_tags, group, environment,
                     event.get_tags(), _with_transaction=False)

        if not raw:
            if not project.first_event:
                project.update(first_event=date)
                first_event_received.send_robust(project=project,
                                                 group=group, sender=Project)

        eventstream.insert(
            group=group,
            event=event,
            is_new=is_new,
            is_sample=is_sample,
            is_regression=is_regression,
            is_new_group_environment=is_new_group_environment,
            primary_hash=hashes[0],
            # We are choosing to skip consuming the event back
            # in the eventstream if it's flagged as raw.
            # This means that we want to publish the event
            # through the event stream, but we don't care
            # about post processing and handling the commit.
            skip_consume=raw,
        )

        metrics.timing(
            'events.latency',
            received_timestamp - recorded_timestamp,
            tags={
                'project_id': project.id,
            },
        )

        metrics.timing('events.size.data.post_save', event.size,
                       tags={'project_id': project.id})

        return event

    def _get_event_user(self, project, data):
        user_data = data.get('user')
        if not user_data:
            return

        euser = EventUser(
            project_id=project.id,
            ident=user_data.get('id'),
            email=user_data.get('email'),
            username=user_data.get('username'),
            ip_address=user_data.get('ip_address'),
            name=user_data.get('name'),
        )
        euser.set_hash()
        if not euser.hash:
            return

        cache_key = u'euserid:1:{}:{}'.format(
            project.id,
            euser.hash,
        )
        euser_id = default_cache.get(cache_key)
        if euser_id is None:
            try:
                with transaction.atomic(
                        using=router.db_for_write(EventUser)):
                    euser.save()
            except IntegrityError:
                try:
                    euser = EventUser.objects.get(
                        project_id=project.id,
                        hash=euser.hash,
                    )
                except EventUser.DoesNotExist:
                    # why???
                    e_userid = -1
                else:
                    if euser.name != (user_data.get('name') or euser.name):
                        euser.update(name=user_data['name'])
                    e_userid = euser.id
                default_cache.set(cache_key, e_userid, 3600)

        return euser

    def _find_hashes(self, project, hash_list):
        # Return a list (not a lazy ``map``) since callers index into the
        # result and iterate over it more than once.
        return [
            GroupHash.objects.get_or_create(
                project=project,
                hash=hash,
            )[0]
            for hash in hash_list
        ]

    def _save_aggregate(self, event, hashes, release, **kwargs):
        project = event.project

        # attempt to find a matching hash
        all_hashes = self._find_hashes(project, hashes)

        existing_group_id = None
        for h in all_hashes:
            if h.group_id is not None:
                existing_group_id = h.group_id
                break
            if h.group_tombstone_id is not None:
                raise HashDiscarded('Matches group tombstone %s'
                                    % h.group_tombstone_id)

        # XXX(dcramer): this has the opportunity to create duplicate groups
        # it should be resolved by the hash merging function later but this
        # should be better tested/reviewed
        if existing_group_id is None:
            # it's possible the release was deleted between
            # when we queried for the release and now, so
            # make sure it still exists
            first_release = kwargs.pop('first_release', None)

            with transaction.atomic():
                short_id = project.next_short_id()
                group, group_is_new = Group.objects.create(
                    project=project,
                    short_id=short_id,
                    first_release_id=Release.objects.filter(
                        id=first_release.id,
                    ).values_list('id', flat=True).first()
                    if first_release else None,
                    **kwargs), True

            metrics.incr('group.created', skip_internal=True,
                         tags={'platform': event.platform or 'unknown'})
        else:
            group = Group.objects.get(id=existing_group_id)
            group_is_new = False

        # If all hashes are brand new we treat this event as new
        is_new = False
        new_hashes = [h for h in all_hashes if h.group_id is None]
        if new_hashes:
            # XXX: There is a race condition here wherein another process
            # could create a new group that is associated with one of the new
            # hashes, add some event(s) to it, and then subsequently have the
            # hash "stolen" by this process.  This then "orphans" those events
            # from their "siblings" in the group we've created here.  We don't
            # have a way to fix this, since we can't update the group on those
            # hashes without filtering on `group_id` (which we can't do due to
            # query planner weirdness).  For more context, see 84c6f75a and
            # d0e22787, as well as GH-5085.
            GroupHash.objects.filter(
                id__in=[h.id for h in new_hashes],
            ).exclude(
                state=GroupHash.State.LOCKED_IN_MIGRATION,
            ).update(group=group)

            if group_is_new and len(new_hashes) == len(all_hashes):
                is_new = True

        # XXX(dcramer): it's important this gets called **before** the
        # aggregate is processed as otherwise values like last_seen will
        # get mutated
        can_sample = (
            features.has('projects:sample-events', project=project)
            and should_sample(
                event.data.get('received')
                or float(event.datetime.strftime('%s')),
                group.data.get('last_received')
                or float(group.last_seen.strftime('%s')),
                group.times_seen,
            ))

        if not is_new:
            is_regression = self._process_existing_aggregate(
                group=group,
                event=event,
                data=kwargs,
                release=release,
            )
        else:
            is_regression = False

        # Determine if we've sampled enough data to store this event
        if is_new or is_regression:
            is_sample = False
        else:
            is_sample = can_sample

        if not is_sample:
            GroupHash.record_last_processed_event_id(
                all_hashes[0].id,
                event.event_id,
            )

        return group, is_new, is_regression, is_sample

    def _handle_regression(self, group, event, release):
        if not group.is_resolved():
            return

        # we only mark it as a regression if the event's release is newer than
        # the release which we originally marked this as resolved
        elif GroupResolution.has_resolution(group, release):
            return

        elif has_pending_commit_resolution(group):
            return

        if not plugin_is_regression(group, event):
            return

        # we now think it's a regression, rely on the database to validate
        # that no one beat us to this
        date = max(event.datetime, group.last_seen)
        is_regression = bool(
            Group.objects.filter(
                id=group.id,
                # ensure we can't update things if the status has been set
                # to ignored
                status__in=[GroupStatus.RESOLVED, GroupStatus.UNRESOLVED],
            ).exclude(
                # add to the regression window to account for races here
                active_at__gte=date - timedelta(seconds=5),
            ).update(
                active_at=date,
                # explicitly set last_seen here as ``is_resolved()`` looks
                # at the value
                last_seen=date,
                status=GroupStatus.UNRESOLVED))

        group.active_at = date
        group.status = GroupStatus.UNRESOLVED

        if is_regression and release:
            # resolutions are only valid if the state of the group is still
            # resolved -- if it were to change the resolution should get
            # removed
            try:
                resolution = GroupResolution.objects.get(group=group)
            except GroupResolution.DoesNotExist:
                affected = False
            else:
                cursor = connection.cursor()
                # delete() API does not return affected rows
                cursor.execute(
                    "DELETE FROM sentry_groupresolution WHERE id = %s",
                    [resolution.id])
                affected = cursor.rowcount > 0

            if affected:
                # if we had to remove the GroupResolution (i.e. we beat the
                # queue to handling this) then we need to also record the
                # corresponding event
                try:
                    activity = Activity.objects.filter(
                        group=group,
                        type=Activity.SET_RESOLVED_IN_RELEASE,
                        ident=resolution.id,
                    ).order_by('-datetime')[0]
                except IndexError:
                    # XXX: handle missing data, as it's not overly important
                    pass
                else:
                    activity.update(data={
                        'version': release.version,
                    })

        if is_regression:
            activity = Activity.objects.create(
                project=group.project,
                group=group,
                type=Activity.SET_REGRESSION,
                data={
                    'version': release.version if release else '',
                })
            activity.send_notification()

            kick_off_status_syncs.apply_async(kwargs={
                'project_id': group.project_id,
                'group_id': group.id,
            })

        return is_regression

    def _process_existing_aggregate(self, group, event, data, release):
        date = max(event.datetime, group.last_seen)
        extra = {
            'last_seen': date,
            'score': ScoreClause(group),
            'data': data['data'],
        }
        if event.message and event.message != group.message:
            extra['message'] = event.message
        if group.level != data['level']:
            extra['level'] = data['level']
        if group.culprit != data['culprit']:
            extra['culprit'] = data['culprit']

        is_regression = self._handle_regression(group, event, release)

        group.last_seen = extra['last_seen']

        update_kwargs = {
            'times_seen': 1,
        }

        buffer.incr(Group, update_kwargs, {
            'id': group.id,
        }, extra)

        return is_regression
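

# --- Illustrative sketch (not part of the original module) ---
# Rough shape of how the store endpoint and save task drive the class above.
# This is a simplification: in production, normalize() and save() run in
# different processes with the payload round-tripped through the cache in
# between, and outcomes/metrics are recorded around each step.

def _sketch_store_and_save(project, payload):
    manager = EventManager(payload, project=project)
    manager.normalize()  # must run exactly once per event
    filtered, reason = manager.should_filter()
    if filtered:
        return None, reason  # caller records the filter reason in metrics
    event = manager.save(project.id, assume_normalized=True)
    return event, None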


def save_event(cache_key=None, data=None, start_time=None, event_id=None,
               project_id=None, **kwargs):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas, tsdb
    from sentry.models import ProjectKey

    if cache_key:
        data = default_cache.get(cache_key)

    if data is not None:
        data = CanonicalKeyDict(data)

    if event_id is None and data is not None:
        event_id = data['event_id']

    # only when we come from reprocessing we get a project_id sent into
    # the task.
    if project_id is None:
        project_id = data.pop('project')

    delete_raw_event(project_id, event_id, allow_hint_clear=True)

    # This covers two cases: where data is None because we did not manage
    # to fetch it from the default cache or the empty dictionary was
    # stored in the default cache. The former happens if the event
    # expired while being on the queue, the second happens on reprocessing
    # if the raw event was deleted concurrently while we held on to
    # it. This causes the node store to delete the data and we end up
    # fetching an empty dict. We could in theory not invoke `save_event`
    # in those cases but it's important that we always clean up the
    # reprocessing reports correctly or they will screw up the UI. So
    # to future proof this correctly we just handle this case here.
    if not data:
        metrics.incr('events.failed',
                     tags={'reason': 'cache', 'stage': 'post'})
        return

    Raven.tags_context({
        'project': project_id,
    })

    try:
        manager = EventManager(data)
        event = manager.save(project_id)

        # Always load attachments from the cache so we can later prune them.
        # Only save them if the event-attachments feature is active, though.
        if features.has('organizations:event-attachments',
                        event.project.organization, actor=None):
            attachments = attachment_cache.get(cache_key) or []
            for attachment in attachments:
                save_attachment(event, attachment)
    except HashDiscarded:
        increment_list = [
            (tsdb.models.project_total_received_discarded, project_id),
        ]

        try:
            project = Project.objects.get_from_cache(id=project_id)
        except Project.DoesNotExist:
            pass
        else:
            increment_list.extend([
                (tsdb.models.project_total_blacklisted, project.id),
                (tsdb.models.organization_total_blacklisted,
                 project.organization_id),
            ])

            project_key = None
            if data.get('key_id') is not None:
                try:
                    project_key = ProjectKey.objects.get_from_cache(
                        id=data['key_id'])
                except ProjectKey.DoesNotExist:
                    pass
                else:
                    increment_list.append(
                        (tsdb.models.key_total_blacklisted, project_key.id))

            quotas.refund(
                project,
                key=project_key,
                timestamp=start_time,
            )

        tsdb.incr_multi(
            increment_list,
            timestamp=to_datetime(start_time)
            if start_time is not None else None,
        )
    finally:
        if cache_key:
            default_cache.delete(cache_key)
            attachment_cache.delete(cache_key)

        if start_time:
            metrics.timing(
                'events.time-to-process',
                time() - start_time,
                instance=data['platform'])
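

# --- Illustrative sketch (not part of the original module) ---
# The HashDiscarded branch above widens its accounting step by step: the
# project-scoped discard counter is always recorded, while the org- and
# key-scoped blacklist counters are only added once the Project /
# ProjectKey lookups succeed.  Distilled with plain tuples (the string
# model names are hypothetical stand-ins for tsdb.models entries):

def _sketch_discard_counters(project_id, project=None, project_key=None):
    increments = [('project_total_received_discarded', project_id)]
    if project is not None:
        increments.append(('project_total_blacklisted', project.id))
        increments.append(('organization_total_blacklisted',
                           project.organization_id))
    if project_key is not None:
        increments.append(('key_total_blacklisted', project_key.id))
    return increments  # passed to tsdb.incr_multi with one shared timestamp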


def _do_save_event(cache_key=None, data=None, start_time=None, event_id=None,
                   project_id=None, **kwargs):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas
    from sentry.models import ProjectKey
    from sentry.utils.outcomes import Outcome, track_outcome

    if cache_key and data is None:
        data = default_cache.get(cache_key)

    if data is not None:
        data = CanonicalKeyDict(data)

    if event_id is None and data is not None:
        event_id = data['event_id']

    # only when we come from reprocessing we get a project_id sent into
    # the task.
    if project_id is None:
        project_id = data.pop('project')

    key_id = None if data is None else data.get('key_id')
    if key_id is not None:
        key_id = int(key_id)
    timestamp = to_datetime(start_time) if start_time is not None else None

    delete_raw_event(project_id, event_id, allow_hint_clear=True)

    # This covers two cases: where data is None because we did not manage
    # to fetch it from the default cache or the empty dictionary was
    # stored in the default cache. The former happens if the event
    # expired while being on the queue, the second happens on reprocessing
    # if the raw event was deleted concurrently while we held on to
    # it. This causes the node store to delete the data and we end up
    # fetching an empty dict. We could in theory not invoke `save_event`
    # in those cases but it's important that we always clean up the
    # reprocessing reports correctly or they will screw up the UI. So
    # to future proof this correctly we just handle this case here.
    if not data:
        metrics.incr(
            'events.failed',
            tags={'reason': 'cache', 'stage': 'post'},
            skip_internal=False)
        return

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    event = None
    try:
        manager = EventManager(data)
        event = manager.save(project_id, assume_normalized=True)

        # Always load attachments from the cache so we can later prune them.
        # Only save them if the event-attachments feature is active, though.
        if features.has('organizations:event-attachments',
                        event.project.organization, actor=None):
            attachments = attachment_cache.get(cache_key) or []
            for attachment in attachments:
                save_attachment(event, attachment)

        # This is where we can finally say that we have accepted the event.
        track_outcome(
            event.project.organization_id,
            event.project.id,
            key_id,
            Outcome.ACCEPTED,
            None,
            timestamp,
            event_id)
    except HashDiscarded:
        project = Project.objects.get_from_cache(id=project_id)
        reason = FilterStatKeys.DISCARDED_HASH
        project_key = None
        try:
            if key_id is not None:
                project_key = ProjectKey.objects.get_from_cache(id=key_id)
        except ProjectKey.DoesNotExist:
            pass

        quotas.refund(project, key=project_key, timestamp=start_time)

        track_outcome(
            project.organization_id,
            project_id,
            key_id,
            Outcome.FILTERED,
            reason,
            timestamp,
            event_id)
    finally:
        if cache_key:
            default_cache.delete(cache_key)

            # For the unlikely case that we did not manage to persist the
            # event we also delete the key always.
            if event is None or \
               features.has('organizations:event-attachments',
                            event.project.organization, actor=None):
                attachment_cache.delete(cache_key)

        if start_time:
            metrics.timing(
                'events.time-to-process',
                time() - start_time,
                instance=data['platform'])


def _do_symbolicate_event(cache_key, start_time, event_id, symbolicate_task,
                          data=None):
    from sentry.lang.native.processing import get_symbolication_function

    if data is None:
        data = event_processing_store.get(cache_key)

    if data is None:
        metrics.incr("events.failed",
                     tags={"reason": "cache", "stage": "symbolicate"},
                     skip_internal=False)
        error_logger.error("symbolicate.failed.empty",
                           extra={"cache_key": cache_key})
        return

    data = CanonicalKeyDict(data)

    project_id = data["project"]
    set_current_event_project(project_id)

    event_id = data["event_id"]

    if killswitch_matches_context(
        "store.load-shed-symbolicate-event-projects",
        {
            "project_id": project_id,
            "event_id": event_id,
            "platform": data.get("platform") or "null",
        },
    ):
        return

    symbolication_function = get_symbolication_function(data)

    has_changed = False

    from_reprocessing = \
        symbolicate_task is symbolicate_event_from_reprocessing

    symbolication_start_time = time()

    with sentry_sdk.start_span(
            op="tasks.store.symbolicate_event.symbolication") as span:
        span.set_data("symbolication_function",
                      symbolication_function.__name__)
        with metrics.timer(
            "tasks.store.symbolicate_event.symbolication",
            tags={"symbolication_function": symbolication_function.__name__},
        ):
            while True:
                try:
                    with sentry_sdk.start_span(
                            op="tasks.store.symbolicate_event.%s"
                            % symbolication_function.__name__) as span:
                        symbolicated_data = symbolication_function(data)
                        span.set_data("symbolicated_data",
                                      bool(symbolicated_data))

                    if symbolicated_data:
                        data = symbolicated_data
                        has_changed = True

                    break
                except RetrySymbolication as e:
                    if (time() - symbolication_start_time) > \
                            settings.SYMBOLICATOR_PROCESS_EVENT_WARN_TIMEOUT:
                        error_logger.warning(
                            "symbolicate.slow",
                            extra={"project_id": project_id,
                                   "event_id": event_id},
                        )

                    if (time() - symbolication_start_time) > \
                            settings.SYMBOLICATOR_PROCESS_EVENT_HARD_TIMEOUT:
                        # Do not drop the event but actually continue with the
                        # rest of the pipeline (persisting the unsymbolicated
                        # event)
                        metrics.incr(
                            "tasks.store.symbolicate_event.fatal",
                            tags={
                                "reason": "timeout",
                                "symbolication_function":
                                    symbolication_function.__name__,
                            },
                        )
                        error_logger.exception(
                            "symbolicate.failed.infinite_retry",
                            extra={"project_id": project_id,
                                   "event_id": event_id},
                        )
                        data.setdefault("_metrics", {})[
                            "flag.processing.error"] = True
                        data.setdefault("_metrics", {})[
                            "flag.processing.fatal"] = True
                        has_changed = True
                        break
                    else:
                        # sleep for `retry_after` but max 5 seconds and
                        # try again
                        metrics.incr(
                            "tasks.store.symbolicate_event.retry",
                            tags={
                                "symbolication_function":
                                    symbolication_function.__name__
                            },
                        )
                        sleep(min(e.retry_after,
                                  SYMBOLICATOR_MAX_RETRY_AFTER))
                        continue
                except Exception:
                    metrics.incr(
                        "tasks.store.symbolicate_event.fatal",
                        tags={
                            "reason": "error",
                            "symbolication_function":
                                symbolication_function.__name__,
                        },
                    )
                    error_logger.exception(
                        "tasks.store.symbolicate_event.symbolication")
                    data.setdefault("_metrics", {})[
                        "flag.processing.error"] = True
                    data.setdefault("_metrics", {})[
                        "flag.processing.fatal"] = True
                    has_changed = True
                    break

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        cache_key = event_processing_store.store(data)

    process_task = (process_event_from_reprocessing
                    if from_reprocessing else process_event)
    _do_process_event(
        cache_key=cache_key,
        start_time=start_time,
        event_id=event_id,
        process_task=process_task,
        data=data,
        data_has_changed=has_changed,
        from_symbolicate=True,
    )
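

# --- Illustrative sketch (not part of the original module) ---
# The symbolication loop above is a retry-until-deadline pattern: retry on a
# specific "try again later" exception, warn once past a soft deadline, and
# give up (while still keeping the event) past a hard deadline.  Distilled
# with hypothetical names and dependency-free stand-ins:

from time import sleep, time


class _RetryLater(Exception):  # stand-in for RetrySymbolication
    def __init__(self, retry_after):
        self.retry_after = retry_after


def _sketch_retry_until_deadline(work, soft=60.0, hard=300.0, max_sleep=5.0):
    started = time()
    while True:
        try:
            return work()
        except _RetryLater as e:
            elapsed = time() - started
            if elapsed > soft:
                print("warning: slow")  # real code logs and emits metrics
            if elapsed > hard:
                return None  # give up; caller persists the unprocessed data
            sleep(min(e.retry_after, max_sleep))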