def __init__(self, data, version='5', project=None, grouping_config=None,
             client_ip=None, user_agent=None, auth=None, key=None,
             content_encoding=None, is_renormalize=False, remove_other=None):
    self._data = _decode_event(data, content_encoding=content_encoding)
    self.version = version
    self._project = project
    if grouping_config is None and project is not None:
        grouping_config = get_grouping_config_dict_for_project(self._project)
    self._grouping_config = grouping_config
    self._client_ip = client_ip
    self._user_agent = user_agent
    self._auth = auth
    self._key = key
    self._is_renormalize = is_renormalize
    self._remove_other = remove_other
    self._normalized = False
def __init__(
    self, data, version='5', project=None, grouping_config=None, client_ip=None,
    user_agent=None, auth=None, key=None, content_encoding=None,
    is_renormalize=False, remove_other=None, relay_config=None
):
    self._data = _decode_event(data, content_encoding=content_encoding)
    self.version = version
    self._project = project

    # if not explicitly specified try to get the grouping from relay_config
    if grouping_config is None and relay_config is not None:
        config = relay_config.config
        grouping_config = config.get('grouping_config')

    # if we still don't have a grouping also try the project
    if grouping_config is None and project is not None:
        grouping_config = get_grouping_config_dict_for_project(self._project)

    self._grouping_config = grouping_config
    self._client_ip = client_ip
    self._user_agent = user_agent
    self._auth = auth
    self._key = key
    self._is_renormalize = is_renormalize
    self._remove_other = remove_other
    self._normalized = False
    self.relay_config = relay_config
def __init__(
    self, data, version='5', project=None, grouping_config=None, client_ip=None,
    user_agent=None, auth=None, key=None, content_encoding=None,
    is_renormalize=False, remove_other=None
):
    self._data = _decode_event(data, content_encoding=content_encoding)
    self.version = version
    self._project = project
    if grouping_config is None and project is not None:
        grouping_config = get_grouping_config_dict_for_project(self._project)
    self._grouping_config = grouping_config
    self._client_ip = client_ip
    self._user_agent = user_agent
    self._auth = auth
    self._key = key
    self._is_renormalize = is_renormalize
    self._remove_other = remove_other
    self._normalized = False
def __init__( self, data, version="5", project=None, grouping_config=None, client_ip=None, user_agent=None, auth=None, key=None, content_encoding=None, is_renormalize=False, remove_other=None, project_config=None, sent_at=None, ): self._data = CanonicalKeyDict(data) self.version = version self._project = project # if not explicitly specified try to get the grouping from project_config if grouping_config is None and project_config is not None: config = project_config.config grouping_config = config.get("grouping_config") # if we still don't have a grouping also try the project if grouping_config is None and project is not None: grouping_config = get_grouping_config_dict_for_project(self._project) self._grouping_config = grouping_config self._client_ip = client_ip self._user_agent = user_agent self._auth = auth self._key = key self._is_renormalize = is_renormalize self._remove_other = remove_other self._normalized = False self.project_config = project_config self.sent_at = sent_at
def get_project_config(project, full_config=True, project_keys=None):
    """
    Constructs the ProjectConfig information.

    :param project: The project to load configuration for. Ensure that
        organization is bound on this object; otherwise it will be loaded from
        the database.
    :param full_config: True if the full config is required, False if only the
        restricted config (for external relays) is required
        (default True, i.e. full configuration)
    :param project_keys: Pre-fetched project keys for performance. However, if
        no project keys are provided it is assumed that the config does not
        need to contain auth information (this is the case when used in
        Python's StoreView)

    :return: a ProjectConfig object for the given project
    """
    with configure_scope() as scope:
        scope.set_tag("project", project.id)

    if project.status != ObjectStatus.VISIBLE:
        return ProjectConfig(project, disabled=True)

    public_keys = get_public_key_configs(project, full_config, project_keys=project_keys)

    with Hub.current.start_span(op="get_public_config"):
        now = datetime.utcnow().replace(tzinfo=utc)
        cfg = {
            "disabled": False,
            "slug": project.slug,
            "lastFetch": now,
            "lastChange": project.get_option("sentry:relay-rev-lastchange", now),
            "rev": project.get_option("sentry:relay-rev", uuid.uuid4().hex),
            "publicKeys": public_keys,
            "config": {
                "allowedDomains": list(get_origins(project)),
                "trustedRelays": [
                    r["public_key"]
                    for r in project.organization.get_option("sentry:trusted-relays", [])
                    if r
                ],
                "piiConfig": get_pii_config(project),
                "datascrubbingSettings": get_datascrubbing_settings(project),
            },
            "organizationId": project.organization_id,
            "projectId": project.id,  # XXX: Unused by Relay, required by Python store
        }

    if not full_config:
        # This is all we need for external Relay processors
        return ProjectConfig(project, **cfg)

    with Hub.current.start_span(op="get_filter_settings"):
        cfg["config"]["filterSettings"] = get_filter_settings(project)

    with Hub.current.start_span(op="get_grouping_config_dict_for_project"):
        cfg["config"]["groupingConfig"] = get_grouping_config_dict_for_project(project)

    with Hub.current.start_span(op="get_event_retention"):
        cfg["config"]["eventRetention"] = quotas.get_event_retention(project.organization)

    with Hub.current.start_span(op="get_all_quotas"):
        cfg["config"]["quotas"] = get_quotas(project, keys=project_keys)

    return ProjectConfig(project, **cfg)
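# One detail worth noting in the variant above: the sentry:trusted-relays
# organization option is stored as a list of dicts and is reduced to bare
# public keys (skipping empty entries), whereas the older variants further
# down pass the option through unchanged. A small illustrative helper,
# assuming that data shape; the helper name is made up here.
def trusted_relay_keys(option_value):
    return [entry["public_key"] for entry in (option_value or []) if entry]


assert trusted_relay_keys(
    [{"public_key": "abc", "name": "relay-1"}, None, {"public_key": "def"}]
) == ["abc", "def"]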
def save(self, project_id, raw=False, assume_normalized=False, cache_key=None): """ After normalizing and processing an event, save adjacent models such as releases and environments to postgres and write the event into eventstream. From there it will be picked up by Snuba and post-processing. We re-insert events with duplicate IDs into Snuba, which is responsible for deduplicating events. Since deduplication in Snuba is on the primary key (based on event ID, project ID and day), events with same IDs are only deduplicated if their timestamps fall on the same day. The latest event always wins and overwrites the value of events received earlier in that day. Since we increment counters and frequencies here before events get inserted to eventstream these numbers may be larger than the total number of events if we receive duplicate event IDs that fall on the same day (that do not hit cache first). """ # Normalize if needed if not self._normalized: if not assume_normalized: self.normalize() self._normalized = True with metrics.timer("event_manager.save.project.get_from_cache"): project = Project.objects.get_from_cache(id=project_id) with metrics.timer("event_manager.save.organization.get_from_cache"): project._organization_cache = Organization.objects.get_from_cache( id=project.organization_id) job = {"data": self._data, "project_id": project_id, "raw": raw} jobs = [job] projects = {project.id: project} _pull_out_data(jobs, projects) # Right now the event type is the signal to skip the group. This # is going to change a lot. if job["event"].get_event_type() == "transaction": issueless_event = True else: issueless_event = False _get_or_create_release_many(jobs, projects) # XXX: remove if job["dist"] and job["release"]: job["dist"] = job["release"].add_dist(job["dist"], job["event"].datetime) # dont allow a conflicting 'dist' tag pop_tag(job["data"], "dist") set_tag(job["data"], "sentry:dist", job["dist"].name) else: job["dist"] = None _get_event_user_many(jobs, projects) with metrics.timer("event_manager.load_grouping_config"): # At this point we want to normalize the in_app values in case the # clients did not set this appropriately so far. grouping_config = load_grouping_config( get_grouping_config_dict_for_event_data(job["data"], project)) with metrics.timer("event_manager.normalize_stacktraces_for_grouping"): normalize_stacktraces_for_grouping(job["data"], grouping_config) _derive_plugin_tags_many(jobs, projects) _derive_interface_tags_many(jobs) with metrics.timer("event_manager.apply_server_fingerprinting"): # The active grouping config was put into the event in the # normalize step before. We now also make sure that the # fingerprint was set to `'{{ default }}' just in case someone # removed it from the payload. The call to get_hashes will then # look at `grouping_config` to pick the right parameters. job["data"]["fingerprint"] = job["data"].get("fingerprint") or [ "{{ default }}" ] apply_server_fingerprinting( job["data"], get_fingerprinting_config_for_project(project)) with metrics.timer("event_manager.event.get_hashes"): # Here we try to use the grouping config that was requested in the # event. If that config has since been deleted (because it was an # experimental grouping config) we fall back to the default. 
try: hashes = job["event"].get_hashes() except GroupingConfigNotFound: job["data"][ "grouping_config"] = get_grouping_config_dict_for_project( project) hashes = job["event"].get_hashes() job["data"]["hashes"] = hashes _materialize_metadata_many(jobs) job["received_timestamp"] = received_timestamp = job["event"].data.get( "received") or float(job["event"].datetime.strftime("%s")) if not issueless_event: # The group gets the same metadata as the event when it's flushed but # additionally the `last_received` key is set. This key is used by # _save_aggregate. group_metadata = dict(job["materialized_metadata"]) group_metadata["last_received"] = received_timestamp kwargs = { "platform": job["platform"], "message": job["event"].search_message, "culprit": job["culprit"], "logger": job["logger_name"], "level": LOG_LEVELS_MAP.get(job["level"]), "last_seen": job["event"].datetime, "first_seen": job["event"].datetime, "active_at": job["event"].datetime, "data": group_metadata, } if job["release"]: kwargs["first_release"] = job["release"] try: job["group"], job["is_new"], job[ "is_regression"] = _save_aggregate(event=job["event"], hashes=hashes, release=job["release"], **kwargs) except HashDiscarded: event_discarded.send_robust(project=project, sender=EventManager) metrics.incr( "events.discarded", skip_internal=True, tags={ "organization_id": project.organization_id, "platform": job["platform"] }, ) raise job["event"].group = job["group"] else: job["group"] = None job["is_new"] = False job["is_regression"] = False _send_event_saved_signal_many(jobs, projects) # store a reference to the group id to guarantee validation of isolation # XXX(markus): No clue what this does job["event"].data.bind_ref(job["event"]) _get_or_create_environment_many(jobs, projects) if job["group"]: group_environment, job[ "is_new_group_environment"] = GroupEnvironment.get_or_create( group_id=job["group"].id, environment_id=job["environment"].id, defaults={"first_release": job["release"] or None}, ) else: job["is_new_group_environment"] = False _get_or_create_release_associated_models(jobs, projects) if job["release"] and job["group"]: job["grouprelease"] = GroupRelease.get_or_create( group=job["group"], release=job["release"], environment=job["environment"], datetime=job["event"].datetime, ) _tsdb_record_all_metrics(jobs) if job["group"]: UserReport.objects.filter(project=project, event_id=job["event"].event_id).update( group=job["group"], environment=job["environment"]) # Enusre the _metrics key exists. This is usually created during # and prefilled with ingestion sizes. event_metrics = job["event"].data.get("_metrics") or {} job["event"].data["_metrics"] = event_metrics # Capture the actual size that goes into node store. event_metrics["bytes.stored.event"] = len( json.dumps(dict(job["event"].data.items()))) if not issueless_event: # Load attachments first, but persist them at the very last after # posting to eventstream to make sure all counters and eventstream are # incremented for sure. 
attachments = get_attachments(cache_key, job["event"]) for attachment in attachments: key = "bytes.stored.%s" % (attachment.type, ) event_metrics[key] = (event_metrics.get(key) or 0) + len( attachment.data) _nodestore_save_many(jobs) if job["release"] and not issueless_event: if job["is_new"]: buffer.incr( ReleaseProject, {"new_groups": 1}, { "release_id": job["release"].id, "project_id": project.id }, ) if job["is_new_group_environment"]: buffer.incr( ReleaseProjectEnvironment, {"new_issues_count": 1}, { "project_id": project.id, "release_id": job["release"].id, "environment_id": job["environment"].id, }, ) if not raw: if not project.first_event: project.update(first_event=job["event"].datetime) first_event_received.send_robust(project=project, event=job["event"], sender=Project) _eventstream_insert_many(jobs) if not issueless_event: # Do this last to ensure signals get emitted even if connection to the # file store breaks temporarily. save_attachments(attachments, job["event"]) metric_tags = {"from_relay": "_relay_processed" in job["data"]} metrics.timing("events.latency", received_timestamp - job["recorded_timestamp"], tags=metric_tags) metrics.timing("events.size.data.post_save", job["event"].size, tags=metric_tags) metrics.incr( "events.post_save.normalize.errors", amount=len(job["data"].get("errors") or ()), tags=metric_tags, ) self._data = job["event"].data.data return job["event"]
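# The hashing step in the save() variant above first tries the grouping
# config recorded on the event and, if that config has since been removed
# (e.g. an experimental config was deleted), re-resolves the project default
# and retries. This is a self-contained sketch of the same fallback pattern;
# the exception name mirrors the snippet, everything else is illustrative.
class GroupingConfigNotFound(LookupError):
    pass


def compute_hashes(event_data, get_hashes, get_default_config):
    try:
        return get_hashes(event_data)
    except GroupingConfigNotFound:
        # Fall back to the project's current default config and retry.
        event_data["grouping_config"] = get_default_config()
        return get_hashes(event_data)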
def get_grouping_config(self):
    """Returns the event grouping config."""
    from sentry.grouping.api import get_grouping_config_dict_for_project

    return self.data.get('grouping_config') \
        or get_grouping_config_dict_for_project(self.project)
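# Hedged usage sketch for the accessor above: the returned dict (with at
# least an "id") is typically materialized with load_grouping_config before
# hashing. load_grouping_config is the real helper in sentry.grouping.api;
# describe_grouping and the `event` argument are illustrative.
from sentry.grouping.api import load_grouping_config


def describe_grouping(event):
    # `event` is assumed to be an Event instance with a bound project.
    config_dict = event.get_grouping_config()   # accessor defined above
    config = load_grouping_config(config_dict)  # materialize the strategy configuration
    return config_dict.get("id"), config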
def save(self, project_id, raw=False, assume_normalized=False, cache_key=None): """ We re-insert events with duplicate IDs into Snuba, which is responsible for deduplicating events. Since deduplication in Snuba is on the primary key (based on event ID, project ID and day), events with same IDs are only deduplicated if their timestamps fall on the same day. The latest event always wins and overwrites the value of events received earlier in that day. Since we increment counters and frequencies here before events get inserted to eventstream these numbers may be larger than the total number of events if we receive duplicate event IDs that fall on the same day (that do not hit cache first). """ # Normalize if needed if not self._normalized: if not assume_normalized: self.normalize() self._normalized = True data = self._data project = Project.objects.get_from_cache(id=project_id) project._organization_cache = Organization.objects.get_from_cache( id=project.organization_id) # Pull out the culprit culprit = self.get_culprit() # Pull the toplevel data we're interested in level = data.get("level") # TODO(mitsuhiko): this code path should be gone by July 2018. # This is going to be fine because no code actually still depends # on integers here. When we need an integer it will be converted # into one later. Old workers used to send integers here. if level is not None and isinstance(level, six.integer_types): level = LOG_LEVELS[level] transaction_name = data.get("transaction") logger_name = data.get("logger") release = data.get("release") dist = data.get("dist") environment = data.get("environment") recorded_timestamp = data.get("timestamp") # We need to swap out the data with the one internal to the newly # created event object event = self._get_event_instance(project_id=project_id) self._data = data = event.data.data event._project_cache = project date = event.datetime platform = event.platform event_id = event.event_id if transaction_name: transaction_name = force_text(transaction_name) # Right now the event type is the signal to skip the group. This # is going to change a lot. if event.get_event_type() == "transaction": issueless_event = True else: issueless_event = False # Some of the data that are toplevel attributes are duplicated # into tags (logger, level, environment, transaction). These are # different from legacy attributes which are normalized into tags # ahead of time (site, server_name). setdefault_path(data, "tags", value=[]) set_tag(data, "level", level) if logger_name: set_tag(data, "logger", logger_name) if environment: set_tag(data, "environment", environment) if transaction_name: set_tag(data, "transaction", transaction_name) if release: # dont allow a conflicting 'release' tag pop_tag(data, "release") release = Release.get_or_create(project=project, version=release, date_added=date) set_tag(data, "sentry:release", release.version) if dist and release: dist = release.add_dist(dist, date) # dont allow a conflicting 'dist' tag pop_tag(data, "dist") set_tag(data, "sentry:dist", dist.name) else: dist = None event_user = self._get_event_user(project, data) if event_user: # dont allow a conflicting 'user' tag pop_tag(data, "user") set_tag(data, "sentry:user", event_user.tag_value) # At this point we want to normalize the in_app values in case the # clients did not set this appropriately so far. 
grouping_config = load_grouping_config( get_grouping_config_dict_for_event_data(data, project)) normalize_stacktraces_for_grouping(data, grouping_config) for plugin in plugins.for_project(project, version=None): added_tags = safe_execute(plugin.get_tags, event, _with_transaction=False) if added_tags: # plugins should not override user provided tags for key, value in added_tags: if get_tag(data, key) is None: set_tag(data, key, value) for path, iface in six.iteritems(event.interfaces): for k, v in iface.iter_tags(): set_tag(data, k, v) # Get rid of ephemeral interface data if iface.ephemeral: data.pop(iface.path, None) # The active grouping config was put into the event in the # normalize step before. We now also make sure that the # fingerprint was set to `'{{ default }}' just in case someone # removed it from the payload. The call to get_hashes will then # look at `grouping_config` to pick the right parameters. data["fingerprint"] = data.get("fingerprint") or ["{{ default }}"] apply_server_fingerprinting( data, get_fingerprinting_config_for_project(project)) # Here we try to use the grouping config that was requested in the # event. If that config has since been deleted (because it was an # experimental grouping config) we fall back to the default. try: hashes = event.get_hashes() except GroupingConfigNotFound: data["grouping_config"] = get_grouping_config_dict_for_project( project) hashes = event.get_hashes() data["hashes"] = hashes # we want to freeze not just the metadata and type in but also the # derived attributes. The reason for this is that we push this # data into kafka for snuba processing and our postprocessing # picks up the data right from the snuba topic. For most usage # however the data is dynamically overridden by Event.title and # Event.location (See Event.as_dict) materialized_metadata = self.materialize_metadata() data.update(materialized_metadata) data["culprit"] = culprit received_timestamp = event.data.get("received") or float( event.datetime.strftime("%s")) if not issueless_event: # The group gets the same metadata as the event when it's flushed but # additionally the `last_received` key is set. This key is used by # _save_aggregate. 
group_metadata = dict(materialized_metadata) group_metadata["last_received"] = received_timestamp kwargs = { "platform": platform, "message": event.search_message, "culprit": culprit, "logger": logger_name, "level": LOG_LEVELS_MAP.get(level), "last_seen": date, "first_seen": date, "active_at": date, "data": group_metadata, } if release: kwargs["first_release"] = release try: group, is_new, is_regression = self._save_aggregate( event=event, hashes=hashes, release=release, **kwargs) except HashDiscarded: event_discarded.send_robust(project=project, sender=EventManager) metrics.incr( "events.discarded", skip_internal=True, tags={ "organization_id": project.organization_id, "platform": platform }, ) raise else: event_saved.send_robust(project=project, event_size=event.size, sender=EventManager) event.group = group else: group = None is_new = False is_regression = False event_saved.send_robust(project=project, event_size=event.size, sender=EventManager) # store a reference to the group id to guarantee validation of isolation event.data.bind_ref(event) environment = Environment.get_or_create(project=project, name=environment) if group: group_environment, is_new_group_environment = GroupEnvironment.get_or_create( group_id=group.id, environment_id=environment.id, defaults={"first_release": release if release else None}, ) else: is_new_group_environment = False if release: ReleaseEnvironment.get_or_create(project=project, release=release, environment=environment, datetime=date) ReleaseProjectEnvironment.get_or_create(project=project, release=release, environment=environment, datetime=date) if group: grouprelease = GroupRelease.get_or_create( group=group, release=release, environment=environment, datetime=date) counters = [(tsdb.models.project, project.id)] if group: counters.append((tsdb.models.group, group.id)) if release: counters.append((tsdb.models.release, release.id)) tsdb.incr_multi(counters, timestamp=event.datetime, environment_id=environment.id) frequencies = [] if group: frequencies.append((tsdb.models.frequent_environments_by_group, { group.id: { environment.id: 1 } })) if release: frequencies.append((tsdb.models.frequent_releases_by_group, { group.id: { grouprelease.id: 1 } })) if frequencies: tsdb.record_frequency_multi(frequencies, timestamp=event.datetime) if group: UserReport.objects.filter(project=project, event_id=event_id).update( group=group, environment=environment) # Enusre the _metrics key exists. This is usually created during # and prefilled with ingestion sizes. event_metrics = event.data.get("_metrics") or {} event.data["_metrics"] = event_metrics # Capture the actual size that goes into node store. event_metrics["bytes.stored.event"] = len( json.dumps(dict(event.data.items()))) # Load attachments first, but persist them at the very last after # posting to eventstream to make sure all counters and eventstream are # incremented for sure. 
attachments = self.get_attachments(cache_key, event) for attachment in attachments: key = "bytes.stored.%s" % (attachment.type, ) event_metrics[key] = (event_metrics.get(key) or 0) + len( attachment.data) # Write the event to Nodestore event.data.save() if event_user: counters = [(tsdb.models.users_affected_by_project, project.id, (event_user.tag_value, ))] if group: counters.append((tsdb.models.users_affected_by_group, group.id, (event_user.tag_value, ))) tsdb.record_multi(counters, timestamp=event.datetime, environment_id=environment.id) if release: if is_new: buffer.incr( ReleaseProject, {"new_groups": 1}, { "release_id": release.id, "project_id": project.id }, ) if is_new_group_environment: buffer.incr( ReleaseProjectEnvironment, {"new_issues_count": 1}, { "project_id": project.id, "release_id": release.id, "environment_id": environment.id, }, ) if not raw: if not project.first_event: project.update(first_event=date) first_event_received.send_robust(project=project, event=event, sender=Project) eventstream.insert( group=group, event=event, is_new=is_new, is_regression=is_regression, is_new_group_environment=is_new_group_environment, primary_hash=hashes[0], # We are choosing to skip consuming the event back # in the eventstream if it's flagged as raw. # This means that we want to publish the event # through the event stream, but we don't care # about post processing and handling the commit. skip_consume=raw, ) # Do this last to ensure signals get emitted even if connection to the # file store breaks temporarily. self.save_attachments(attachments, event) metric_tags = {"from_relay": "_relay_processed" in self._data} metrics.timing("events.latency", received_timestamp - recorded_timestamp, tags=metric_tags) metrics.timing("events.size.data.post_save", event.size, tags=metric_tags) metrics.incr( "events.post_save.normalize.errors", amount=len(self._data.get("errors") or ()), tags=metric_tags, ) return event
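# The events.latency metric emitted at the end of the save() variants above
# is simply the server-side "received" timestamp minus the client-recorded
# "timestamp". A minimal sketch of that computation; the fallback to the
# event datetime mirrors the snippet (using calendar.timegm instead of
# strftime("%s")), and the function name is made up here.
import calendar


def event_latency_seconds(data, event_datetime):
    received = data.get("received") or calendar.timegm(event_datetime.utctimetuple())
    recorded = data.get("timestamp")
    return received - recorded


# Example: an event recorded at t=98.0 and received at t=100.5
assert event_latency_seconds({"received": 100.5, "timestamp": 98.0}, None) == 2.5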
def save(self, project_id, raw=False, assume_normalized=False): # Normalize if needed if not self._normalized: if not assume_normalized: self.normalize() self._normalized = True data = self._data project = Project.objects.get_from_cache(id=project_id) project._organization_cache = Organization.objects.get_from_cache( id=project.organization_id) # Check to make sure we're not about to do a bunch of work that's # already been done if we've processed an event with this ID. (This # isn't a perfect solution -- this doesn't handle ``EventMapping`` and # there's a race condition between here and when the event is actually # saved, but it's an improvement. See GH-7677.) try: event = Event.objects.get( project_id=project.id, event_id=data['event_id'], ) except Event.DoesNotExist: pass else: # Make sure we cache on the project before returning event._project_cache = project logger.info( 'duplicate.found', exc_info=True, extra={ 'event_uuid': data['event_id'], 'project_id': project.id, 'model': Event.__name__, } ) return event # Pull out the culprit culprit = self.get_culprit() # Pull the toplevel data we're interested in level = data.get('level') # TODO(mitsuhiko): this code path should be gone by July 2018. # This is going to be fine because no code actually still depends # on integers here. When we need an integer it will be converted # into one later. Old workers used to send integers here. if level is not None and isinstance(level, six.integer_types): level = LOG_LEVELS[level] transaction_name = data.get('transaction') logger_name = data.get('logger') release = data.get('release') dist = data.get('dist') environment = data.get('environment') recorded_timestamp = data.get('timestamp') # We need to swap out the data with the one internal to the newly # created event object event = self._get_event_instance(project_id=project_id) self._data = data = event.data.data event._project_cache = project date = event.datetime platform = event.platform event_id = event.event_id if transaction_name: transaction_name = force_text(transaction_name) # Some of the data that are toplevel attributes are duplicated # into tags (logger, level, environment, transaction). These are # different from legacy attributes which are normalized into tags # ahead of time (site, server_name). setdefault_path(data, 'tags', value=[]) set_tag(data, 'level', level) if logger_name: set_tag(data, 'logger', logger_name) if environment: set_tag(data, 'environment', environment) if transaction_name: set_tag(data, 'transaction', transaction_name) if release: # dont allow a conflicting 'release' tag pop_tag(data, 'release') release = Release.get_or_create( project=project, version=release, date_added=date, ) set_tag(data, 'sentry:release', release.version) if dist and release: dist = release.add_dist(dist, date) # dont allow a conflicting 'dist' tag pop_tag(data, 'dist') set_tag(data, 'sentry:dist', dist.name) else: dist = None event_user = self._get_event_user(project, data) if event_user: # dont allow a conflicting 'user' tag pop_tag(data, 'user') set_tag(data, 'sentry:user', event_user.tag_value) # At this point we want to normalize the in_app values in case the # clients did not set this appropriately so far. 
grouping_config = load_grouping_config( get_grouping_config_dict_for_event_data(data, project)) normalize_stacktraces_for_grouping(data, grouping_config) for plugin in plugins.for_project(project, version=None): added_tags = safe_execute(plugin.get_tags, event, _with_transaction=False) if added_tags: # plugins should not override user provided tags for key, value in added_tags: if get_tag(data, key) is None: set_tag(data, key, value) for path, iface in six.iteritems(event.interfaces): for k, v in iface.iter_tags(): set_tag(data, k, v) # Get rid of ephemeral interface data if iface.ephemeral: data.pop(iface.path, None) # The active grouping config was put into the event in the # normalize step before. We now also make sure that the # fingerprint was set to `'{{ default }}' just in case someone # removed it from the payload. The call to get_hashes will then # look at `grouping_config` to pick the right paramters. data['fingerprint'] = data.get('fingerprint') or ['{{ default }}'] apply_server_fingerprinting(data, get_fingerprinting_config_for_project(project)) # Here we try to use the grouping config that was requested in the # event. If that config has since been deleted (because it was an # experimental grouping config) we fall back to the default. try: hashes = event.get_hashes() except GroupingConfigNotFound: data['grouping_config'] = get_grouping_config_dict_for_project(project) hashes = event.get_hashes() data['hashes'] = hashes # we want to freeze not just the metadata and type in but also the # derived attributes. The reason for this is that we push this # data into kafka for snuba processing and our postprocessing # picks up the data right from the snuba topic. For most usage # however the data is dynamically overriden by Event.title and # Event.location (See Event.as_dict) materialized_metadata = self.materialize_metadata() event_metadata = materialized_metadata['metadata'] data.update(materialized_metadata) data['culprit'] = culprit # index components into ``Event.message`` # See GH-3248 event.message = self.get_search_message(event_metadata, culprit) received_timestamp = event.data.get('received') or float(event.datetime.strftime('%s')) # The group gets the same metadata as the event when it's flushed but # additionally the `last_received` key is set. This key is used by # _save_aggregate. group_metadata = dict(materialized_metadata) group_metadata['last_received'] = received_timestamp kwargs = { 'platform': platform, 'message': event.message, 'culprit': culprit, 'logger': logger_name, 'level': LOG_LEVELS_MAP.get(level), 'last_seen': date, 'first_seen': date, 'active_at': date, 'data': group_metadata, } if release: kwargs['first_release'] = release try: group, is_new, is_regression, is_sample = self._save_aggregate( event=event, hashes=hashes, release=release, **kwargs ) except HashDiscarded: event_discarded.send_robust( project=project, sender=EventManager, ) metrics.incr( 'events.discarded', skip_internal=True, tags={ 'organization_id': project.organization_id, 'platform': platform, }, ) raise else: event_saved.send_robust( project=project, event_size=event.size, sender=EventManager, ) event.group = group # store a reference to the group id to guarantee validation of isolation event.data.bind_ref(event) # When an event was sampled, the canonical source of truth # is the EventMapping table since we aren't going to be writing out an actual # Event row. 
Otherwise, if the Event isn't being sampled, we can safely # rely on the Event table itself as the source of truth and ignore # EventMapping since it's redundant information. if is_sample: try: with transaction.atomic(using=router.db_for_write(EventMapping)): EventMapping.objects.create(project=project, group=group, event_id=event_id) except IntegrityError: logger.info( 'duplicate.found', exc_info=True, extra={ 'event_uuid': event_id, 'project_id': project.id, 'group_id': group.id, 'model': EventMapping.__name__, } ) return event environment = Environment.get_or_create( project=project, name=environment, ) group_environment, is_new_group_environment = GroupEnvironment.get_or_create( group_id=group.id, environment_id=environment.id, defaults={ 'first_release': release if release else None, }, ) if release: ReleaseEnvironment.get_or_create( project=project, release=release, environment=environment, datetime=date, ) ReleaseProjectEnvironment.get_or_create( project=project, release=release, environment=environment, datetime=date, ) grouprelease = GroupRelease.get_or_create( group=group, release=release, environment=environment, datetime=date, ) counters = [ (tsdb.models.group, group.id), (tsdb.models.project, project.id), ] if release: counters.append((tsdb.models.release, release.id)) tsdb.incr_multi(counters, timestamp=event.datetime, environment_id=environment.id) frequencies = [ # (tsdb.models.frequent_projects_by_organization, { # project.organization_id: { # project.id: 1, # }, # }), # (tsdb.models.frequent_issues_by_project, { # project.id: { # group.id: 1, # }, # }) (tsdb.models.frequent_environments_by_group, { group.id: { environment.id: 1, }, }) ] if release: frequencies.append( (tsdb.models.frequent_releases_by_group, { group.id: { grouprelease.id: 1, }, }) ) tsdb.record_frequency_multi(frequencies, timestamp=event.datetime) UserReport.objects.filter( project=project, event_id=event_id, ).update( group=group, environment=environment, ) # Update any event attachment that arrived before the event group was defined. 
EventAttachment.objects.filter( project_id=project.id, event_id=event_id, ).update( group_id=group.id, ) # save the event unless its been sampled if not is_sample: try: with transaction.atomic(using=router.db_for_write(Event)): event.save() except IntegrityError: logger.info( 'duplicate.found', exc_info=True, extra={ 'event_uuid': event_id, 'project_id': project.id, 'group_id': group.id, 'model': Event.__name__, } ) return event tagstore.delay_index_event_tags( organization_id=project.organization_id, project_id=project.id, group_id=group.id, environment_id=environment.id, event_id=event.id, tags=event.tags, date_added=event.datetime, ) if event_user: tsdb.record_multi( ( (tsdb.models.users_affected_by_group, group.id, (event_user.tag_value, )), (tsdb.models.users_affected_by_project, project.id, (event_user.tag_value, )), ), timestamp=event.datetime, environment_id=environment.id, ) if release: if is_new: buffer.incr( ReleaseProject, {'new_groups': 1}, { 'release_id': release.id, 'project_id': project.id, } ) if is_new_group_environment: buffer.incr( ReleaseProjectEnvironment, {'new_issues_count': 1}, { 'project_id': project.id, 'release_id': release.id, 'environment_id': environment.id, } ) safe_execute( Group.objects.add_tags, group, environment, event.get_tags(), _with_transaction=False) if not raw: if not project.first_event: project.update(first_event=date) first_event_received.send_robust(project=project, group=group, sender=Project) eventstream.insert( group=group, event=event, is_new=is_new, is_sample=is_sample, is_regression=is_regression, is_new_group_environment=is_new_group_environment, primary_hash=hashes[0], # We are choosing to skip consuming the event back # in the eventstream if it's flagged as raw. # This means that we want to publish the event # through the event stream, but we don't care # about post processing and handling the commit. skip_consume=raw, ) metrics.timing( 'events.latency', received_timestamp - recorded_timestamp, tags={ 'project_id': project.id, }, ) metrics.timing( 'events.size.data.post_save', event.size, tags={'project_id': project.id} ) return event
def get_project_config(project_id, full_config=True, for_store=False): """ Constructs the ProjectConfig information. :param project_id: the project id as int or string :param full_config: True if only the full config is required, False if only the restricted (for external relays) is required (default True, i.e. full configuration) :param for_store: If set to true, this omits all parameters that are not needed for store normalization. This is a temporary flag that should be removed once store has been moved to Relay. Most importantly, this avoids database accesses. :return: a ProjectConfig object for the given project """ project = _get_project_from_id(six.text_type(project_id)) if project is None: raise APIError("Invalid project id:{}".format(project_id)) with configure_scope() as scope: scope.set_tag("project", project.id) if for_store: project_keys = [] else: project_keys = ProjectKey.objects \ .filter(project=project) \ .all() public_keys = {} for project_key in project_keys: public_keys[project_key.public_key] = project_key.status == 0 now = datetime.utcnow().replace(tzinfo=utc) org_options = OrganizationOption.objects.get_all_values( project.organization_id) cfg = { 'disabled': project.status > 0, 'slug': project.slug, 'lastFetch': now, 'lastChange': project.get_option('sentry:relay-rev-lastchange', now), 'rev': project.get_option('sentry:relay-rev', uuid.uuid4().hex), 'publicKeys': public_keys, 'config': { 'allowedDomains': project.get_option('sentry:origins', ['*']), 'trustedRelays': org_options.get('sentry:trusted-relays', []), 'piiConfig': _get_pii_config(project, org_options), }, 'project_id': project.id, } if not full_config: # This is all we need for external Relay processors return ProjectConfig(project, **cfg) # The organization id is only required for reporting when processing events # internally. Do not expose it to external Relays. cfg['organization_id'] = project.organization_id # Explicitly bind Organization so we don't implicitly query it later # this just allows us to comfortably assure that `project.organization` is safe. # This also allows us to pull the object from cache, instead of being # implicitly fetched from database. 
project.organization = Organization.objects.get_from_cache( id=project.organization_id) if project.organization is not None: org_options = OrganizationOption.objects.get_all_values( project.organization_id) else: org_options = {} project_cfg = cfg['config'] # get the filter settings for this project filter_settings = {} project_cfg['filter_settings'] = filter_settings for flt in get_all_filters(): filter_id = get_filter_key(flt) settings = _load_filter_settings(flt, project) filter_settings[filter_id] = settings invalid_releases = project.get_option(u'sentry:{}'.format(FilterTypes.RELEASES)) if invalid_releases: filter_settings[FilterTypes.RELEASES] = {'releases': invalid_releases} blacklisted_ips = project.get_option('sentry:blacklisted_ips') if blacklisted_ips: filter_settings['client_ips'] = {'blacklisted_ips': blacklisted_ips} error_messages = project.get_option(u'sentry:{}'.format(FilterTypes.ERROR_MESSAGES)) if error_messages: filter_settings[FilterTypes.ERROR_MESSAGES] = {'patterns': error_messages} csp_disallowed_sources = [] if bool(project.get_option('sentry:csp_ignored_sources_defaults', True)): csp_disallowed_sources += DEFAULT_DISALLOWED_SOURCES csp_disallowed_sources += project.get_option('sentry:csp_ignored_sources', []) if csp_disallowed_sources: filter_settings['csp'] = {'disallowed_sources': csp_disallowed_sources} scrub_ip_address = (org_options.get('sentry:require_scrub_ip_address', False) or project.get_option('sentry:scrub_ip_address', False)) project_cfg['scrub_ip_addresses'] = scrub_ip_address scrub_data = (org_options.get('sentry:require_scrub_data', False) or project.get_option('sentry:scrub_data', True)) project_cfg['scrub_data'] = scrub_data project_cfg['grouping_config'] = get_grouping_config_dict_for_project(project) project_cfg['allowed_domains'] = list(get_origins(project)) if scrub_data: # We filter data immediately before it ever gets into the queue sensitive_fields_key = 'sentry:sensitive_fields' sensitive_fields = ( org_options.get(sensitive_fields_key, []) + project.get_option(sensitive_fields_key, []) ) project_cfg['sensitive_fields'] = sensitive_fields exclude_fields_key = 'sentry:safe_fields' exclude_fields = ( org_options.get(exclude_fields_key, []) + project.get_option(exclude_fields_key, []) ) project_cfg['exclude_fields'] = exclude_fields scrub_defaults = (org_options.get('sentry:require_scrub_defaults', False) or project.get_option('sentry:scrub_defaults', True)) project_cfg['scrub_defaults'] = scrub_defaults return ProjectConfig(project, **cfg)
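# Several scrub flags in the variant above are the logical OR of an
# organization-level "require" option and the project-level setting (each
# with its own default). A small illustrative helper with plain dicts
# standing in for the option stores; the helper name is hypothetical.
def resolve_scrub_flag(org_options, project_options, name, project_default):
    return bool(
        org_options.get("sentry:require_%s" % name, False)
        or project_options.get("sentry:%s" % name, project_default)
    )


org = {"sentry:require_scrub_data": False}
proj = {}  # project did not override; the default for scrub_data above is True
assert resolve_scrub_flag(org, proj, "scrub_data", True) is True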
def get_project_config(project, org_options=None, full_config=True, for_store=False): """ Constructs the ProjectConfig information. :param project: The project to load configuration for. Ensure that organization is bound on this object; otherwise it will be loaded from the database. :param org_options: Inject preloaded organization options for faster loading. If ``None``, options are lazy-loaded from the database. :param full_config: True if only the full config is required, False if only the restricted (for external relays) is required (default True, i.e. full configuration) :param for_store: If set to true, this omits all parameters that are not needed for Relay. This is a temporary flag that should be removed once store has been moved to Relay. Most importantly, this avoids database accesses. :return: a ProjectConfig object for the given project """ with configure_scope() as scope: scope.set_tag("project", project.id) if for_store: project_keys = [] else: project_keys = ProjectKey.objects.filter(project=project).all() public_keys = [] for project_key in project_keys: key = { "publicKey": project_key.public_key, "isEnabled": project_key.status == 0 } if full_config: key["numericId"] = project_key.id key["quotas"] = [ quota.to_json() for quota in quotas.get_quotas(project, key=project_key) ] public_keys.append(key) now = datetime.utcnow().replace(tzinfo=utc) if org_options is None: org_options = OrganizationOption.objects.get_all_values( project.organization_id) cfg = { "disabled": project.status > 0, "slug": project.slug, "lastFetch": now, "lastChange": project.get_option("sentry:relay-rev-lastchange", now), "rev": project.get_option("sentry:relay-rev", uuid.uuid4().hex), "publicKeys": public_keys, "config": { "allowedDomains": project.get_option("sentry:origins", ["*"]), "trustedRelays": org_options.get("sentry:trusted-relays", []), "piiConfig": _get_pii_config(project), "datascrubbingSettings": _get_datascrubbing_settings(project, org_options), }, "project_id": project.id, } if not full_config: # This is all we need for external Relay processors return ProjectConfig(project, **cfg) # The organization id is only required for reporting when processing events # internally. Do not expose it to external Relays. 
cfg["organization_id"] = project.organization_id project_cfg = cfg["config"] # get the filter settings for this project filter_settings = {} project_cfg["filter_settings"] = filter_settings for flt in get_all_filters(): filter_id = get_filter_key(flt) settings = _load_filter_settings(flt, project) filter_settings[filter_id] = settings invalid_releases = project.get_option(u"sentry:{}".format( FilterTypes.RELEASES)) if invalid_releases: filter_settings[FilterTypes.RELEASES] = {"releases": invalid_releases} blacklisted_ips = project.get_option("sentry:blacklisted_ips") if blacklisted_ips: filter_settings["client_ips"] = {"blacklisted_ips": blacklisted_ips} error_messages = project.get_option(u"sentry:{}".format( FilterTypes.ERROR_MESSAGES)) if error_messages: filter_settings[FilterTypes.ERROR_MESSAGES] = { "patterns": error_messages } csp_disallowed_sources = [] if bool(project.get_option("sentry:csp_ignored_sources_defaults", True)): csp_disallowed_sources += DEFAULT_DISALLOWED_SOURCES csp_disallowed_sources += project.get_option("sentry:csp_ignored_sources", []) if csp_disallowed_sources: filter_settings["csp"] = {"disallowed_sources": csp_disallowed_sources} scrub_ip_address = org_options.get("sentry:require_scrub_ip_address", False) or project.get_option( "sentry:scrub_ip_address", False) project_cfg["scrub_ip_addresses"] = scrub_ip_address project_cfg["grouping_config"] = get_grouping_config_dict_for_project( project) project_cfg["allowed_domains"] = list(get_origins(project)) return ProjectConfig(project, **cfg)
def get_full_relay_config(project_id):
    """
    Constructs the internal (big) RelayConfig

    :param project_id: the project id as int or string
    :return: FullRelayConfig the relay configuration
    """
    cfg = {}
    project = _get_project_from_id(six.text_type(project_id))

    if project is None:
        raise APIError("Invalid project id:{}".format(project_id))

    cfg['project_id'] = project.id
    cfg['organization_id'] = project.organization_id

    # Explicitly bind Organization so we don't implicitly query it later
    # this just allows us to comfortably assure that `project.organization` is safe.
    # This also allows us to pull the object from cache, instead of being
    # implicitly fetched from database.
    project.organization = Organization.objects.get_from_cache(
        id=project.organization_id)

    if project.organization is not None:
        org_options = OrganizationOption.objects.get_all_values(
            project.organization_id)
    else:
        org_options = {}

    # get the project options
    project_cfg = {}
    cfg['config'] = project_cfg

    # getting kafka info
    try:
        project_cfg['kafka_max_event_size'] = options.get(
            'kafka-publisher.max-event-size')
        project_cfg['kafka_raw_event_sample_rate'] = options.get(
            'kafka-publisher.raw-event-sample-rate')
    except Exception:
        pass  # should we log ?

    invalid_releases = project.get_option(u'sentry:{}'.format(FilterTypes.RELEASES))
    if invalid_releases is not None:
        project_cfg['invalid_releases'] = invalid_releases

    # get the filters enabled for the current project
    enabled_filters = [
        filter_class.id for filter_class in filters.all()
        if filter_class(project).is_enabled()
    ]
    project_cfg['enabled_filters'] = enabled_filters

    scrub_ip_address = (org_options.get('sentry:require_scrub_ip_address', False) or
                        project.get_option('sentry:scrub_ip_address', False))
    project_cfg['scrub_ip_addresses'] = scrub_ip_address

    scrub_data = (org_options.get('sentry:require_scrub_data', False) or
                  project.get_option('sentry:scrub_data', True))
    project_cfg['scrub_data'] = scrub_data

    project_cfg['grouping_config'] = get_grouping_config_dict_for_project(project)
    project_cfg['allowed_domains'] = list(get_origins(project))

    if scrub_data:
        # We filter data immediately before it ever gets into the queue
        sensitive_fields_key = 'sentry:sensitive_fields'
        sensitive_fields = (org_options.get(sensitive_fields_key, []) +
                            project.get_option(sensitive_fields_key, []))
        project_cfg['sensitive_fields'] = sensitive_fields

        exclude_fields_key = 'sentry:safe_fields'
        exclude_fields = (org_options.get(exclude_fields_key, []) +
                          project.get_option(exclude_fields_key, []))
        project_cfg['exclude_fields'] = exclude_fields

        scrub_defaults = (org_options.get('sentry:require_scrub_defaults', False) or
                          project.get_option('sentry:scrub_defaults', True))
        project_cfg['scrub_defaults'] = scrub_defaults

    return FullRelayConfig(project, **cfg)
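# When scrubbing is enabled, get_full_relay_config above builds the sensitive
# and safe field lists by concatenating organization and project options
# (organization entries first). An illustrative sketch with dicts standing in
# for the option stores; merged_fields is a hypothetical name.
def merged_fields(org_options, project_options, key):
    return list(org_options.get(key, [])) + list(project_options.get(key, []))


org = {"sentry:sensitive_fields": ["password"]}
proj = {"sentry:sensitive_fields": ["credit_card"]}
assert merged_fields(org, proj, "sentry:sensitive_fields") == ["password", "credit_card"]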
def get_project_config(project_id, full_config=True, for_store=False): """ Constructs the ProjectConfig information. :param project_id: the project id as int or string :param full_config: True if only the full config is required, False if only the restricted (for external relays) is required (default True, i.e. full configuration) :param for_store: If set to true, this omits all parameters that are not needed for store normalization. This is a temporary flag that should be removed once store has been moved to Relay. Most importantly, this avoids database accesses. :return: a ProjectConfig object for the given project """ project = _get_project_from_id(six.text_type(project_id)) if project is None: raise APIError("Invalid project id:{}".format(project_id)) with configure_scope() as scope: scope.set_tag("project", project.id) if for_store: project_keys = [] else: project_keys = ProjectKey.objects.filter(project=project).all() public_keys = {} for project_key in project_keys: public_keys[project_key.public_key] = project_key.status == 0 now = datetime.utcnow().replace(tzinfo=utc) org_options = OrganizationOption.objects.get_all_values( project.organization_id) cfg = { "disabled": project.status > 0, "slug": project.slug, "lastFetch": now, "lastChange": project.get_option("sentry:relay-rev-lastchange", now), "rev": project.get_option("sentry:relay-rev", uuid.uuid4().hex), "publicKeys": public_keys, "config": { "allowedDomains": project.get_option("sentry:origins", ["*"]), "trustedRelays": org_options.get("sentry:trusted-relays", []), "piiConfig": _get_pii_config(project), "datascrubbingSettings": _get_datascrubbing_settings(project, org_options), }, "project_id": project.id, } if not full_config: # This is all we need for external Relay processors return ProjectConfig(project, **cfg) # The organization id is only required for reporting when processing events # internally. Do not expose it to external Relays. cfg["organization_id"] = project.organization_id # Explicitly bind Organization so we don't implicitly query it later # this just allows us to comfortably assure that `project.organization` is safe. # This also allows us to pull the object from cache, instead of being # implicitly fetched from database. 
project.organization = Organization.objects.get_from_cache( id=project.organization_id) if project.organization is not None: org_options = OrganizationOption.objects.get_all_values( project.organization_id) else: org_options = {} project_cfg = cfg["config"] # get the filter settings for this project filter_settings = {} project_cfg["filter_settings"] = filter_settings for flt in get_all_filters(): filter_id = get_filter_key(flt) settings = _load_filter_settings(flt, project) filter_settings[filter_id] = settings invalid_releases = project.get_option(u"sentry:{}".format( FilterTypes.RELEASES)) if invalid_releases: filter_settings[FilterTypes.RELEASES] = {"releases": invalid_releases} blacklisted_ips = project.get_option("sentry:blacklisted_ips") if blacklisted_ips: filter_settings["client_ips"] = {"blacklisted_ips": blacklisted_ips} error_messages = project.get_option(u"sentry:{}".format( FilterTypes.ERROR_MESSAGES)) if error_messages: filter_settings[FilterTypes.ERROR_MESSAGES] = { "patterns": error_messages } csp_disallowed_sources = [] if bool(project.get_option("sentry:csp_ignored_sources_defaults", True)): csp_disallowed_sources += DEFAULT_DISALLOWED_SOURCES csp_disallowed_sources += project.get_option("sentry:csp_ignored_sources", []) if csp_disallowed_sources: filter_settings["csp"] = {"disallowed_sources": csp_disallowed_sources} scrub_ip_address = org_options.get("sentry:require_scrub_ip_address", False) or project.get_option( "sentry:scrub_ip_address", False) project_cfg["scrub_ip_addresses"] = scrub_ip_address project_cfg["grouping_config"] = get_grouping_config_dict_for_project( project) project_cfg["allowed_domains"] = list(get_origins(project)) return ProjectConfig(project, **cfg)
def _project_has_hierarchical_grouping(project):
    config_dict = get_grouping_config_dict_for_project(project)
    config = load_grouping_config(config_dict)
    return config.initial_context["hierarchical_grouping"]
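# Hedged usage sketch for the helper above: the loaded grouping config
# exposes an initial_context mapping, and callers branch on the
# hierarchical_grouping flag. strategy_for and its return values are
# hypothetical; note that each call re-resolves and re-loads the config.
def strategy_for(project):
    if _project_has_hierarchical_grouping(project):
        return "hierarchical"
    return "flat"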
def get_project_config(project, org_options=None, full_config=True, project_keys=None):
    """
    Constructs the ProjectConfig information.

    :param project: The project to load configuration for. Ensure that
        organization is bound on this object; otherwise it will be loaded from
        the database.
    :param org_options: Inject preloaded organization options for faster loading.
        If ``None``, options are lazy-loaded from the database.
    :param full_config: True if the full config is required, False if only the
        restricted config (for external relays) is required
        (default True, i.e. full configuration)
    :param project_keys: Pre-fetched project keys for performance, similar to
        org_options. However, if no project keys are provided it is assumed
        that the config does not need to contain auth information (this is the
        case when used in Python's StoreView)

    :return: a ProjectConfig object for the given project
    """
    with configure_scope() as scope:
        scope.set_tag("project", project.id)

    public_keys = []
    for project_key in project_keys or ():
        key = {"publicKey": project_key.public_key, "isEnabled": project_key.status == 0}
        if full_config:
            key["numericId"] = project_key.id
            key["quotas"] = [
                quota.to_json() for quota in quotas.get_quotas(project, key=project_key)
            ]
        public_keys.append(key)

    now = datetime.utcnow().replace(tzinfo=utc)

    if org_options is None:
        org_options = OrganizationOption.objects.get_all_values(project.organization_id)

    with Hub.current.start_span(op="get_public_config"):
        cfg = {
            "disabled": project.status > 0,
            "slug": project.slug,
            "lastFetch": now,
            "lastChange": project.get_option("sentry:relay-rev-lastchange", now),
            "rev": project.get_option("sentry:relay-rev", uuid.uuid4().hex),
            "publicKeys": public_keys,
            "config": {
                "allowedDomains": list(get_origins(project)),
                "trustedRelays": org_options.get("sentry:trusted-relays", []),
                "piiConfig": _get_pii_config(project),
                "datascrubbingSettings": _get_datascrubbing_settings(project, org_options),
            },
            "projectId": project.id,
        }

    if not full_config:
        # This is all we need for external Relay processors
        return ProjectConfig(project, **cfg)

    # The organization id is only required for reporting when processing events
    # internally. Do not expose it to external Relays.
    cfg["organizationId"] = project.organization_id

    project_cfg = cfg["config"]

    with Hub.current.start_span(op="get_filter_settings"):
        # get the filter settings for this project
        filter_settings = {}
        project_cfg["filterSettings"] = filter_settings

        for flt in get_all_filters():
            filter_id = get_filter_key(flt)
            settings = _load_filter_settings(flt, project)
            filter_settings[filter_id] = settings

        invalid_releases = project.get_option(u"sentry:{}".format(FilterTypes.RELEASES))
        if invalid_releases:
            filter_settings["releases"] = {"releases": invalid_releases}

        blacklisted_ips = project.get_option("sentry:blacklisted_ips")
        if blacklisted_ips:
            filter_settings["clientIps"] = {"blacklistedIps": blacklisted_ips}

        error_messages = project.get_option(u"sentry:{}".format(FilterTypes.ERROR_MESSAGES))
        if error_messages:
            filter_settings["errorMessages"] = {"patterns": error_messages}

        csp_disallowed_sources = []
        if bool(project.get_option("sentry:csp_ignored_sources_defaults", True)):
            csp_disallowed_sources += DEFAULT_DISALLOWED_SOURCES
        csp_disallowed_sources += project.get_option("sentry:csp_ignored_sources", [])
        if csp_disallowed_sources:
            filter_settings["csp"] = {"disallowedSources": csp_disallowed_sources}

    with Hub.current.start_span(op="get_grouping_config_dict_for_project"):
        project_cfg["groupingConfig"] = get_grouping_config_dict_for_project(project)

    return ProjectConfig(project, **cfg)
def save(self, project_id, raw=False, assume_normalized=False, start_time=None, cache_key=None): """ After normalizing and processing an event, save adjacent models such as releases and environments to postgres and write the event into eventstream. From there it will be picked up by Snuba and post-processing. We re-insert events with duplicate IDs into Snuba, which is responsible for deduplicating events. Since deduplication in Snuba is on the primary key (based on event ID, project ID and day), events with same IDs are only deduplicated if their timestamps fall on the same day. The latest event always wins and overwrites the value of events received earlier in that day. Since we increment counters and frequencies here before events get inserted to eventstream these numbers may be larger than the total number of events if we receive duplicate event IDs that fall on the same day (that do not hit cache first). """ # Normalize if needed if not self._normalized: if not assume_normalized: self.normalize(project_id=project_id) self._normalized = True with metrics.timer("event_manager.save.project.get_from_cache"): project = Project.objects.get_from_cache(id=project_id) projects = {project.id: project} if self._data.get("type") == "transaction": self._data["project"] = int(project_id) job = {"data": self._data, "start_time": start_time} jobs = save_transaction_events([job], projects) return jobs[0]["event"] with metrics.timer("event_manager.save.organization.get_from_cache"): project._organization_cache = Organization.objects.get_from_cache( id=project.organization_id ) job = {"data": self._data, "project_id": project_id, "raw": raw, "start_time": start_time} jobs = [job] _pull_out_data(jobs, projects) _get_or_create_release_many(jobs, projects) _get_event_user_many(jobs, projects) job["project_key"] = None if job["key_id"] is not None: with metrics.timer("event_manager.load_project_key"): try: job["project_key"] = ProjectKey.objects.get_from_cache(id=job["key_id"]) except ProjectKey.DoesNotExist: pass with metrics.timer("event_manager.load_grouping_config"): # At this point we want to normalize the in_app values in case the # clients did not set this appropriately so far. grouping_config = load_grouping_config( get_grouping_config_dict_for_event_data(job["data"], project) ) with metrics.timer("event_manager.normalize_stacktraces_for_grouping"): normalize_stacktraces_for_grouping(job["data"], grouping_config) _derive_plugin_tags_many(jobs, projects) _derive_interface_tags_many(jobs) with metrics.timer("event_manager.apply_server_fingerprinting"): # The active grouping config was put into the event in the # normalize step before. We now also make sure that the # fingerprint was set to `'{{ default }}' just in case someone # removed it from the payload. The call to get_hashes will then # look at `grouping_config` to pick the right parameters. job["data"]["fingerprint"] = job["data"].get("fingerprint") or ["{{ default }}"] apply_server_fingerprinting(job["data"], get_fingerprinting_config_for_project(project)) with metrics.timer("event_manager.event.get_hashes"): # Here we try to use the grouping config that was requested in the # event. If that config has since been deleted (because it was an # experimental grouping config) we fall back to the default. 
try: hashes = job["event"].get_hashes() except GroupingConfigNotFound: job["data"]["grouping_config"] = get_grouping_config_dict_for_project(project) hashes = job["event"].get_hashes() job["data"]["hashes"] = hashes _materialize_metadata_many(jobs) # The group gets the same metadata as the event when it's flushed but # additionally the `last_received` key is set. This key is used by # _save_aggregate. group_metadata = dict(job["materialized_metadata"]) group_metadata["last_received"] = job["received_timestamp"] kwargs = { "platform": job["platform"], "message": job["event"].search_message, "culprit": job["culprit"], "logger": job["logger_name"], "level": LOG_LEVELS_MAP.get(job["level"]), "last_seen": job["event"].datetime, "first_seen": job["event"].datetime, "active_at": job["event"].datetime, "data": group_metadata, } if job["release"]: kwargs["first_release"] = job["release"] # Load attachments first, but persist them at the very last after # posting to eventstream to make sure all counters and eventstream are # incremented for sure. Also wait for grouping to remove attachments # based on the group counter. with metrics.timer("event_manager.get_attachments"): attachments = get_attachments(cache_key, job) try: job["group"], job["is_new"], job["is_regression"] = _save_aggregate( event=job["event"], hashes=hashes, release=job["release"], **kwargs ) except HashDiscarded: discard_event(job, attachments) raise job["event"].group = job["group"] # store a reference to the group id to guarantee validation of isolation # XXX(markus): No clue what this does job["event"].data.bind_ref(job["event"]) _get_or_create_environment_many(jobs, projects) if job["group"]: group_environment, job["is_new_group_environment"] = GroupEnvironment.get_or_create( group_id=job["group"].id, environment_id=job["environment"].id, defaults={"first_release": job["release"] or None}, ) else: job["is_new_group_environment"] = False _get_or_create_release_associated_models(jobs, projects) if job["release"] and job["group"]: job["grouprelease"] = GroupRelease.get_or_create( group=job["group"], release=job["release"], environment=job["environment"], datetime=job["event"].datetime, ) _tsdb_record_all_metrics(jobs) if job["group"]: UserReport.objects.filter(project=project, event_id=job["event"].event_id).update( group=job["group"], environment=job["environment"] ) with metrics.timer("event_manager.filter_attachments_for_group"): attachments = filter_attachments_for_group(attachments, job) # XXX: DO NOT MUTATE THE EVENT PAYLOAD AFTER THIS POINT _materialize_event_metrics(jobs) for attachment in attachments: key = "bytes.stored.%s" % (attachment.type,) old_bytes = job["event_metrics"].get(key) or 0 job["event_metrics"][key] = old_bytes + attachment.size _nodestore_save_many(jobs) save_unprocessed_event(project, event_id=job["event"].event_id) if job["release"]: if job["is_new"]: buffer.incr( ReleaseProject, {"new_groups": 1}, {"release_id": job["release"].id, "project_id": project.id}, ) if job["is_new_group_environment"]: buffer.incr( ReleaseProjectEnvironment, {"new_issues_count": 1}, { "project_id": project.id, "release_id": job["release"].id, "environment_id": job["environment"].id, }, ) if not raw: if not project.first_event: project.update(first_event=job["event"].datetime) first_event_received.send_robust( project=project, event=job["event"], sender=Project ) _eventstream_insert_many(jobs) # Do this last to ensure signals get emitted even if connection to the # file store breaks temporarily. 
with metrics.timer("event_manager.save_attachments"): save_attachments(cache_key, attachments, job) metric_tags = {"from_relay": "_relay_processed" in job["data"]} metrics.timing( "events.latency", job["received_timestamp"] - job["recorded_timestamp"], tags=metric_tags, ) metrics.timing("events.size.data.post_save", job["event"].size, tags=metric_tags) metrics.incr( "events.post_save.normalize.errors", amount=len(job["data"].get("errors") or ()), tags=metric_tags, ) _track_outcome_accepted_many(jobs) self._data = job["event"].data.data return job["event"]