def test_context_with_only_app_frames(self):
    """
    An exception whose frames are all flagged ``in_app`` must not report
    system frames in its API context.
    """

    def app_only_value():
        # A fresh dict on every call: ``normalize_in_app`` receives the
        # payload and must not see shared frame objects.
        return {
            'type': 'ValueError',
            'value': 'hello world',
            'module': 'foo.bar',
            'stacktrace': {
                'frames': [{
                    'filename': 'foo/baz.py',
                    'lineno': 1,
                    'in_app': True,
                }],
            },
        }

    payload = {'values': [app_only_value(), app_only_value()]}
    normalize_in_app({'sentry.interfaces.Exception': payload})

    interface = Exception.to_python(payload)
    self.create_event(
        data={'sentry.interfaces.Exception': interface.to_json()},
    )

    assert not interface.get_api_context()['hasSystemFrames']
def test_context_with_only_app_frames(self):
    """
    An exception whose frames are all flagged ``in_app`` must not report
    system frames in its API context (payload stored under ``exception``).
    """

    def app_only_value():
        # A fresh dict on every call so the two values never share frames.
        return {
            'type': 'ValueError',
            'value': 'hello world',
            'module': 'foo.bar',
            'stacktrace': {
                'frames': [{
                    'filename': 'foo/baz.py',
                    'lineno': 1,
                    'in_app': True,
                }],
            },
        }

    payload = {'values': [app_only_value(), app_only_value()]}
    normalize_in_app({'exception': payload})

    interface = Exception.to_python(payload)
    self.create_event(data={'exception': interface.to_json()})

    assert not interface.get_api_context()['hasSystemFrames']
def test_context_with_only_app_frames(make_exception_snapshot):
    """
    Normalize an exception payload whose frames are all ``in_app`` and hand
    it to the ``make_exception_snapshot`` fixture.
    """

    def app_only_value():
        # A fresh dict on every call so the two values never share frames.
        return {
            'type': 'ValueError',
            'value': 'hello world',
            'module': 'foo.bar',
            'stacktrace': {
                'frames': [{
                    'filename': 'foo/baz.py',
                    'lineno': 1,
                    'in_app': True,
                }],
            },
        }

    payload = {'values': [app_only_value(), app_only_value()]}
    normalize_in_app({'exception': payload})
    make_exception_snapshot(payload)
def test_normalize_with_system_frames(self):
    """
    ``normalize_in_app`` keeps an explicit ``in_app: True`` and fills in
    ``in_app: False`` on a frame that did not specify it, while tolerating
    a ``None`` entry in the frame list.
    """
    flagged_frame = {
        'abs_path': 'http://example.com/foo.js',
        'filename': 'foo.js',
        'lineno': 4,
        'colno': 0,
        'in_app': True,
    }
    unflagged_frame = {
        'abs_path': 'http://example.com/foo.js',
        'filename': 'foo.js',
        'lineno': 1,
        'colno': 0,
    }
    data = {
        'stacktrace': {
            'frames': [None, flagged_frame, unflagged_frame],
        },
    }

    normalize_in_app(data)

    frames = data['stacktrace']['frames']
    assert frames[1]['in_app'] is True
    assert frames[2]['in_app'] is False
def save(self, project, raw=False):
    """
    Persist the event described by ``self.data`` and return the ``Event``.

    The method pops the top-level attributes out of a copy of ``self.data``,
    builds the tag list, computes the grouping hashes, creates or updates
    the group through ``self._save_aggregate``, records the
    ``EventMapping``, environment/release rows and tsdb counters, saves the
    ``Event`` row unless the event was sampled, and finally dispatches
    post-processing.

    :param project: the project id; it is resolved with
        ``Project.objects.get_from_cache(id=project)``.
    :param raw: when true, the ``first_event`` update, the
        ``post_process_group`` task and the regression signal are skipped.
    :returns: the ``Event`` instance. The same instance is returned early
        when an ``IntegrityError`` is raised while creating the
        ``EventMapping`` or saving the ``Event``.
    """
    from sentry.tasks.post_process import index_event_tags

    project = Project.objects.get_from_cache(id=project)

    # Work on a copy so the pops below do not remove keys from ``self.data``.
    data = self.data.copy()

    # First we pull out our top-level (non-data attr) kwargs
    event_id = data.pop('event_id')
    level = data.pop('level')

    # An explicit transaction takes precedence over an explicit culprit.
    culprit = data.pop('transaction', None)
    if not culprit:
        culprit = data.pop('culprit', None)
    logger_name = data.pop('logger', None)
    server_name = data.pop('server_name', None)
    site = data.pop('site', None)
    checksum = data.pop('checksum', None)
    fingerprint = data.pop('fingerprint', None)
    platform = data.pop('platform', None)
    release = data.pop('release', None)
    dist = data.pop('dist', None)
    environment = data.pop('environment', None)

    # unused
    time_spent = data.pop('time_spent', None)
    message = data.pop('message', '')

    if not culprit:
        # if we generate an implicit culprit, lets not call it a
        # transaction
        transaction_name = None
        culprit = generate_culprit(data, platform=platform)
    else:
        transaction_name = culprit

    # The naive datetime built from the timestamp is labelled as UTC.
    date = datetime.fromtimestamp(data.pop('timestamp'))
    date = date.replace(tzinfo=timezone.utc)

    kwargs = {
        'platform': platform,
    }

    event = Event(
        project_id=project.id,
        event_id=event_id,
        data=data,
        time_spent=time_spent,
        datetime=date,
        **kwargs
    )
    event._project_cache = project

    # convert this to a dict to ensure we're only storing one value per key
    # as most parts of Sentry dont currently play well with multiple values
    tags = dict(data.get('tags') or [])
    tags['level'] = LOG_LEVELS[level]
    if logger_name:
        tags['logger'] = logger_name
    if server_name:
        tags['server_name'] = server_name
    if site:
        tags['site'] = site
    if environment:
        tags['environment'] = environment
    if transaction_name:
        tags['transaction'] = transaction_name

    if release:
        # dont allow a conflicting 'release' tag
        if 'release' in tags:
            del tags['release']
        # From here on ``release`` is the object returned by
        # Release.get_or_create, no longer the raw version value.
        release = Release.get_or_create(
            project=project,
            version=release,
            date_added=date,
        )

        tags['sentry:release'] = release.version

    if dist and release:
        dist = release.add_dist(dist, date)
        tags['sentry:dist'] = dist.name
    else:
        dist = None

    event_user = self._get_event_user(project, data)
    if event_user:
        # dont allow a conflicting 'user' tag
        if 'user' in tags:
            del tags['user']
        tags['sentry:user'] = event_user.tag_value

    # At this point we want to normalize the in_app values in case the
    # clients did not set this appropriately so far.
    normalize_in_app(data)

    for plugin in plugins.for_project(project, version=None):
        added_tags = safe_execute(plugin.get_tags, event, _with_transaction=False)
        if added_tags:
            # plugins should not override user provided tags
            for key, value in added_tags:
                tags.setdefault(key, value)

    # tags are stored as a tuple
    # NOTE(review): the ``.extend()`` call below requires ``items()`` to
    # return a list (Python 2 semantics) -- confirm before porting.
    tags = tags.items()

    # XXX(dcramer): we're relying on mutation of the data object to ensure
    # this propagates into Event
    data['tags'] = tags
    data['fingerprint'] = fingerprint or ['{{ default }}']

    for path, iface in six.iteritems(event.interfaces):
        # ``data['tags']`` and ``tags`` are the same object, so interface
        # tags also end up in the ``tags`` used further down.
        data['tags'].extend(iface.iter_tags())
        # Get rid of ephemeral interface data
        if iface.ephemeral:
            data.pop(iface.get_path(), None)

    # prioritize fingerprint over checksum as its likely the client defaulted
    # a checksum whereas the fingerprint was explicit
    if fingerprint:
        hashes = [md5_from_hash(h) for h in get_hashes_from_fingerprint(event, fingerprint)]
    elif checksum:
        hashes = [checksum]
        data['checksum'] = checksum
    else:
        hashes = [md5_from_hash(h) for h in get_hashes_for_event(event)]

    # TODO(dcramer): temp workaround for complexity
    # ``message`` is put back into ``data`` only while the event type is
    # instantiated and its metadata is computed.
    data['message'] = message
    event_type = eventtypes.get(data.get('type', 'default'))(data)
    event_metadata = event_type.get_metadata()
    # TODO(dcramer): temp workaround for complexity
    del data['message']

    data['type'] = event_type.key
    data['metadata'] = event_metadata

    # index components into ``Event.message``
    # See GH-3248
    if event_type.key != 'default':
        if 'sentry.interfaces.Message' in data and \
                data['sentry.interfaces.Message']['message'] != message:
            message = u'{} {}'.format(
                message,
                data['sentry.interfaces.Message']['message'],
            )

    if not message:
        message = ''
    elif not isinstance(message, six.string_types):
        message = force_text(message)

    # Append every metadata value (and the culprit) that is not already a
    # substring of the message.
    for value in six.itervalues(event_metadata):
        value_u = force_text(value, errors='replace')
        if value_u not in message:
            message = u'{} {}'.format(message, value_u)

    if culprit and culprit not in message:
        culprit_u = force_text(culprit, errors='replace')
        message = u'{} {}'.format(message, culprit_u)

    message = trim(message.strip(), settings.SENTRY_MAX_MESSAGE_LENGTH)

    event.message = message
    kwargs['message'] = message

    group_kwargs = kwargs.copy()
    group_kwargs.update(
        {
            'culprit': culprit,
            'logger': logger_name,
            'level': level,
            'last_seen': date,
            'first_seen': date,
            'active_at': date,
            'data': {
                # NOTE(review): '%s' is not among the strftime directives
                # documented by Python; it relies on the platform C
                # library -- confirm it is supported on all targets.
                'last_received': event.data.get('received') or float(event.datetime.strftime('%s')),
                'type':
                event_type.key,
                # we cache the events metadata on the group to ensure its
                # accessible in the stream
                'metadata':
                event_metadata,
            },
        }
    )

    if release:
        group_kwargs['first_release'] = release

    group, is_new, is_regression, is_sample = self._save_aggregate(
        event=event, hashes=hashes, release=release, **group_kwargs
    )

    event.group = group
    # store a reference to the group id to guarantee validation of isolation
    event.data.bind_ref(event)

    # An IntegrityError on the mapping is logged as a duplicate and ends
    # processing for this event.
    try:
        with transaction.atomic(using=router.db_for_write(EventMapping)):
            EventMapping.objects.create(project=project, group=group, event_id=event_id)
    except IntegrityError:
        self.logger.info(
            'duplicate.found',
            exc_info=True,
            extra={
                'event_uuid': event_id,
                'project_id': project.id,
                'group_id': group.id,
                'model': EventMapping.__name__,
            }
        )
        return event

    # ``environment`` is rebound from the popped name to the object
    # returned by Environment.get_or_create.
    environment = Environment.get_or_create(
        project=project,
        name=environment,
    )

    if release:
        ReleaseEnvironment.get_or_create(
            project=project,
            release=release,
            environment=environment,
            datetime=date,
        )

        grouprelease = GroupRelease.get_or_create(
            group=group,
            release=release,
            environment=environment,
            datetime=date,
        )

    counters = [
        (tsdb.models.group, group.id),
        (tsdb.models.project, project.id),
    ]

    if release:
        counters.append((tsdb.models.release, release.id))

    tsdb.incr_multi(counters, timestamp=event.datetime)

    frequencies = [
        # (tsdb.models.frequent_projects_by_organization, {
        #     project.organization_id: {
        #         project.id: 1,
        #     },
        # }),
        # (tsdb.models.frequent_issues_by_project, {
        #     project.id: {
        #         group.id: 1,
        #     },
        # })
        (tsdb.models.frequent_environments_by_group, {
            group.id: {
                environment.id: 1,
            },
        })
    ]

    if release:
        # ``grouprelease`` is only bound when ``release`` is truthy, which
        # is the same condition guarding this branch.
        frequencies.append(
            (tsdb.models.frequent_releases_by_group, {
                group.id: {
                    grouprelease.id: 1,
                },
            })
        )

    tsdb.record_frequency_multi(frequencies, timestamp=event.datetime)

    # Attach user reports already filed for this event id to the group.
    UserReport.objects.filter(
        project=project,
        event_id=event_id,
    ).update(group=group)

    # save the event unless its been sampled
    if not is_sample:
        try:
            with transaction.atomic(using=router.db_for_write(Event)):
                event.save()
        except IntegrityError:
            self.logger.info(
                'duplicate.found',
                exc_info=True,
                extra={
                    'event_uuid': event_id,
                    'project_id': project.id,
                    'group_id': group.id,
                    'model': Event.__name__,
                }
            )
            return event

        index_event_tags.delay(
            organization_id=project.organization_id,
            project_id=project.id,
            group_id=group.id,
            event_id=event.id,
            tags=tags,
        )

    if event_user:
        tsdb.record_multi(
            (
                (tsdb.models.users_affected_by_group, group.id, (event_user.tag_value, )),
                (tsdb.models.users_affected_by_project, project.id, (event_user.tag_value, )),
            ),
            timestamp=event.datetime
        )

    if is_new and release:
        buffer.incr(
            ReleaseProject, {'new_groups': 1}, {
                'release_id': release.id,
                'project_id': project.id,
            }
        )

    safe_execute(Group.objects.add_tags, group, tags, _with_transaction=False)

    if not raw:
        if not project.first_event:
            project.update(first_event=date)
            first_event_received.send(project=project, group=group, sender=Project)

        post_process_group.delay(
            group=group,
            event=event,
            is_new=is_new,
            is_sample=is_sample,
            is_regression=is_regression,
        )
    else:
        self.logger.info('post_process.skip.raw_event', extra={'event_id': event.id})

    # TODO: move this to the queue
    if is_regression and not raw:
        regression_signal.send_robust(sender=Group, instance=group)

    return event
def save(self, project, raw=False):
    """
    Persist the event described by ``self.data`` and return the ``Event``.

    If an ``Event`` with the same project and event id already exists it is
    returned immediately. Otherwise the method pops the top-level
    attributes out of ``self.data`` (in place), builds the tag list,
    computes the grouping hashes, creates or updates the group through
    ``self._save_aggregate``, records environment/release rows and tsdb
    counters, saves the ``Event`` row unless the event was sampled, and
    publishes the event through ``eventstream.insert``.

    :param project: the project id; it is resolved with
        ``Project.objects.get_from_cache(id=project)``.
    :param raw: when true, the ``first_event`` update is skipped and
        ``skip_consume=True`` is passed to ``eventstream.insert``.
    :returns: the ``Event`` instance (an already stored one for a known
        event id; the new one otherwise, also when an ``IntegrityError``
        cuts processing short).
    :raises HashDiscarded: re-raised from ``self._save_aggregate`` after the
        ``event_discarded`` signal and the ``events.discarded`` metric have
        been emitted.
    """
    from sentry.tasks.post_process import index_event_tags

    # No copy is taken: the pops below remove keys from ``self.data``.
    data = self.data

    project = Project.objects.get_from_cache(id=project)

    # Check to make sure we're not about to do a bunch of work that's
    # already been done if we've processed an event with this ID. (This
    # isn't a perfect solution -- this doesn't handle ``EventMapping`` and
    # there's a race condition between here and when the event is actually
    # saved, but it's an improvement. See GH-7677.)
    try:
        event = Event.objects.get(
            project_id=project.id,
            event_id=data['event_id'],
        )
    except Event.DoesNotExist:
        pass
    else:
        # NOTE(review): ``exc_info=True`` is used here although this branch
        # runs when no exception was caught -- confirm this is intended.
        self.logger.info(
            'duplicate.found',
            exc_info=True,
            extra={
                'event_uuid': data['event_id'],
                'project_id': project.id,
                'model': Event.__name__,
            }
        )
        return event

    # First we pull out our top-level (non-data attr) kwargs
    event_id = data.pop('event_id')
    level = data.pop('level')
    transaction_name = data.pop('transaction', None)
    culprit = data.pop('culprit', None)
    logger_name = data.pop('logger', None)
    server_name = data.pop('server_name', None)
    site = data.pop('site', None)
    checksum = data.pop('checksum', None)
    fingerprint = data.pop('fingerprint', None)
    platform = data.pop('platform', None)
    release = data.pop('release', None)
    dist = data.pop('dist', None)
    environment = data.pop('environment', None)

    # unused
    time_spent = data.pop('time_spent', None)
    message = data.pop('message', '')

    # Without an explicit culprit, fall back to the transaction name and
    # only then to a generated culprit.
    if not culprit:
        if transaction_name:
            culprit = transaction_name
        else:
            culprit = generate_culprit(data, platform=platform)

    culprit = force_text(culprit)
    if transaction_name:
        transaction_name = force_text(transaction_name)

    # Kept separately because it is needed again for the latency metric.
    recorded_timestamp = data.pop('timestamp')
    # The naive datetime built from the timestamp is labelled as UTC.
    date = datetime.fromtimestamp(recorded_timestamp)
    date = date.replace(tzinfo=timezone.utc)

    kwargs = {
        'platform': platform,
    }

    event = Event(
        project_id=project.id,
        event_id=event_id,
        data=data,
        time_spent=time_spent,
        datetime=date,
        **kwargs
    )
    event._project_cache = project
    # From here on ``data`` refers to the mapping held by the event.
    data = event.data.data

    # convert this to a dict to ensure we're only storing one value per key
    # as most parts of Sentry dont currently play well with multiple values
    tags = dict(data.get('tags') or [])
    tags['level'] = LOG_LEVELS[level]
    if logger_name:
        tags['logger'] = logger_name
    if server_name:
        tags['server_name'] = server_name
    if site:
        tags['site'] = site
    if environment:
        tags['environment'] = environment
    if transaction_name:
        tags['transaction'] = transaction_name

    if release:
        # dont allow a conflicting 'release' tag
        if 'release' in tags:
            del tags['release']
        # From here on ``release`` is the object returned by
        # Release.get_or_create, no longer the raw version value.
        release = Release.get_or_create(
            project=project,
            version=release,
            date_added=date,
        )

        tags['sentry:release'] = release.version

    if dist and release:
        dist = release.add_dist(dist, date)
        tags['sentry:dist'] = dist.name
    else:
        dist = None

    event_user = self._get_event_user(project, data)
    if event_user:
        # dont allow a conflicting 'user' tag
        if 'user' in tags:
            del tags['user']
        tags['sentry:user'] = event_user.tag_value

    # At this point we want to normalize the in_app values in case the
    # clients did not set this appropriately so far.
    normalize_in_app(data)

    for plugin in plugins.for_project(project, version=None):
        added_tags = safe_execute(plugin.get_tags, event, _with_transaction=False)
        if added_tags:
            # plugins should not override user provided tags
            for key, value in added_tags:
                tags.setdefault(key, value)

    # Unlike plugin tags, interface tags overwrite existing keys.
    for path, iface in six.iteritems(event.interfaces):
        for k, v in iface.iter_tags():
            tags[k] = v
        # Get rid of ephemeral interface data
        if iface.ephemeral:
            data.pop(iface.get_path(), None)

    # tags are stored as a tuple
    tags = tags.items()

    data['tags'] = tags
    data['fingerprint'] = fingerprint or ['{{ default }}']

    # prioritize fingerprint over checksum as its likely the client defaulted
    # a checksum whereas the fingerprint was explicit
    if fingerprint:
        hashes = [md5_from_hash(h) for h in get_hashes_from_fingerprint(event, fingerprint)]
    elif checksum:
        # A checksum matching HASH_RE is used as is; any other value is
        # hashed, with the raw checksum kept as a second hash.
        if HASH_RE.match(checksum):
            hashes = [checksum]
        else:
            hashes = [md5_from_hash([checksum]), checksum]
        data['checksum'] = checksum
    else:
        hashes = [md5_from_hash(h) for h in get_hashes_for_event(event)]

    # TODO(dcramer): temp workaround for complexity
    # ``message`` is put back into ``data`` only while the event type is
    # instantiated and its metadata is computed.
    data['message'] = message
    event_type = eventtypes.get(data.get('type', 'default'))(data)
    event_metadata = event_type.get_metadata()
    # TODO(dcramer): temp workaround for complexity
    del data['message']

    data['type'] = event_type.key
    data['metadata'] = event_metadata

    # index components into ``Event.message``
    # See GH-3248
    if event_type.key != 'default':
        if 'sentry.interfaces.Message' in data and \
                data['sentry.interfaces.Message']['message'] != message:
            message = u'{} {}'.format(
                message,
                data['sentry.interfaces.Message']['message'],
            )

    if not message:
        message = ''
    elif not isinstance(message, six.string_types):
        message = force_text(message)

    # Append every metadata value (and the culprit) that is not already a
    # substring of the message.
    for value in six.itervalues(event_metadata):
        value_u = force_text(value, errors='replace')
        if value_u not in message:
            message = u'{} {}'.format(message, value_u)

    if culprit and culprit not in message:
        culprit_u = force_text(culprit, errors='replace')
        message = u'{} {}'.format(message, culprit_u)

    message = trim(message.strip(), settings.SENTRY_MAX_MESSAGE_LENGTH)

    event.message = message
    kwargs['message'] = message

    # NOTE(review): '%s' is not among the strftime directives documented by
    # Python; it relies on the platform C library -- confirm it is
    # supported on all targets.
    received_timestamp = event.data.get('received') or float(event.datetime.strftime('%s'))
    group_kwargs = kwargs.copy()
    group_kwargs.update(
        {
            'culprit': culprit,
            'logger': logger_name,
            'level': level,
            'last_seen': date,
            'first_seen': date,
            'active_at': date,
            'data': {
                'last_received': received_timestamp,
                'type':
                event_type.key,
                # we cache the events metadata on the group to ensure its
                # accessible in the stream
                'metadata':
                event_metadata,
            },
        }
    )

    if release:
        group_kwargs['first_release'] = release

    try:
        group, is_new, is_regression, is_sample = self._save_aggregate(
            event=event, hashes=hashes, release=release, **group_kwargs
        )
    except HashDiscarded:
        # Emit the signal and the metric, then let the exception propagate.
        event_discarded.send_robust(
            project=project,
            sender=EventManager,
        )

        metrics.incr(
            'events.discarded',
            skip_internal=True,
            tags={
                'organization_id': project.organization_id,
                'platform': platform,
            },
        )
        raise
    else:
        event_saved.send_robust(
            project=project,
            event_size=event.size,
            sender=EventManager,
        )

    event.group = group
    # store a reference to the group id to guarantee validation of isolation
    event.data.bind_ref(event)

    # When an event was sampled, the canonical source of truth
    # is the EventMapping table since we aren't going to be writing out an actual
    # Event row. Otherwise, if the Event isn't being sampled, we can safely
    # rely on the Event table itself as the source of truth and ignore
    # EventMapping since it's redundant information.
    if is_sample:
        try:
            with transaction.atomic(using=router.db_for_write(EventMapping)):
                EventMapping.objects.create(project=project, group=group, event_id=event_id)
        except IntegrityError:
            self.logger.info(
                'duplicate.found',
                exc_info=True,
                extra={
                    'event_uuid': event_id,
                    'project_id': project.id,
                    'group_id': group.id,
                    'model': EventMapping.__name__,
                }
            )
            return event

    # ``environment`` is rebound from the popped name to the object
    # returned by Environment.get_or_create.
    environment = Environment.get_or_create(
        project=project,
        name=environment,
    )

    group_environment, is_new_group_environment = GroupEnvironment.get_or_create(
        group_id=group.id,
        environment_id=environment.id,
        defaults={
            'first_release_id': release.id if release else None,
        },
    )

    if release:
        ReleaseEnvironment.get_or_create(
            project=project,
            release=release,
            environment=environment,
            datetime=date,
        )

        ReleaseProjectEnvironment.get_or_create(
            project=project,
            release=release,
            environment=environment,
            datetime=date,
        )

        grouprelease = GroupRelease.get_or_create(
            group=group,
            release=release,
            environment=environment,
            datetime=date,
        )

    counters = [
        (tsdb.models.group, group.id),
        (tsdb.models.project, project.id),
    ]

    if release:
        counters.append((tsdb.models.release, release.id))

    tsdb.incr_multi(counters, timestamp=event.datetime, environment_id=environment.id)

    frequencies = [
        # (tsdb.models.frequent_projects_by_organization, {
        #     project.organization_id: {
        #         project.id: 1,
        #     },
        # }),
        # (tsdb.models.frequent_issues_by_project, {
        #     project.id: {
        #         group.id: 1,
        #     },
        # })
        (tsdb.models.frequent_environments_by_group, {
            group.id: {
                environment.id: 1,
            },
        })
    ]

    if release:
        # ``grouprelease`` is only bound when ``release`` is truthy, which
        # is the same condition guarding this branch.
        frequencies.append(
            (tsdb.models.frequent_releases_by_group, {
                group.id: {
                    grouprelease.id: 1,
                },
            })
        )

    tsdb.record_frequency_multi(frequencies, timestamp=event.datetime)

    # Attach user reports already filed for this event id to the group and
    # environment.
    UserReport.objects.filter(
        project=project,
        event_id=event_id,
    ).update(
        group=group,
        environment=environment,
    )

    # save the event unless its been sampled
    if not is_sample:
        try:
            with transaction.atomic(using=router.db_for_write(Event)):
                event.save()
        except IntegrityError:
            self.logger.info(
                'duplicate.found',
                exc_info=True,
                extra={
                    'event_uuid': event_id,
                    'project_id': project.id,
                    'group_id': group.id,
                    'model': Event.__name__,
                }
            )
            return event

        index_event_tags.delay(
            organization_id=project.organization_id,
            project_id=project.id,
            group_id=group.id,
            environment_id=environment.id,
            event_id=event.id,
            tags=tags,
            date_added=event.datetime,
        )

    if event_user:
        tsdb.record_multi(
            (
                (tsdb.models.users_affected_by_group, group.id, (event_user.tag_value, )),
                (tsdb.models.users_affected_by_project, project.id, (event_user.tag_value, )),
            ),
            timestamp=event.datetime,
            environment_id=environment.id,
        )

    if release:
        if is_new:
            buffer.incr(
                ReleaseProject, {'new_groups': 1}, {
                    'release_id': release.id,
                    'project_id': project.id,
                }
            )
        if is_new_group_environment:
            buffer.incr(
                ReleaseProjectEnvironment, {'new_issues_count': 1}, {
                    'project_id': project.id,
                    'release_id': release.id,
                    'environment_id': environment.id,
                }
            )

    safe_execute(Group.objects.add_tags, group, environment, tags, _with_transaction=False)

    if not raw:
        if not project.first_event:
            project.update(first_event=date)
            first_event_received.send_robust(project=project, group=group, sender=Project)

    eventstream.insert(
        group=group,
        event=event,
        is_new=is_new,
        is_sample=is_sample,
        is_regression=is_regression,
        is_new_group_environment=is_new_group_environment,
        # The first entry of ``hashes`` is reported as the primary hash.
        primary_hash=hashes[0],
        # We are choosing to skip consuming the event back
        # in the eventstream if it's flagged as raw.
        # This means that we want to publish the event
        # through the event stream, but we don't care
        # about post processing and handling the commit.
        skip_consume=raw,
    )

    # Difference between the received timestamp and the timestamp recorded
    # in the event payload.
    metrics.timing(
        'events.latency',
        received_timestamp - recorded_timestamp,
        tags={
            'project_id': project.id,
        },
    )

    return event
def save(self, project, raw=False):
    """
    Persist the event described by ``self.data`` and return the ``Event``.

    If an ``Event`` with the same project and event id already exists it is
    returned immediately. Otherwise the method pops the top-level
    attributes out of ``self.data`` (in place), builds the tag list,
    computes the grouping hashes, creates or updates the group through
    ``self._save_aggregate``, records environment/release rows and tsdb
    counters, saves the ``Event`` row unless the event was sampled, and
    dispatches ``post_process_group`` unless ``raw`` is set.

    :param project: the project id; it is resolved with
        ``Project.objects.get_from_cache(id=project)``.
    :param raw: when true, the ``first_event`` update and the
        ``post_process_group`` task are skipped and a log line is written
        instead.
    :returns: the ``Event`` instance (an already stored one for a known
        event id; the new one otherwise, also when an ``IntegrityError``
        cuts processing short).
    :raises HashDiscarded: re-raised from ``self._save_aggregate`` after the
        ``event_discarded`` signal and the ``events.discarded`` metric have
        been emitted.
    """
    from sentry.tasks.post_process import index_event_tags

    # No copy is taken: the pops below remove keys from ``self.data``.
    data = self.data

    project = Project.objects.get_from_cache(id=project)

    # Check to make sure we're not about to do a bunch of work that's
    # already been done if we've processed an event with this ID. (This
    # isn't a perfect solution -- this doesn't handle ``EventMapping`` and
    # there's a race condition between here and when the event is actually
    # saved, but it's an improvement. See GH-7677.)
    try:
        event = Event.objects.get(
            project_id=project.id,
            event_id=data['event_id'],
        )
    except Event.DoesNotExist:
        pass
    else:
        # NOTE(review): ``exc_info=True`` is used here although this branch
        # runs when no exception was caught -- confirm this is intended.
        self.logger.info('duplicate.found',
                         exc_info=True,
                         extra={
                             'event_uuid': data['event_id'],
                             'project_id': project.id,
                             'model': Event.__name__,
                         })
        return event

    # First we pull out our top-level (non-data attr) kwargs
    event_id = data.pop('event_id')
    level = data.pop('level')

    # An explicit transaction takes precedence over an explicit culprit.
    culprit = data.pop('transaction', None)
    if not culprit:
        culprit = data.pop('culprit', None)
    logger_name = data.pop('logger', None)
    server_name = data.pop('server_name', None)
    site = data.pop('site', None)
    checksum = data.pop('checksum', None)
    fingerprint = data.pop('fingerprint', None)
    platform = data.pop('platform', None)
    release = data.pop('release', None)
    dist = data.pop('dist', None)
    environment = data.pop('environment', None)

    # unused
    time_spent = data.pop('time_spent', None)
    message = data.pop('message', '')

    if not culprit:
        # if we generate an implicit culprit, lets not call it a
        # transaction
        transaction_name = None
        culprit = generate_culprit(data, platform=platform)
    else:
        transaction_name = culprit
    culprit = force_text(culprit)

    # Kept separately because it is needed again for the latency metric.
    recorded_timestamp = data.pop('timestamp')
    # The naive datetime built from the timestamp is labelled as UTC.
    date = datetime.fromtimestamp(recorded_timestamp)
    date = date.replace(tzinfo=timezone.utc)

    kwargs = {
        'platform': platform,
    }

    event = Event(project_id=project.id,
                  event_id=event_id,
                  data=data,
                  time_spent=time_spent,
                  datetime=date,
                  **kwargs)
    event._project_cache = project

    # convert this to a dict to ensure we're only storing one value per key
    # as most parts of Sentry dont currently play well with multiple values
    tags = dict(data.get('tags') or [])
    tags['level'] = LOG_LEVELS[level]
    if logger_name:
        tags['logger'] = logger_name
    if server_name:
        tags['server_name'] = server_name
    if site:
        tags['site'] = site
    if environment:
        tags['environment'] = environment
    if transaction_name:
        tags['transaction'] = transaction_name

    if release:
        # dont allow a conflicting 'release' tag
        if 'release' in tags:
            del tags['release']
        # From here on ``release`` is the object returned by
        # Release.get_or_create, no longer the raw version value.
        release = Release.get_or_create(
            project=project,
            version=release,
            date_added=date,
        )

        tags['sentry:release'] = release.version

    if dist and release:
        dist = release.add_dist(dist, date)
        tags['sentry:dist'] = dist.name
    else:
        dist = None

    event_user = self._get_event_user(project, data)
    if event_user:
        # dont allow a conflicting 'user' tag
        if 'user' in tags:
            del tags['user']
        tags['sentry:user'] = event_user.tag_value

    # At this point we want to normalize the in_app values in case the
    # clients did not set this appropriately so far.
    normalize_in_app(data)

    for plugin in plugins.for_project(project, version=None):
        added_tags = safe_execute(plugin.get_tags,
                                  event,
                                  _with_transaction=False)
        if added_tags:
            # plugins should not override user provided tags
            for key, value in added_tags:
                tags.setdefault(key, value)

    # Unlike plugin tags, interface tags overwrite existing keys.
    for path, iface in six.iteritems(event.interfaces):
        for k, v in iface.iter_tags():
            tags[k] = v
        # Get rid of ephemeral interface data
        if iface.ephemeral:
            data.pop(iface.get_path(), None)

    # tags are stored as a tuple
    tags = tags.items()

    data['tags'] = tags
    data['fingerprint'] = fingerprint or ['{{ default }}']

    # prioritize fingerprint over checksum as its likely the client defaulted
    # a checksum whereas the fingerprint was explicit
    if fingerprint:
        hashes = [
            md5_from_hash(h)
            for h in get_hashes_from_fingerprint(event, fingerprint)
        ]
    elif checksum:
        # A checksum matching HASH_RE is used as is; any other value is
        # hashed, with the raw checksum kept as a second hash.
        if HASH_RE.match(checksum):
            hashes = [checksum]
        else:
            hashes = [md5_from_hash([checksum]), checksum]
        data['checksum'] = checksum
    else:
        hashes = [md5_from_hash(h) for h in get_hashes_for_event(event)]

    # TODO(dcramer): temp workaround for complexity
    # ``message`` is put back into ``data`` only while the event type is
    # instantiated and its metadata is computed.
    data['message'] = message
    event_type = eventtypes.get(data.get('type', 'default'))(data)
    event_metadata = event_type.get_metadata()
    # TODO(dcramer): temp workaround for complexity
    del data['message']

    data['type'] = event_type.key
    data['metadata'] = event_metadata

    # index components into ``Event.message``
    # See GH-3248
    if event_type.key != 'default':
        if 'sentry.interfaces.Message' in data and \
                data['sentry.interfaces.Message']['message'] != message:
            message = u'{} {}'.format(
                message,
                data['sentry.interfaces.Message']['message'],
            )

    if not message:
        message = ''
    elif not isinstance(message, six.string_types):
        message = force_text(message)

    # Append every metadata value (and the culprit) that is not already a
    # substring of the message.
    for value in six.itervalues(event_metadata):
        value_u = force_text(value, errors='replace')
        if value_u not in message:
            message = u'{} {}'.format(message, value_u)

    if culprit and culprit not in message:
        culprit_u = force_text(culprit, errors='replace')
        message = u'{} {}'.format(message, culprit_u)

    message = trim(message.strip(), settings.SENTRY_MAX_MESSAGE_LENGTH)

    event.message = message
    kwargs['message'] = message

    # NOTE(review): '%s' is not among the strftime directives documented by
    # Python; it relies on the platform C library -- confirm it is
    # supported on all targets.
    received_timestamp = event.data.get('received') or float(
        event.datetime.strftime('%s'))
    group_kwargs = kwargs.copy()
    group_kwargs.update({
        'culprit': culprit,
        'logger': logger_name,
        'level': level,
        'last_seen': date,
        'first_seen': date,
        'active_at': date,
        'data': {
            'last_received': received_timestamp,
            'type': event_type.key,
            # we cache the events metadata on the group to ensure its
            # accessible in the stream
            'metadata': event_metadata,
        },
    })

    if release:
        group_kwargs['first_release'] = release

    try:
        group, is_new, is_regression, is_sample = self._save_aggregate(
            event=event, hashes=hashes, release=release, **group_kwargs)
    except HashDiscarded:
        # Emit the signal and the metric, then let the exception propagate.
        event_discarded.send_robust(
            project=project,
            sender=EventManager,
        )

        metrics.incr(
            'events.discarded',
            skip_internal=True,
            tags={
                'organization_id': project.organization_id,
                'platform': platform,
            },
        )
        raise
    else:
        event_saved.send_robust(
            project=project,
            sender=EventManager,
        )

    event.group = group
    # store a reference to the group id to guarantee validation of isolation
    event.data.bind_ref(event)

    # When an event was sampled, the canonical source of truth
    # is the EventMapping table since we aren't going to be writing out an actual
    # Event row. Otherwise, if the Event isn't being sampled, we can safely
    # rely on the Event table itself as the source of truth and ignore
    # EventMapping since it's redundant information.
    if is_sample:
        try:
            with transaction.atomic(
                    using=router.db_for_write(EventMapping)):
                EventMapping.objects.create(project=project,
                                            group=group,
                                            event_id=event_id)
        except IntegrityError:
            self.logger.info('duplicate.found',
                             exc_info=True,
                             extra={
                                 'event_uuid': event_id,
                                 'project_id': project.id,
                                 'group_id': group.id,
                                 'model': EventMapping.__name__,
                             })
            return event

    # ``environment`` is rebound from the popped name to the object
    # returned by Environment.get_or_create.
    environment = Environment.get_or_create(
        project=project,
        name=environment,
    )

    group_environment, is_new_group_environment = GroupEnvironment.get_or_create(
        group_id=group.id,
        environment_id=environment.id,
        defaults={
            'first_release_id': release.id if release else None,
        },
    )

    if release:
        ReleaseEnvironment.get_or_create(
            project=project,
            release=release,
            environment=environment,
            datetime=date,
        )

        ReleaseProjectEnvironment.get_or_create(
            project=project,
            release=release,
            environment=environment,
            datetime=date,
        )

        grouprelease = GroupRelease.get_or_create(
            group=group,
            release=release,
            environment=environment,
            datetime=date,
        )

    counters = [
        (tsdb.models.group, group.id),
        (tsdb.models.project, project.id),
    ]

    if release:
        counters.append((tsdb.models.release, release.id))

    tsdb.incr_multi(counters,
                    timestamp=event.datetime,
                    environment_id=environment.id)

    frequencies = [
        # (tsdb.models.frequent_projects_by_organization, {
        #     project.organization_id: {
        #         project.id: 1,
        #     },
        # }),
        # (tsdb.models.frequent_issues_by_project, {
        #     project.id: {
        #         group.id: 1,
        #     },
        # })
        (tsdb.models.frequent_environments_by_group, {
            group.id: {
                environment.id: 1,
            },
        })
    ]

    if release:
        # ``grouprelease`` is only bound when ``release`` is truthy, which
        # is the same condition guarding this branch.
        frequencies.append((tsdb.models.frequent_releases_by_group, {
            group.id: {
                grouprelease.id: 1,
            },
        }))

    tsdb.record_frequency_multi(frequencies, timestamp=event.datetime)

    # Attach user reports already filed for this event id to the group and
    # environment.
    UserReport.objects.filter(
        project=project,
        event_id=event_id,
    ).update(
        group=group,
        environment=environment,
    )

    # save the event unless its been sampled
    if not is_sample:
        try:
            with transaction.atomic(using=router.db_for_write(Event)):
                event.save()
        except IntegrityError:
            self.logger.info('duplicate.found',
                             exc_info=True,
                             extra={
                                 'event_uuid': event_id,
                                 'project_id': project.id,
                                 'group_id': group.id,
                                 'model': Event.__name__,
                             })
            return event

        index_event_tags.delay(
            organization_id=project.organization_id,
            project_id=project.id,
            group_id=group.id,
            environment_id=environment.id,
            event_id=event.id,
            tags=tags,
            date_added=event.datetime,
        )

    if event_user:
        tsdb.record_multi(
            (
                (tsdb.models.users_affected_by_group, group.id,
                 (event_user.tag_value, )),
                (tsdb.models.users_affected_by_project, project.id,
                 (event_user.tag_value, )),
            ),
            timestamp=event.datetime,
            environment_id=environment.id,
        )

    if release:
        if is_new:
            buffer.incr(ReleaseProject, {'new_groups': 1}, {
                'release_id': release.id,
                'project_id': project.id,
            })
        if is_new_group_environment:
            buffer.incr(ReleaseProjectEnvironment, {'new_issues_count': 1}, {
                'project_id': project.id,
                'release_id': release.id,
                'environment_id': environment.id,
            })

    safe_execute(Group.objects.add_tags,
                 group,
                 environment,
                 tags,
                 _with_transaction=False)

    if not raw:
        if not project.first_event:
            project.update(first_event=date)
            first_event_received.send(project=project,
                                      group=group,
                                      sender=Project)

        post_process_group.delay(
            group=group,
            event=event,
            is_new=is_new,
            is_sample=is_sample,
            is_regression=is_regression,
            is_new_group_environment=is_new_group_environment,
            # The first entry of ``hashes`` is reported as the primary hash.
            primary_hash=hashes[0],
        )
    else:
        self.logger.info('post_process.skip.raw_event',
                         extra={'event_id': event.id})

    # Difference between the received timestamp and the timestamp recorded
    # in the event payload.
    metrics.timing(
        'events.latency',
        received_timestamp - recorded_timestamp,
        tags={
            'project_id': project.id,
        },
    )

    return event
def save(self, project_id, raw=False, assume_normalized=False):
    """
    Process and persist the event held by this manager.

    In order, this method: normalizes the payload if required, returns
    early when an ``Event`` with the same ``event_id`` already exists for
    the project, mirrors top-level attributes into tags, resolves the
    release / dist / event user, computes hashes and materialized
    metadata, saves the aggregate (group), records environment, release
    and tsdb bookkeeping, saves the event row (unless the event was
    sampled) and finally inserts the event into the eventstream.

    :param project_id: id of the project the event belongs to.
    :param raw: when true, the project's ``first_event`` bookkeeping is
        skipped and the value is forwarded to the eventstream as
        ``skip_consume``.
    :param assume_normalized: when true, ``normalize()`` is not invoked
        even if the manager has not been flagged as normalized yet.
    :returns: the ``Event`` instance.  For a duplicate detected up front
        this is the already stored event; when an ``IntegrityError`` is
        hit while writing, the in-memory event is returned without any
        further processing.
    :raises HashDiscarded: re-raised from ``_save_aggregate`` after the
        ``event_discarded`` signal and the ``events.discarded`` metric
        have been emitted.
    """
    # Normalize if needed
    if not self._normalized:
        if not assume_normalized:
            self.normalize()
        self._normalized = True

    data = self._data

    project = Project.objects.get_from_cache(id=project_id)
    project._organization_cache = Organization.objects.get_from_cache(
        id=project.organization_id)

    # Check to make sure we're not about to do a bunch of work that's
    # already been done if we've processed an event with this ID. (This
    # isn't a perfect solution -- this doesn't handle ``EventMapping`` and
    # there's a race condition between here and when the event is actually
    # saved, but it's an improvement. See GH-7677.)
    try:
        event = Event.objects.get(
            project_id=project.id,
            event_id=data['event_id'],
        )
    except Event.DoesNotExist:
        pass
    else:
        # Make sure we cache on the project before returning
        event._project_cache = project
        # No exception is being handled on this path, so ``exc_info`` is
        # intentionally not passed: it would only attach an empty (or
        # stale) traceback to the log record.
        logger.info(
            'duplicate.found',
            extra={
                'event_uuid': data['event_id'],
                'project_id': project.id,
                'model': Event.__name__,
            }
        )
        return event

    # Pull out the culprit
    culprit = self.get_culprit()

    # Pull the toplevel data we're interested in
    level = data.get('level')

    # TODO(mitsuhiko): this code path should be gone by July 2018.
    # This is going to be fine because no code actually still depends
    # on integers here.  When we need an integer it will be converted
    # into one later.  Old workers used to send integers here.
    if level is not None and isinstance(level, six.integer_types):
        level = LOG_LEVELS[level]

    transaction_name = data.get('transaction')
    logger_name = data.get('logger')
    release = data.get('release')
    dist = data.get('dist')
    environment = data.get('environment')
    recorded_timestamp = data.get('timestamp')

    # We need to swap out the data with the one internal to the newly
    # created event object
    event = self._get_event_instance(project_id=project_id)
    self._data = data = event.data.data

    event._project_cache = project

    date = event.datetime
    platform = event.platform
    event_id = event.event_id

    if transaction_name:
        transaction_name = force_text(transaction_name)

    # Some of the data that are toplevel attributes are duplicated
    # into tags (logger, level, environment, transaction).  These are
    # different from legacy attributes which are normalized into tags
    # ahead of time (site, server_name).
    setdefault_path(data, 'tags', value=[])
    set_tag(data, 'level', level)
    if logger_name:
        set_tag(data, 'logger', logger_name)
    if environment:
        set_tag(data, 'environment', environment)
    if transaction_name:
        set_tag(data, 'transaction', transaction_name)

    if release:
        # don't allow a conflicting 'release' tag
        pop_tag(data, 'release')
        release = Release.get_or_create(
            project=project,
            version=release,
            date_added=date,
        )
        set_tag(data, 'sentry:release', release.version)

    if dist and release:
        dist = release.add_dist(dist, date)
        # don't allow a conflicting 'dist' tag
        pop_tag(data, 'dist')
        set_tag(data, 'sentry:dist', dist.name)
    else:
        dist = None

    event_user = self._get_event_user(project, data)
    if event_user:
        # don't allow a conflicting 'user' tag
        pop_tag(data, 'user')
        set_tag(data, 'sentry:user', event_user.tag_value)

    # At this point we want to normalize the in_app values in case the
    # clients did not set this appropriately so far.
    normalize_in_app(data)

    for plugin in plugins.for_project(project, version=None):
        added_tags = safe_execute(plugin.get_tags, event, _with_transaction=False)
        if added_tags:
            # plugins should not override user provided tags
            for key, value in added_tags:
                if get_tag(data, key) is None:
                    set_tag(data, key, value)

    for path, iface in six.iteritems(event.interfaces):
        for k, v in iface.iter_tags():
            set_tag(data, k, v)

        # Get rid of ephemeral interface data
        if iface.ephemeral:
            data.pop(iface.path, None)

    # The active grouping config was put into the event in the
    # normalize step before.  We now also make sure that the
    # fingerprint was set to ``'{{ default }}'`` just in case someone
    # removed it from the payload.  The call to get_hashes will then
    # look at `grouping_config` to pick the right parameters.
    data['fingerprint'] = data.get('fingerprint') or ['{{ default }}']
    hashes = event.get_hashes()
    data['hashes'] = hashes

    # we want to freeze not just the metadata and type in but also the
    # derived attributes.  The reason for this is that we push this
    # data into kafka for snuba processing and our postprocessing
    # picks up the data right from the snuba topic.  For most usage
    # however the data is dynamically overridden by Event.title and
    # Event.location (See Event.as_dict)
    materialized_metadata = self.materialize_metadata()
    event_metadata = materialized_metadata['metadata']
    data.update(materialized_metadata)
    data['culprit'] = culprit

    # index components into ``Event.message``
    # See GH-3248
    event.message = self.get_search_message(event_metadata, culprit)
    received_timestamp = event.data.get('received') or float(
        event.datetime.strftime('%s'))

    # The group gets the same metadata as the event when it's flushed but
    # additionally the `last_received` key is set.  This key is used by
    # _save_aggregate.
    group_metadata = dict(materialized_metadata)
    group_metadata['last_received'] = received_timestamp
    kwargs = {
        'platform': platform,
        'message': event.message,
        'culprit': culprit,
        'logger': logger_name,
        'level': LOG_LEVELS_MAP.get(level),
        'last_seen': date,
        'first_seen': date,
        'active_at': date,
        'data': group_metadata,
    }

    if release:
        kwargs['first_release'] = release

    try:
        group, is_new, is_regression, is_sample = self._save_aggregate(
            event=event, hashes=hashes, release=release, **kwargs
        )
    except HashDiscarded:
        event_discarded.send_robust(
            project=project,
            sender=EventManager,
        )

        metrics.incr(
            'events.discarded',
            skip_internal=True,
            tags={
                'organization_id': project.organization_id,
                'platform': platform,
            },
        )
        raise
    else:
        event_saved.send_robust(
            project=project,
            event_size=event.size,
            sender=EventManager,
        )

    event.group = group
    # store a reference to the group id to guarantee validation of isolation
    event.data.bind_ref(event)

    # When an event was sampled, the canonical source of truth
    # is the EventMapping table since we aren't going to be writing out an actual
    # Event row. Otherwise, if the Event isn't being sampled, we can safely
    # rely on the Event table itself as the source of truth and ignore
    # EventMapping since it's redundant information.
    if is_sample:
        try:
            with transaction.atomic(
                    using=router.db_for_write(EventMapping)):
                EventMapping.objects.create(project=project, group=group, event_id=event_id)
        except IntegrityError:
            logger.info(
                'duplicate.found',
                exc_info=True,
                extra={
                    'event_uuid': event_id,
                    'project_id': project.id,
                    'group_id': group.id,
                    'model': EventMapping.__name__,
                }
            )
            return event

    # From here on ``environment`` is the Environment instance rather than
    # the raw name read from the payload.
    environment = Environment.get_or_create(
        project=project,
        name=environment,
    )

    group_environment, is_new_group_environment = GroupEnvironment.get_or_create(
        group_id=group.id,
        environment_id=environment.id,
        defaults={
            'first_release': release if release else None,
        },
    )

    if release:
        ReleaseEnvironment.get_or_create(
            project=project,
            release=release,
            environment=environment,
            datetime=date,
        )

        ReleaseProjectEnvironment.get_or_create(
            project=project,
            release=release,
            environment=environment,
            datetime=date,
        )

        grouprelease = GroupRelease.get_or_create(
            group=group,
            release=release,
            environment=environment,
            datetime=date,
        )

    counters = [
        (tsdb.models.group, group.id),
        (tsdb.models.project, project.id),
    ]

    if release:
        counters.append((tsdb.models.release, release.id))

    tsdb.incr_multi(counters, timestamp=event.datetime, environment_id=environment.id)

    frequencies = [
        # (tsdb.models.frequent_projects_by_organization, {
        #     project.organization_id: {
        #         project.id: 1,
        #     },
        # }),
        # (tsdb.models.frequent_issues_by_project, {
        #     project.id: {
        #         group.id: 1,
        #     },
        # })
        (tsdb.models.frequent_environments_by_group, {
            group.id: {
                environment.id: 1,
            },
        })
    ]

    if release:
        # ``grouprelease`` is only bound when a release is present, which
        # is guaranteed by this guard.
        frequencies.append(
            (tsdb.models.frequent_releases_by_group, {
                group.id: {
                    grouprelease.id: 1,
                },
            })
        )

    tsdb.record_frequency_multi(frequencies, timestamp=event.datetime)

    UserReport.objects.filter(
        project=project,
        event_id=event_id,
    ).update(
        group=group,
        environment=environment,
    )

    # save the event unless it's been sampled
    if not is_sample:
        try:
            with transaction.atomic(using=router.db_for_write(Event)):
                event.save()
        except IntegrityError:
            logger.info(
                'duplicate.found',
                exc_info=True,
                extra={
                    'event_uuid': event_id,
                    'project_id': project.id,
                    'group_id': group.id,
                    'model': Event.__name__,
                }
            )
            return event

        tagstore.delay_index_event_tags(
            organization_id=project.organization_id,
            project_id=project.id,
            group_id=group.id,
            environment_id=environment.id,
            event_id=event.id,
            tags=event.tags,
            date_added=event.datetime,
        )

    if event_user:
        tsdb.record_multi(
            (
                (tsdb.models.users_affected_by_group, group.id, (event_user.tag_value, )),
                (tsdb.models.users_affected_by_project, project.id, (event_user.tag_value, )),
            ),
            timestamp=event.datetime,
            environment_id=environment.id,
        )

    if release:
        if is_new:
            buffer.incr(
                ReleaseProject, {'new_groups': 1}, {
                    'release_id': release.id,
                    'project_id': project.id,
                }
            )
        if is_new_group_environment:
            buffer.incr(
                ReleaseProjectEnvironment, {'new_issues_count': 1}, {
                    'project_id': project.id,
                    'release_id': release.id,
                    'environment_id': environment.id,
                }
            )

    safe_execute(
        Group.objects.add_tags,
        group,
        environment,
        event.get_tags(),
        _with_transaction=False)

    if not raw:
        if not project.first_event:
            project.update(first_event=date)
            first_event_received.send_robust(project=project, group=group, sender=Project)

    eventstream.insert(
        group=group,
        event=event,
        is_new=is_new,
        is_sample=is_sample,
        is_regression=is_regression,
        is_new_group_environment=is_new_group_environment,
        primary_hash=hashes[0],
        # We are choosing to skip consuming the event back
        # in the eventstream if it's flagged as raw.
        # This means that we want to publish the event
        # through the event stream, but we don't care
        # about post processing and handling the commit.
        skip_consume=raw,
    )

    metrics.timing(
        'events.latency',
        received_timestamp - recorded_timestamp,
        tags={
            'project_id': project.id,
        },
    )

    return event