def repair_tsdb_data(caches, project, events):
    counters, sets, frequencies = collect_tsdb_data(caches, project, events)

    for timestamp, data in counters.items():
        for model, keys in data.items():
            for key, value in keys.items():
                tsdb.incr(model, key, timestamp, value)

    for timestamp, data in sets.items():
        for model, keys in data.items():
            for key, values in keys.items():
                # TODO: This should use `record_multi` rather than `record`.
                tsdb.record(model, key, values, timestamp)

    for timestamp, data in frequencies.items():
        tsdb.record_frequency_multi(data.items(), timestamp)
def repair_tsdb_data(caches, project, events):
    counters, sets, frequencies = collect_tsdb_data(caches, project, events)

    for timestamp, data in counters.items():
        for model, keys in data.items():
            for (key, environment_id), value in keys.items():
                tsdb.incr(model, key, timestamp, value, environment_id=environment_id)

    for timestamp, data in sets.items():
        for model, keys in data.items():
            for (key, environment_id), values in keys.items():
                # TODO: This should use `record_multi` rather than `record`.
                tsdb.record(model, key, values, timestamp, environment_id=environment_id)

    for timestamp, data in frequencies.items():
        tsdb.record_frequency_multi(data.items(), timestamp)
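# Both variants above walk the same nested mappings returned by
# `collect_tsdb_data`; the second simply keys counters and sets by
# `(key, environment_id)` pairs and forwards the environment to the tsdb
# calls. The loops imply the following shapes. This is a sketch: only the
# structure is taken from the source, the IDs and values are made up, and
# the `from sentry import tsdb` import path is an assumption.

from sentry import tsdb  # assumed import path

timestamp = 1478021400  # hypothetical rollup bucket (epoch seconds)
group_id, environment_id, release_id = 1, 2, 3  # hypothetical IDs

counters = {
    timestamp: {
        tsdb.models.group: {
            # first variant uses a bare key; second uses (key, environment_id)
            (group_id, environment_id): 3,
        },
    },
}

sets = {
    timestamp: {
        tsdb.models.users_affected_by_group: {
            (group_id, environment_id): {'user:1', 'user:2'},
        },
    },
}

frequencies = {
    timestamp: {
        tsdb.models.frequent_releases_by_group: {
            group_id: {release_id: 1.0},
        },
    },
}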
def save(self, project, raw=False):
    from sentry.tasks.post_process import index_event_tags

    project = Project.objects.get_from_cache(id=project)

    data = self.data.copy()

    # First we pull out our top-level (non-data attr) kwargs
    event_id = data.pop('event_id')
    level = data.pop('level')
    culprit = data.pop('culprit', None)
    logger_name = data.pop('logger', None)
    server_name = data.pop('server_name', None)
    site = data.pop('site', None)
    checksum = data.pop('checksum', None)
    fingerprint = data.pop('fingerprint', None)
    platform = data.pop('platform', None)
    release = data.pop('release', None)
    environment = data.pop('environment', None)

    # unused
    time_spent = data.pop('time_spent', None)

    message = data.pop('message', '')

    if not culprit:
        # if we generate an implicit culprit, lets not call it a
        # transaction
        transaction_name = None
        culprit = generate_culprit(data, platform=platform)
    else:
        transaction_name = culprit

    date = datetime.fromtimestamp(data.pop('timestamp'))
    date = date.replace(tzinfo=timezone.utc)

    kwargs = {
        'platform': platform,
    }

    event = Event(
        project_id=project.id,
        event_id=event_id,
        data=data,
        time_spent=time_spent,
        datetime=date,
        **kwargs
    )

    # convert this to a dict to ensure we're only storing one value per key
    # as most parts of Sentry dont currently play well with multiple values
    tags = dict(data.get('tags') or [])
    tags['level'] = LOG_LEVELS[level]
    if logger_name:
        tags['logger'] = logger_name
    if server_name:
        tags['server_name'] = server_name
    if site:
        tags['site'] = site
    if environment:
        tags['environment'] = environment
    if transaction_name:
        tags['transaction'] = transaction_name

    if release:
        # dont allow a conflicting 'release' tag
        if 'release' in tags:
            del tags['release']
        tags['sentry:release'] = release

    event_user = self._get_event_user(project, data)
    if event_user:
        # dont allow a conflicting 'user' tag
        if 'user' in tags:
            del tags['user']
        tags['sentry:user'] = event_user.tag_value

    for plugin in plugins.for_project(project, version=None):
        added_tags = safe_execute(plugin.get_tags, event, _with_transaction=False)
        if added_tags:
            # plugins should not override user provided tags
            for key, value in added_tags:
                tags.setdefault(key, value)

    # tags are stored as a list of tuples (listified so the `extend`
    # calls below also work on Python 3 dict views)
    tags = list(tags.items())

    # XXX(dcramer): we're relying on mutation of the data object to ensure
    # this propagates into Event
    data['tags'] = tags

    data['fingerprint'] = fingerprint or ['{{ default }}']

    for path, iface in six.iteritems(event.interfaces):
        data['tags'].extend(iface.iter_tags())
        # Get rid of ephemeral interface data
        if iface.ephemeral:
            data.pop(iface.get_path(), None)

    # prioritize fingerprint over checksum as its likely the client defaulted
    # a checksum whereas the fingerprint was explicit
    if fingerprint:
        hashes = [
            md5_from_hash(h)
            for h in get_hashes_from_fingerprint(event, fingerprint)
        ]
    elif checksum:
        hashes = [checksum]
        data['checksum'] = checksum
    else:
        hashes = [
            md5_from_hash(h)
            for h in get_hashes_for_event(event)
        ]

    # TODO(dcramer): temp workaround for complexity
    data['message'] = message
    event_type = eventtypes.get(data.get('type', 'default'))(data)
    event_metadata = event_type.get_metadata()
    # TODO(dcramer): temp workaround for complexity
    del data['message']

    data['type'] = event_type.key
    data['metadata'] = event_metadata

    # index components into ``Event.message``
    # See GH-3248
    if event_type.key != 'default':
        if 'sentry.interfaces.Message' in data and \
                data['sentry.interfaces.Message']['message'] != message:
            message = u'{} {}'.format(
                message,
                data['sentry.interfaces.Message']['message'],
            )

    if not message:
        message = ''
    elif not isinstance(message, six.string_types):
        message = force_text(message)

    for value in six.itervalues(event_metadata):
        value_u = force_text(value, errors='replace')
        if value_u not in message:
            message = u'{} {}'.format(message, value_u)

    if culprit and culprit not in message:
        culprit_u = force_text(culprit, errors='replace')
        message = u'{} {}'.format(message, culprit_u)

    message = trim(message.strip(), settings.SENTRY_MAX_MESSAGE_LENGTH)

    event.message = message
    kwargs['message'] = message

    group_kwargs = kwargs.copy()
    group_kwargs.update({
        'culprit': culprit,
        'logger': logger_name,
        'level': level,
        'last_seen': date,
        'first_seen': date,
        'active_at': date,
        'data': {
            'last_received': event.data.get('received') or float(event.datetime.strftime('%s')),
            'type': event_type.key,
            # we cache the events metadata on the group to ensure its
            # accessible in the stream
            'metadata': event_metadata,
        },
    })

    if release:
        release = Release.get_or_create(
            project=project,
            version=release,
            date_added=date,
        )

        group_kwargs['first_release'] = release

    group, is_new, is_regression, is_sample = self._save_aggregate(
        event=event,
        hashes=hashes,
        release=release,
        **group_kwargs
    )

    event.group = group
    # store a reference to the group id to guarantee validation of isolation
    event.data.bind_ref(event)

    try:
        with transaction.atomic(using=router.db_for_write(EventMapping)):
            EventMapping.objects.create(
                project=project, group=group, event_id=event_id)
    except IntegrityError:
        self.logger.info('duplicate.found', extra={'event_id': event.id}, exc_info=True)
        return event

    environment = Environment.get_or_create(
        project=project,
        name=environment,
    )

    if release:
        ReleaseEnvironment.get_or_create(
            project=project,
            release=release,
            environment=environment,
            datetime=date,
        )

        grouprelease = GroupRelease.get_or_create(
            group=group,
            release=release,
            environment=environment,
            datetime=date,
        )

    counters = [
        (tsdb.models.group, group.id),
        (tsdb.models.project, project.id),
    ]

    if release:
        counters.append((tsdb.models.release, release.id))

    tsdb.incr_multi(counters, timestamp=event.datetime)

    frequencies = [
        # (tsdb.models.frequent_projects_by_organization, {
        #     project.organization_id: {
        #         project.id: 1,
        #     },
        # }),
        # (tsdb.models.frequent_issues_by_project, {
        #     project.id: {
        #         group.id: 1,
        #     },
        # })
        (tsdb.models.frequent_environments_by_group, {
            group.id: {
                environment.id: 1,
            },
        }),
    ]

    if release:
        frequencies.append(
            (tsdb.models.frequent_releases_by_group, {
                group.id: {
                    grouprelease.id: 1,
                },
            })
        )

    tsdb.record_frequency_multi(frequencies, timestamp=event.datetime)

    UserReport.objects.filter(
        project=project,
        event_id=event_id,
    ).update(group=group)

    # save the event unless its been sampled
    if not is_sample:
        try:
            with transaction.atomic(using=router.db_for_write(Event)):
                event.save()
        except IntegrityError:
            self.logger.info('duplicate.found', extra={'event_id': event.id}, exc_info=True)
            return event

        index_event_tags.delay(
            project_id=project.id,
            group_id=group.id,
            event_id=event.id,
            tags=tags,
        )

    if event_user:
        tsdb.record_multi((
            (tsdb.models.users_affected_by_group, group.id, (event_user.tag_value,)),
            (tsdb.models.users_affected_by_project, project.id, (event_user.tag_value,)),
        ), timestamp=event.datetime)

    if is_new and release:
        buffer.incr(Release, {'new_groups': 1}, {
            'id': release.id,
        })

    safe_execute(Group.objects.add_tags, group, tags, _with_transaction=False)

    if not raw:
        if not project.first_event:
            project.update(first_event=date)
            first_event_received.send(project=project, group=group, sender=Project)

        post_process_group.delay(
            group=group,
            event=event,
            is_new=is_new,
            is_sample=is_sample,
            is_regression=is_regression,
        )
    else:
        self.logger.info('post_process.skip.raw_event', extra={'event_id': event.id})

    # TODO: move this to the queue
    if is_regression and not raw:
        regression_signal.send_robust(sender=Group, instance=group)

    return event
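# `save` is an instance method; in Sentry the enclosing class is
# `EventManager`, which normalizes a raw payload before persisting it.
# A minimal usage sketch: the payload fields and the project id below are
# illustrative, not taken from the source.

from sentry.event_manager import EventManager

manager = EventManager(data={
    'event_id': 'a' * 32,
    'level': 40,  # logging.ERROR
    'message': 'something broke',
    'timestamp': 1478021400.0,
    'platform': 'python',
})
manager.normalize()      # fill in defaults before saving
event = manager.save(1)  # takes a project *id*, not a Project instance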
def _save_aggregate(self, event, hashes, release, **kwargs):
    project = event.project

    # attempt to find a matching hash
    all_hashes = self._find_hashes(project, hashes)

    try:
        # take the first hash that is already bound to a group, if any
        existing_group_id = next(h[0] for h in all_hashes if h[0])
    except StopIteration:
        existing_group_id = None

    # XXX(dcramer): this has the opportunity to create duplicate groups
    # it should be resolved by the hash merging function later but this
    # should be better tested/reviewed
    if existing_group_id is None:
        kwargs['score'] = ScoreClause.calculate(1, kwargs['last_seen'])
        group, group_is_new = Group.objects.create(project=project, **kwargs), True
    else:
        group = Group.objects.get(id=existing_group_id)
        group_is_new = False

    # If all hashes are brand new we treat this event as new
    is_new = False
    new_hashes = [h[1] for h in all_hashes if h[0] is None]
    if new_hashes:
        affected = GroupHash.objects.filter(
            project=project,
            hash__in=new_hashes,
            group__isnull=True,
        ).update(group=group)

        if affected != len(new_hashes):
            self._ensure_hashes_merged(group, new_hashes)
        elif group_is_new and len(new_hashes) == len(all_hashes):
            is_new = True

    # XXX(dcramer): it's important this gets called **before** the aggregate
    # is processed as otherwise values like last_seen will get mutated
    can_sample = should_sample(event.datetime, group.last_seen, group.times_seen)

    if not is_new:
        is_regression = self._process_existing_aggregate(
            group=group,
            event=event,
            data=kwargs,
            release=release,
        )
    else:
        is_regression = False

    # Determine if we've sampled enough data to store this event
    if is_new or is_regression:
        is_sample = False
    else:
        is_sample = can_sample

    tsdb.incr_multi([
        (tsdb.models.group, group.id),
        (tsdb.models.project, project.id),
    ], timestamp=event.datetime)

    tsdb.record_frequency_multi([
        (tsdb.models.frequent_projects_by_organization, {
            project.organization_id: {
                project.id: 1,
            },
        }),
        (tsdb.models.frequent_issues_by_project, {
            project.id: {
                group.id: 1,
            },
        }),
    ], timestamp=event.datetime)

    return group, is_new, is_regression, is_sample
def _save_aggregate(self, event, hashes, release, **kwargs):
    project = event.project

    # attempt to find a matching hash
    all_hashes = self._find_hashes(project, hashes)

    try:
        # take the first hash that is already bound to a group, if any
        existing_group_id = next(h[0] for h in all_hashes if h[0])
    except StopIteration:
        existing_group_id = None

    # XXX(dcramer): this has the opportunity to create duplicate groups
    # it should be resolved by the hash merging function later but this
    # should be better tested/reviewed
    if existing_group_id is None:
        kwargs['score'] = ScoreClause.calculate(1, kwargs['last_seen'])
        group, group_is_new = Group.objects.create(
            project=project,
            **kwargs
        ), True
    else:
        group = Group.objects.get(id=existing_group_id)
        group_is_new = False

    # If all hashes are brand new we treat this event as new
    is_new = False
    new_hashes = [h[1] for h in all_hashes if h[0] is None]
    if new_hashes:
        affected = GroupHash.objects.filter(
            project=project,
            hash__in=new_hashes,
            group__isnull=True,
        ).update(
            group=group,
        )

        if affected != len(new_hashes):
            self._ensure_hashes_merged(group, new_hashes)
        elif group_is_new and len(new_hashes) == len(all_hashes):
            is_new = True

    # XXX(dcramer): it's important this gets called **before** the aggregate
    # is processed as otherwise values like last_seen will get mutated
    can_sample = should_sample(
        event.data.get('received') or float(event.datetime.strftime('%s')),
        group.data.get('last_received') or float(group.last_seen.strftime('%s')),
        group.times_seen,
    )

    if not is_new:
        is_regression = self._process_existing_aggregate(
            group=group,
            event=event,
            data=kwargs,
            release=release,
        )
    else:
        is_regression = False

    # Determine if we've sampled enough data to store this event
    if is_new or is_regression:
        is_sample = False
    else:
        is_sample = can_sample

    tsdb.incr_multi([
        (tsdb.models.group, group.id),
        (tsdb.models.project, project.id),
    ], timestamp=event.datetime)

    tsdb.record_frequency_multi([
        (tsdb.models.frequent_projects_by_organization, {
            project.organization_id: {
                project.id: 1,
            },
        }),
        (tsdb.models.frequent_issues_by_project, {
            project.id: {
                group.id: 1,
            },
        }),
    ], timestamp=event.datetime)

    return group, is_new, is_regression, is_sample
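# The two `_save_aggregate` versions differ mainly in what they feed
# `should_sample`: the first passes datetimes (`event.datetime`,
# `group.last_seen`), the second passes POSIX timestamps with a `received`
# fallback, so sampling tracks when events were *received* rather than when
# they occurred. `should_sample` itself is not part of this section; the
# following is only a minimal sketch of a sampler over the timestamp-based
# inputs, with made-up thresholds, not the actual implementation.

def should_sample(current_ts, last_ts, times_seen,
                  min_events=10, silence_window=3600.0):
    """Return True if this event may be sampled (i.e. not stored verbatim)."""
    if times_seen <= min_events:
        # keep the first few events of a group verbatim
        return False
    if current_ts - last_ts >= silence_window:
        # the group went quiet; the next event is interesting again
        return False
    # high-volume, recently-seen group: safe to sample
    return True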
def save(self, project, raw=False):
    from sentry.tasks.post_process import index_event_tags

    project = Project.objects.get_from_cache(id=project)

    data = self.data.copy()

    # First we pull out our top-level (non-data attr) kwargs
    event_id = data.pop("event_id")
    level = data.pop("level")
    culprit = data.pop("culprit", None)
    logger_name = data.pop("logger", None)
    server_name = data.pop("server_name", None)
    site = data.pop("site", None)
    checksum = data.pop("checksum", None)
    fingerprint = data.pop("fingerprint", None)
    platform = data.pop("platform", None)
    release = data.pop("release", None)
    environment = data.pop("environment", None)

    # unused
    time_spent = data.pop("time_spent", None)

    message = data.pop("message", "")

    if not culprit:
        culprit = generate_culprit(data, platform=platform)

    date = datetime.fromtimestamp(data.pop("timestamp"))
    date = date.replace(tzinfo=timezone.utc)

    kwargs = {"platform": platform}

    event = Event(
        project_id=project.id,
        event_id=event_id,
        data=data,
        time_spent=time_spent,
        datetime=date,
        **kwargs
    )

    tags = data.get("tags") or []
    tags.append(("level", LOG_LEVELS[level]))
    if logger_name:
        tags.append(("logger", logger_name))
    if server_name:
        tags.append(("server_name", server_name))
    if site:
        tags.append(("site", site))
    if release:
        # TODO(dcramer): we should ensure we create Release objects
        tags.append(("sentry:release", release))
    if environment:
        tags.append(("environment", environment))

    for plugin in plugins.for_project(project, version=None):
        added_tags = safe_execute(plugin.get_tags, event, _with_transaction=False)
        if added_tags:
            tags.extend(added_tags)

    event_user = self._get_event_user(project, data)
    if event_user:
        tags.append(("sentry:user", event_user.tag_value))

    # XXX(dcramer): we're relying on mutation of the data object to ensure
    # this propagates into Event
    data["tags"] = tags

    data["fingerprint"] = fingerprint or ["{{ default }}"]

    for path, iface in six.iteritems(event.interfaces):
        data["tags"].extend(iface.iter_tags())
        # Get rid of ephemeral interface data
        if iface.ephemeral:
            data.pop(iface.get_path(), None)

    # prioritize fingerprint over checksum as its likely the client defaulted
    # a checksum whereas the fingerprint was explicit
    if fingerprint:
        hashes = [md5_from_hash(h) for h in get_hashes_from_fingerprint(event, fingerprint)]
    elif checksum:
        hashes = [checksum]
    else:
        hashes = [md5_from_hash(h) for h in get_hashes_for_event(event)]

    # TODO(dcramer): temp workaround for complexity
    data["message"] = message
    event_type = eventtypes.get(data.get("type", "default"))(data)
    event_metadata = event_type.get_metadata()
    # TODO(dcramer): temp workaround for complexity
    del data["message"]

    data["type"] = event_type.key
    data["metadata"] = event_metadata

    # index components into ``Event.message``
    # See GH-3248
    if event_type.key != "default":
        if "sentry.interfaces.Message" in data and \
                data["sentry.interfaces.Message"]["message"] != message:
            message = u"{} {}".format(message, data["sentry.interfaces.Message"]["message"])

    if not message:
        message = ""
    elif not isinstance(message, six.string_types):
        message = force_text(message)

    for value in six.itervalues(event_metadata):
        value_u = force_text(value, errors="replace")
        if value_u not in message:
            message = u"{} {}".format(message, value_u)

    message = trim(message.strip(), settings.SENTRY_MAX_MESSAGE_LENGTH)

    event.message = message
    kwargs["message"] = message

    group_kwargs = kwargs.copy()
    group_kwargs.update({
        "culprit": culprit,
        "logger": logger_name,
        "level": level,
        "last_seen": date,
        "first_seen": date,
        "data": {
            "last_received": event.data.get("received") or float(event.datetime.strftime("%s")),
            "type": event_type.key,
            # we cache the events metadata on the group to ensure its
            # accessible in the stream
            "metadata": event_metadata,
        },
    })

    if release:
        release = Release.get_or_create(project=project, version=release, date_added=date)
        group_kwargs["first_release"] = release

    group, is_new, is_regression, is_sample = self._save_aggregate(
        event=event,
        hashes=hashes,
        release=release,
        **group_kwargs
    )

    event.group = group
    # store a reference to the group id to guarantee validation of isolation
    event.data.bind_ref(event)

    try:
        with transaction.atomic(using=router.db_for_write(EventMapping)):
            EventMapping.objects.create(project=project, group=group, event_id=event_id)
    except IntegrityError:
        self.logger.info("Duplicate EventMapping found for event_id=%s", event_id, exc_info=True)
        return event

    if release:
        grouprelease = GroupRelease.get_or_create(
            group=group,
            release=release,
            environment=environment,
            datetime=date,
        )

    tsdb.incr_multi([
        (tsdb.models.group, group.id),
        (tsdb.models.project, project.id),
    ], timestamp=event.datetime)

    frequencies = [
        # (tsdb.models.frequent_projects_by_organization, {
        #     project.organization_id: {
        #         project.id: 1,
        #     },
        # }),
        # (tsdb.models.frequent_issues_by_project, {
        #     project.id: {
        #         group.id: 1,
        #     },
        # })
    ]

    if release:
        frequencies.append((tsdb.models.frequent_releases_by_group, {
            group.id: {
                grouprelease.id: 1,
            },
        }))

    tsdb.record_frequency_multi(frequencies, timestamp=event.datetime)

    UserReport.objects.filter(
        project=project,
        event_id=event_id,
    ).update(group=group)

    # save the event unless its been sampled
    if not is_sample:
        try:
            with transaction.atomic(using=router.db_for_write(Event)):
                event.save()
        except IntegrityError:
            self.logger.info("Duplicate Event found for event_id=%s", event_id, exc_info=True)
            return event

        index_event_tags.delay(
            project_id=project.id,
            group_id=group.id,
            event_id=event.id,
            tags=tags,
        )

    if event_user:
        tsdb.record_multi((
            (tsdb.models.users_affected_by_group, group.id, (event_user.tag_value,)),
            (tsdb.models.users_affected_by_project, project.id, (event_user.tag_value,)),
        ), timestamp=event.datetime)

    if is_new and release:
        buffer.incr(Release, {"new_groups": 1}, {"id": release.id})

    safe_execute(Group.objects.add_tags, group, tags, _with_transaction=False)

    if not raw:
        if not project.first_event:
            project.update(first_event=date)
            first_event_received.send(project=project, group=group, sender=Project)

        post_process_group.delay(
            group=group,
            event=event,
            is_new=is_new,
            is_sample=is_sample,
            is_regression=is_regression,
        )
    else:
        self.logger.info("Raw event passed; skipping post process for event_id=%s", event_id)

    # TODO: move this to the queue
    if is_regression and not raw:
        regression_signal.send_robust(sender=Group, instance=group)

    return event