Example #1
def repair_tsdb_data(caches, project, events):
    counters, sets, frequencies = collect_tsdb_data(caches, project, events)

    for timestamp, data in counters.items():
        for model, keys in data.items():
            for key, value in keys.items():
                tsdb.incr(model, key, timestamp, value)

    for timestamp, data in sets.items():
        for model, keys in data.items():
            for key, values in keys.items():
                # TODO: This should use `record_multi` rather than `record`.
                tsdb.record(model, key, values, timestamp)

    for timestamp, data in frequencies.items():
        tsdb.record_frequency_multi(data.items(), timestamp)
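The nesting these three loops walk is defined by collect_tsdb_data, which is not shown on this page. As a reading aid, here is an assumed sketch of the shape the loops imply (timestamp, then tsdb model, then key); the names timestamp, group_id, and project_id and all values are illustrative placeholders, not values from the source:

    # Assumed shape of the (counters, sets, frequencies) tuple (illustrative):
    counters = {
        timestamp: {                    # datetime bucket
            tsdb.models.group: {        # tsdb model
                group_id: 3,            # key -> amount to increment by
            },
        },
    }
    sets = {
        timestamp: {
            tsdb.models.users_affected_by_group: {
                group_id: {'user:1', 'user:2'},   # key -> members to record
            },
        },
    }
    frequencies = {
        timestamp: {
            # data.items() yields the (model, {key: {member: score}}) pairs
            # that record_frequency_multi expects.
            tsdb.models.frequent_issues_by_project: {project_id: {group_id: 1}},
        },
    }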
Example #2
def repair_tsdb_data(caches, project, events):
    counters, sets, frequencies = collect_tsdb_data(caches, project, events)

    for timestamp, data in counters.items():
        for model, keys in data.items():
            for (key, environment_id), value in keys.items():
                tsdb.incr(model, key, timestamp, value, environment_id=environment_id)

    for timestamp, data in sets.items():
        for model, keys in data.items():
            for (key, environment_id), values in keys.items():
                # TODO: This should use `record_multi` rather than `record`.
                tsdb.record(model, key, values, timestamp, environment_id=environment_id)

    for timestamp, data in frequencies.items():
        tsdb.record_frequency_multi(data.items(), timestamp)
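Example #2 differs from Example #1 only in the counter and set key layout: keys are (key, environment_id) pairs, so each key can carry an aggregate row plus one row per environment, and the environment_id is forwarded to tsdb.incr and tsdb.record. A hypothetical slice of that structure (names and values are placeholders):

    counters = {
        timestamp: {
            tsdb.models.group: {
                (group_id, None): 5,              # total across all environments
                (group_id, environment_id): 2,    # per-environment slice
            },
        },
    }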
Example #3
    def save(self, project, raw=False):
        from sentry.tasks.post_process import index_event_tags

        project = Project.objects.get_from_cache(id=project)

        data = self.data.copy()

        # First we pull out our top-level (non-data attr) kwargs
        event_id = data.pop('event_id')
        level = data.pop('level')

        culprit = data.pop('culprit', None)
        logger_name = data.pop('logger', None)
        server_name = data.pop('server_name', None)
        site = data.pop('site', None)
        checksum = data.pop('checksum', None)
        fingerprint = data.pop('fingerprint', None)
        platform = data.pop('platform', None)
        release = data.pop('release', None)
        environment = data.pop('environment', None)

        # unused
        time_spent = data.pop('time_spent', None)
        message = data.pop('message', '')

        if not culprit:
            # if we generate an implicit culprit, let's not call it a
            # transaction
            transaction_name = None
            culprit = generate_culprit(data, platform=platform)
        else:
            transaction_name = culprit

        date = datetime.fromtimestamp(data.pop('timestamp'))
        date = date.replace(tzinfo=timezone.utc)

        kwargs = {
            'platform': platform,
        }

        event = Event(
            project_id=project.id,
            event_id=event_id,
            data=data,
            time_spent=time_spent,
            datetime=date,
            **kwargs
        )

        # convert this to a dict to ensure we're only storing one value per key
        # as most parts of Sentry don't currently play well with multiple values
        tags = dict(data.get('tags') or [])
        tags['level'] = LOG_LEVELS[level]
        if logger_name:
            tags['logger'] = logger_name
        if server_name:
            tags['server_name'] = server_name
        if site:
            tags['site'] = site
        if environment:
            tags['environment'] = environment
        if transaction_name:
            tags['transaction'] = transaction_name

        if release:
            # don't allow a conflicting 'release' tag
            if 'release' in tags:
                del tags['release']
            tags['sentry:release'] = release

        event_user = self._get_event_user(project, data)
        if event_user:
            # don't allow a conflicting 'user' tag
            if 'user' in tags:
                del tags['user']
            tags['sentry:user'] = event_user.tag_value

        for plugin in plugins.for_project(project, version=None):
            added_tags = safe_execute(plugin.get_tags, event,
                                      _with_transaction=False)
            if added_tags:
                # plugins should not override user provided tags
                for key, value in added_tags:
                    tags.setdefault(key, value)

        # tags are stored as a list of (key, value) tuples
        tags = tags.items()

        # XXX(dcramer): we're relying on mutation of the data object to ensure
        # this propagates into Event
        data['tags'] = tags

        data['fingerprint'] = fingerprint or ['{{ default }}']

        for path, iface in six.iteritems(event.interfaces):
            data['tags'].extend(iface.iter_tags())
            # Get rid of ephemeral interface data
            if iface.ephemeral:
                data.pop(iface.get_path(), None)

        # prioritize fingerprint over checksum as it's likely the client defaulted
        # a checksum whereas the fingerprint was explicit
        if fingerprint:
            hashes = [
                md5_from_hash(h)
                for h in get_hashes_from_fingerprint(event, fingerprint)
            ]
        elif checksum:
            hashes = [checksum]
            data['checksum'] = checksum
        else:
            hashes = [
                md5_from_hash(h)
                for h in get_hashes_for_event(event)
            ]

        # TODO(dcramer): temp workaround for complexity
        data['message'] = message
        event_type = eventtypes.get(data.get('type', 'default'))(data)
        event_metadata = event_type.get_metadata()
        # TODO(dcramer): temp workaround for complexity
        del data['message']

        data['type'] = event_type.key
        data['metadata'] = event_metadata

        # index components into ``Event.message``
        # See GH-3248
        if event_type.key != 'default':
            if 'sentry.interfaces.Message' in data and \
                    data['sentry.interfaces.Message']['message'] != message:
                message = u'{} {}'.format(
                    message,
                    data['sentry.interfaces.Message']['message'],
                )

        if not message:
            message = ''
        elif not isinstance(message, six.string_types):
            message = force_text(message)

        for value in six.itervalues(event_metadata):
            value_u = force_text(value, errors='replace')
            if value_u not in message:
                message = u'{} {}'.format(message, value_u)

        if culprit and culprit not in message:
            culprit_u = force_text(culprit, errors='replace')
            message = u'{} {}'.format(message, culprit_u)

        message = trim(message.strip(), settings.SENTRY_MAX_MESSAGE_LENGTH)

        event.message = message
        kwargs['message'] = message

        group_kwargs = kwargs.copy()
        group_kwargs.update({
            'culprit': culprit,
            'logger': logger_name,
            'level': level,
            'last_seen': date,
            'first_seen': date,
            'active_at': date,
            'data': {
                'last_received': event.data.get('received') or float(event.datetime.strftime('%s')),
                'type': event_type.key,
                # we cache the event's metadata on the group to ensure it's
                # accessible in the stream
                'metadata': event_metadata,
            },
        })

        if release:
            release = Release.get_or_create(
                project=project,
                version=release,
                date_added=date,
            )

            group_kwargs['first_release'] = release

        group, is_new, is_regression, is_sample = self._save_aggregate(
            event=event,
            hashes=hashes,
            release=release,
            **group_kwargs
        )

        event.group = group
        # store a reference to the group id to guarantee validation of isolation
        event.data.bind_ref(event)

        try:
            with transaction.atomic(using=router.db_for_write(EventMapping)):
                EventMapping.objects.create(
                    project=project, group=group, event_id=event_id)
        except IntegrityError:
            self.logger.info('duplicate.found', extra={'event_id': event.id}, exc_info=True)
            return event

        environment = Environment.get_or_create(
            project=project,
            name=environment,
        )

        if release:
            ReleaseEnvironment.get_or_create(
                project=project,
                release=release,
                environment=environment,
                datetime=date,
            )

            grouprelease = GroupRelease.get_or_create(
                group=group,
                release=release,
                environment=environment,
                datetime=date,
            )

        counters = [
            (tsdb.models.group, group.id),
            (tsdb.models.project, project.id),
        ]

        if release:
            counters.append((tsdb.models.release, release.id))

        tsdb.incr_multi(counters, timestamp=event.datetime)

        frequencies = [
            # (tsdb.models.frequent_projects_by_organization, {
            #     project.organization_id: {
            #         project.id: 1,
            #     },
            # }),
            # (tsdb.models.frequent_issues_by_project, {
            #     project.id: {
            #         group.id: 1,
            #     },
            # })
            (tsdb.models.frequent_environments_by_group, {
                group.id: {
                    environment.id: 1,
                },
            })
        ]

        if release:
            frequencies.append(
                (tsdb.models.frequent_releases_by_group, {
                    group.id: {
                        grouprelease.id: 1,
                    },
                })
            )

        tsdb.record_frequency_multi(frequencies, timestamp=event.datetime)

        UserReport.objects.filter(
            project=project, event_id=event_id,
        ).update(group=group)

        # save the event unless it's been sampled
        if not is_sample:
            try:
                with transaction.atomic(using=router.db_for_write(Event)):
                    event.save()
            except IntegrityError:
                self.logger.info('duplicate.found', extra={'event_id': event.id}, exc_info=True)
                return event

            index_event_tags.delay(
                project_id=project.id,
                group_id=group.id,
                event_id=event.id,
                tags=tags,
            )

        if event_user:
            tsdb.record_multi((
                (tsdb.models.users_affected_by_group, group.id, (event_user.tag_value,)),
                (tsdb.models.users_affected_by_project, project.id, (event_user.tag_value,)),
            ), timestamp=event.datetime)

        if is_new and release:
            buffer.incr(Release, {'new_groups': 1}, {
                'id': release.id,
            })

        safe_execute(Group.objects.add_tags, group, tags,
                     _with_transaction=False)

        if not raw:
            if not project.first_event:
                project.update(first_event=date)
                first_event_received.send(project=project, group=group, sender=Project)

            post_process_group.delay(
                group=group,
                event=event,
                is_new=is_new,
                is_sample=is_sample,
                is_regression=is_regression,
            )
        else:
            self.logger.info('post_process.skip.raw_event', extra={'event_id': event.id})

        # TODO: move this to the queue
        if is_regression and not raw:
            regression_signal.send_robust(sender=Group, instance=group)

        return event
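Note that save() takes a project id rather than a model instance and resolves it through Project.objects.get_from_cache. A minimal invocation sketch, assuming this method lives on Sentry's EventManager and that the payload has already been normalized (both assumptions; neither is shown on this page):

    # Hypothetical caller; raw_event_payload is an illustrative client dict.
    manager = EventManager(raw_event_payload)
    manager.normalize()                # assumed to fill defaults such as 'timestamp'
    event = manager.save(project_id)   # pass the id; save() resolves the Project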
Example #4
    def _save_aggregate(self, event, hashes, release, **kwargs):
        project = event.project

        # attempt to find a matching hash
        all_hashes = self._find_hashes(project, hashes)

        existing_group_id = next((h[0] for h in all_hashes if h[0]), None)

        # XXX(dcramer): this has the opportunity to create duplicate groups
        # it should be resolved by the hash merging function later but this
        # should be better tested/reviewed
        if existing_group_id is None:
            kwargs['score'] = ScoreClause.calculate(1, kwargs['last_seen'])
            group = Group.objects.create(project=project, **kwargs)
            group_is_new = True
        else:
            group = Group.objects.get(id=existing_group_id)

            group_is_new = False

        # If all hashes are brand new we treat this event as new
        is_new = False
        new_hashes = [h[1] for h in all_hashes if h[0] is None]
        if new_hashes:
            affected = GroupHash.objects.filter(
                project=project,
                hash__in=new_hashes,
                group__isnull=True,
            ).update(group=group)

            if affected != len(new_hashes):
                self._ensure_hashes_merged(group, new_hashes)
            elif group_is_new and len(new_hashes) == len(all_hashes):
                is_new = True

        # XXX(dcramer): it's important this gets called **before** the aggregate
        # is processed as otherwise values like last_seen will get mutated
        can_sample = should_sample(event.datetime, group.last_seen,
                                   group.times_seen)

        if not is_new:
            is_regression = self._process_existing_aggregate(
                group=group,
                event=event,
                data=kwargs,
                release=release,
            )
        else:
            is_regression = False

        # Determine if we've sampled enough data to store this event
        if is_new or is_regression:
            is_sample = False
        else:
            is_sample = can_sample

        tsdb.incr_multi([
            (tsdb.models.group, group.id),
            (tsdb.models.project, project.id),
        ], timestamp=event.datetime)

        tsdb.record_frequency_multi([
            (tsdb.models.frequent_projects_by_organization, {
                project.organization_id: {
                    project.id: 1,
                },
            }),
            (tsdb.models.frequent_issues_by_project, {
                project.id: {
                    group.id: 1,
                },
            }),
        ], timestamp=event.datetime)

        return group, is_new, is_regression, is_sample
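The first line of _save_aggregate scans (group_id, hash) pairs for the first hash already bound to a group. The helper _find_hashes is not shown here; the sketch below is an assumed illustration of the return shape that the h[0]/h[1] indexing implies, with made-up ids and digests:

    # Assumed return shape of self._find_hashes(project, hashes):
    all_hashes = [
        (42, 'c4ca4238a0b923820dcc509a6f75849b'),    # hash already mapped to group 42
        (None, 'c81e728d9d4c2f636f067f89cc14862c'),  # new hash -> h[0] is None
    ]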
Example #5
    def _save_aggregate(self, event, hashes, release, **kwargs):
        project = event.project

        # attempt to find a matching hash
        all_hashes = self._find_hashes(project, hashes)

        existing_group_id = next((h[0] for h in all_hashes if h[0]), None)

        # XXX(dcramer): this has the opportunity to create duplicate groups
        # it should be resolved by the hash merging function later but this
        # should be better tested/reviewed
        if existing_group_id is None:
            kwargs['score'] = ScoreClause.calculate(1, kwargs['last_seen'])
            group = Group.objects.create(project=project, **kwargs)
            group_is_new = True
        else:
            group = Group.objects.get(id=existing_group_id)

            group_is_new = False

        # If all hashes are brand new we treat this event as new
        is_new = False
        new_hashes = [h[1] for h in all_hashes if h[0] is None]
        if new_hashes:
            affected = GroupHash.objects.filter(
                project=project,
                hash__in=new_hashes,
                group__isnull=True,
            ).update(
                group=group,
            )

            if affected != len(new_hashes):
                self._ensure_hashes_merged(group, new_hashes)
            elif group_is_new and len(new_hashes) == len(all_hashes):
                is_new = True

        # XXX(dcramer): it's important this gets called **before** the aggregate
        # is processed as otherwise values like last_seen will get mutated
        can_sample = should_sample(
            event.data.get('received') or float(event.datetime.strftime('%s')),
            group.data.get('last_received') or float(group.last_seen.strftime('%s')),
            group.times_seen,
        )

        if not is_new:
            is_regression = self._process_existing_aggregate(
                group=group,
                event=event,
                data=kwargs,
                release=release,
            )
        else:
            is_regression = False

        # Determine if we've sampled enough data to store this event
        if is_new or is_regression:
            is_sample = False
        else:
            is_sample = can_sample

        tsdb.incr_multi([
            (tsdb.models.group, group.id),
            (tsdb.models.project, project.id),
        ], timestamp=event.datetime)

        tsdb.record_frequency_multi([
            (tsdb.models.frequent_projects_by_organization, {
                project.organization_id: {
                    project.id: 1,
                },
            }),
            (tsdb.models.frequent_issues_by_project, {
                project.id: {
                    group.id: 1,
                },
            }),
        ], timestamp=event.datetime)

        return group, is_new, is_regression, is_sample
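Relative to Example #4, this revision feeds should_sample epoch-second floats instead of datetimes, derived via strftime('%s'). One caveat worth flagging: %s is a glibc extension rather than standard strftime, so the conversion is platform dependent. A small, self-contained sketch of that conversion under the glibc assumption:

    from datetime import datetime

    dt = datetime(2017, 1, 10, 8, 30)
    # On glibc platforms '%s' formats local-time epoch seconds; a portable
    # equivalent would be time.mktime(dt.timetuple()).
    last_received = float(dt.strftime('%s'))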
Example #6
    def save(self, project, raw=False):
        from sentry.tasks.post_process import index_event_tags

        project = Project.objects.get_from_cache(id=project)

        data = self.data.copy()

        # First we pull out our top-level (non-data attr) kwargs
        event_id = data.pop("event_id")
        level = data.pop("level")

        culprit = data.pop("culprit", None)
        logger_name = data.pop("logger", None)
        server_name = data.pop("server_name", None)
        site = data.pop("site", None)
        checksum = data.pop("checksum", None)
        fingerprint = data.pop("fingerprint", None)
        platform = data.pop("platform", None)
        release = data.pop("release", None)
        environment = data.pop("environment", None)

        # unused
        time_spent = data.pop("time_spent", None)
        message = data.pop("message", "")

        if not culprit:
            culprit = generate_culprit(data, platform=platform)

        date = datetime.fromtimestamp(data.pop("timestamp"))
        date = date.replace(tzinfo=timezone.utc)

        kwargs = {"platform": platform}

        event = Event(
            project_id=project.id, event_id=event_id, data=data, time_spent=time_spent, datetime=date, **kwargs
        )

        tags = data.get("tags") or []
        tags.append(("level", LOG_LEVELS[level]))
        if logger_name:
            tags.append(("logger", logger_name))
        if server_name:
            tags.append(("server_name", server_name))
        if site:
            tags.append(("site", site))
        if release:
            # TODO(dcramer): we should ensure we create Release objects
            tags.append(("sentry:release", release))
        if environment:
            tags.append(("environment", environment))

        for plugin in plugins.for_project(project, version=None):
            added_tags = safe_execute(plugin.get_tags, event, _with_transaction=False)
            if added_tags:
                tags.extend(added_tags)

        event_user = self._get_event_user(project, data)
        if event_user:
            tags.append(("sentry:user", event_user.tag_value))

        # XXX(dcramer): we're relying on mutation of the data object to ensure
        # this propagates into Event
        data["tags"] = tags

        data["fingerprint"] = fingerprint or ["{{ default }}"]

        for path, iface in event.interfaces.iteritems():
            data["tags"].extend(iface.iter_tags())
            # Get rid of ephemeral interface data
            if iface.ephemeral:
                data.pop(iface.get_path(), None)

        # prioritize fingerprint over checksum as it's likely the client defaulted
        # a checksum whereas the fingerprint was explicit
        if fingerprint:
            hashes = map(md5_from_hash, get_hashes_from_fingerprint(event, fingerprint))
        elif checksum:
            hashes = [checksum]
        else:
            hashes = map(md5_from_hash, get_hashes_for_event(event))

        # TODO(dcramer): temp workaround for complexity
        data["message"] = message
        event_type = eventtypes.get(data.get("type", "default"))(data)
        event_metadata = event_type.get_metadata()
        # TODO(dcramer): temp workaround for complexity
        del data["message"]

        data["type"] = event_type.key
        data["metadata"] = event_metadata

        # index components into ``Event.message``
        # See GH-3248
        if event_type.key != "default":
            if "sentry.interfaces.Message" in data and data["sentry.interfaces.Message"]["message"] != message:
                message = u"{} {}".format(message, data["sentry.interfaces.Message"]["message"])

        if not message:
            message = ""
        elif not isinstance(message, basestring):
            message = force_text(message)

        for value in event_metadata.itervalues():
            value_u = force_text(value, errors="replace")
            if value_u not in message:
                message = u"{} {}".format(message, value_u)

        message = trim(message.strip(), settings.SENTRY_MAX_MESSAGE_LENGTH)

        event.message = message
        kwargs["message"] = message

        group_kwargs = kwargs.copy()
        group_kwargs.update(
            {
                "culprit": culprit,
                "logger": logger_name,
                "level": level,
                "last_seen": date,
                "first_seen": date,
                "data": {
                    "last_received": event.data.get("received") or float(event.datetime.strftime("%s")),
                    "type": event_type.key,
                    # we cache the event's metadata on the group to ensure it's
                    # accessible in the stream
                    "metadata": event_metadata,
                },
            }
        )

        if release:
            release = Release.get_or_create(project=project, version=release, date_added=date)

            group_kwargs["first_release"] = release

        group, is_new, is_regression, is_sample = self._save_aggregate(
            event=event, hashes=hashes, release=release, **group_kwargs
        )

        event.group = group
        # store a reference to the group id to guarantee validation of isolation
        event.data.bind_ref(event)

        try:
            with transaction.atomic(using=router.db_for_write(EventMapping)):
                EventMapping.objects.create(project=project, group=group, event_id=event_id)
        except IntegrityError:
            self.logger.info("Duplicate EventMapping found for event_id=%s", event_id, exc_info=True)
            return event

        if release:
            grouprelease = GroupRelease.get_or_create(
                group=group, release=release, environment=environment, datetime=date
            )

        tsdb.incr_multi([(tsdb.models.group, group.id), (tsdb.models.project, project.id)], timestamp=event.datetime)

        frequencies = [
            # (tsdb.models.frequent_projects_by_organization, {
            #     project.organization_id: {
            #         project.id: 1,
            #     },
            # }),
            # (tsdb.models.frequent_issues_by_project, {
            #     project.id: {
            #         group.id: 1,
            #     },
            # })
        ]
        if release:
            frequencies.append((tsdb.models.frequent_releases_by_groups, {group.id: {grouprelease.id: 1}}))

        tsdb.record_frequency_multi(frequencies, timestamp=event.datetime)

        UserReport.objects.filter(project=project, event_id=event_id).update(group=group)

        # save the event unless it's been sampled
        if not is_sample:
            try:
                with transaction.atomic(using=router.db_for_write(Event)):
                    event.save()
            except IntegrityError:
                self.logger.info("Duplicate Event found for event_id=%s", event_id, exc_info=True)
                return event

            index_event_tags.delay(project_id=project.id, group_id=group.id, event_id=event.id, tags=tags)

        if event_user:
            tsdb.record_multi(
                (
                    (tsdb.models.users_affected_by_group, group.id, (event_user.tag_value,)),
                    (tsdb.models.users_affected_by_project, project.id, (event_user.tag_value,)),
                ),
                timestamp=event.datetime,
            )

        if is_new and release:
            buffer.incr(Release, {"new_groups": 1}, {"id": release.id})

        safe_execute(Group.objects.add_tags, group, tags, _with_transaction=False)

        if not raw:
            if not project.first_event:
                project.update(first_event=date)
                first_event_received.send(project=project, group=group, sender=Project)

            post_process_group.delay(
                group=group, event=event, is_new=is_new, is_sample=is_sample, is_regression=is_regression
            )
        else:
            self.logger.info("Raw event passed; skipping post process for event_id=%s", event_id)

        # TODO: move this to the queue
        if is_regression and not raw:
            regression_signal.send_robust(sender=Group, instance=group)

        return event
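Example #6 is the earliest revision shown: tags accumulate in a plain list, so a client-supplied key can coexist with a server-added one, whereas Example #3 routes tags through a dict first and stores one value per key. A minimal, runnable illustration of that difference:

    # List style (Example #6): duplicate keys survive.
    tags = [('release', 'abc'), ('sentry:release', 'abc'), ('release', 'xyz')]

    # Dict style (Example #3): one value per key, last write wins.
    deduped = dict(tags)
    assert deduped['release'] == 'xyz'
    assert sorted(deduped) == ['release', 'sentry:release']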