Example #1
0
    def test_resolved_in_next_release(self):
        self.login_as(user=self.user)

        project = self.create_project()
        project.flags.has_releases = True
        project.save()
        group = self.create_group(project=project)
        Release.get_or_create(
            version='abcd',
            project=project,
        )

        url = '/api/0/issues/{}/'.format(group.id)

        response = self.client.put(
            url, data={
                'status': 'resolvedInNextRelease',
            }
        )
        assert response.status_code == 200, response.content

        group = Group.objects.get(
            id=group.id,
            project=group.project.id,
        )
        assert group.status == GroupStatus.RESOLVED

        assert GroupResolution.objects.filter(
            group=group,
        ).exists()
Example #2
0
    def test_transfer_to_team_releases(self):
        from_org = self.create_organization()
        from_team = self.create_team(organization=from_org)
        to_org = self.create_organization()
        to_team = self.create_team(organization=to_org)

        project = self.create_project(teams=[from_team])

        environment = Environment.get_or_create(project, 'production')
        release = Release.get_or_create(project=project, version='1.0')

        ReleaseProjectEnvironment.objects.create(
            project=project,
            release=release,
            environment=environment,
        )

        assert ReleaseProjectEnvironment.objects.filter(
            project=project,
            release=release,
            environment=environment,
        ).exists()
        assert ReleaseProject.objects.filter(
            project=project,
            release=release,
        ).exists()

        project.transfer_to(team=to_team)

        project = Project.objects.get(id=project.id)

        assert project.teams.count() == 1
        assert project.teams.first() == to_team
        assert project.organization_id == to_org.id

        assert not ReleaseProjectEnvironment.objects.filter(
            project=project,
            release=release,
            environment=environment,
        ).exists()
        assert not ReleaseProject.objects.filter(
            project=project,
            release=release,
        ).exists()
Example #3
0
 def get_release(self, create=False):
     """Convenient helper to return the release for the current data
     and optionally creates the release if it's missing.  In case there
     is no release info it will return `None`.
     """
     release = self.data.get('release')
     if not release:
         return None
     if not create:
         return Release.get(project=self.project, version=self.data['release'])
     timestamp = self.data.get('timestamp')
     if timestamp is not None:
         date = datetime.fromtimestamp(timestamp).replace(tzinfo=timezone.utc)
     else:
         date = None
     return Release.get_or_create(
         project=self.project,
         version=self.data['release'],
         date_added=date,
     )
Example #4
0
 def get_release(self, create=False):
     """Convenient helper to return the release for the current data
     and optionally creates the release if it's missing.  In case there
     is no release info it will return `None`.
     """
     release = self.data.get("release")
     if not release:
         return None
     if not create:
         return Release.get(project=self.project,
                            version=self.data["release"])
     timestamp = self.data.get("timestamp")
     if timestamp is not None:
         date = datetime.fromtimestamp(timestamp).replace(
             tzinfo=timezone.utc)
     else:
         date = None
     return Release.get_or_create(project=self.project,
                                  version=self.data["release"],
                                  date_added=date)
Example #5
0
def debounce_update_release_health_data(organization, project_ids):
    """This causes a flush of snuba health data to the postgres tables once
    per minute for the given projects.
    """
    # Figure out which projects need to get updates from the snuba.
    should_update = {}
    cache_keys = ["debounce-health:%d" % id for id in project_ids]
    cache_data = cache.get_many(cache_keys)
    for project_id, cache_key in izip(project_ids, cache_keys):
        if cache_data.get(cache_key) is None:
            should_update[project_id] = cache_key

    if not should_update:
        return

    projects = {p.id: p for p in Project.objects.get_many_from_cache(should_update.keys())}

    # This gives us updates for all release-projects which have seen new
    # health data over the last 24 hours. It will miss releases where the last
    # date is <24h ago.  We need to aggregate the data for the totals per release
    # manually here now.  This does not take environments into account.
    for project_id, version in get_changed_project_release_model_adoptions(should_update.keys()):
        project = projects.get(project_id)
        if project is None:
            # should not happen
            continue

        # We might have never observed the release.  This for instance can
        # happen if the release only had health data so far.  For these cases
        # we want to create the release the first time we observed it on the
        # health side.
        release = Release.get_or_create(project=project, version=version)

        # Make sure that the release knows about this project.  Like we had before
        # the project might not have been associated with this release yet.
        release.add_project(project)

    # Debounce updates for a minute
    cache.set_many(dict(izip(should_update.values(), [True] * len(should_update))), 60)
Example #6
0
def _get_or_create_release_many(jobs, projects):
    jobs_with_releases = {}
    release_date_added = {}

    for job in jobs:
        if not job["release"]:
            continue

        release_key = (job["project_id"], job["release"])
        jobs_with_releases.setdefault(release_key, []).append(job)
        new_datetime = job["event"].datetime
        old_datetime = release_date_added.get(release_key)
        if old_datetime is None or new_datetime > old_datetime:
            release_date_added[release_key] = new_datetime

    for (project_id,
         version), jobs_to_update in six.iteritems(jobs_with_releases):
        release = Release.get_or_create(
            project=projects[project_id],
            version=version,
            date_added=release_date_added[(project_id, version)],
        )

        for job in jobs_to_update:
            # dont allow a conflicting 'release' tag
            data = job["data"]
            pop_tag(data, "release")
            set_tag(data, "sentry:release", release.version)

            job["release"] = release

            if job["dist"]:
                job["dist"] = job["release"].add_dist(job["dist"],
                                                      job["event"].datetime)

                # dont allow a conflicting 'dist' tag
                pop_tag(job["data"], "dist")
                set_tag(job["data"], "sentry:dist", job["dist"].name)
Example #7
0
    def save(self, project, raw=False):
        project = Project.objects.get_from_cache(id=project)

        data = self.data.copy()

        # First we pull out our top-level (non-data attr) kwargs
        event_id = data.pop('event_id')
        message = data.pop('message')
        level = data.pop('level')

        culprit = data.pop('culprit', None)
        time_spent = data.pop('time_spent', None)
        logger_name = data.pop('logger', None)
        server_name = data.pop('server_name', None)
        site = data.pop('site', None)
        checksum = data.pop('checksum', None)
        fingerprint = data.pop('fingerprint', None)
        platform = data.pop('platform', None)
        release = data.pop('release', None)
        environment = data.pop('environment', None)

        if not culprit:
            culprit = generate_culprit(data)

        date = datetime.fromtimestamp(data.pop('timestamp'))
        date = date.replace(tzinfo=timezone.utc)

        kwargs = {
            'message': message,
            'platform': platform,
        }

        event = Event(project_id=project.id,
                      event_id=event_id,
                      data=data,
                      time_spent=time_spent,
                      datetime=date,
                      **kwargs)

        tags = data.get('tags') or []
        tags.append(('level', LOG_LEVELS[level]))
        if logger_name:
            tags.append(('logger', logger_name))
        if server_name:
            tags.append(('server_name', server_name))
        if site:
            tags.append(('site', site))
        if release:
            # TODO(dcramer): we should ensure we create Release objects
            tags.append(('sentry:release', release))
        if environment:
            tags.append(('environment', environment))

        for plugin in plugins.for_project(project, version=None):
            added_tags = safe_execute(plugin.get_tags,
                                      event,
                                      _with_transaction=False)
            if added_tags:
                tags.extend(added_tags)

        event_user = self._get_event_user(project, data)
        if event_user:
            tags.append(('sentry:user', event_user.tag_value))

        # XXX(dcramer): we're relying on mutation of the data object to ensure
        # this propagates into Event
        data['tags'] = tags

        data['fingerprint'] = fingerprint or ['{{ default }}']

        # prioritize fingerprint over checksum as its likely the client defaulted
        # a checksum whereas the fingerprint was explicit
        if fingerprint:
            hashes = map(md5_from_hash,
                         get_hashes_from_fingerprint(event, fingerprint))
        elif checksum:
            hashes = [checksum]
        else:
            hashes = map(md5_from_hash, get_hashes_for_event(event))

        group_kwargs = kwargs.copy()
        group_kwargs.update({
            'culprit': culprit,
            'logger': logger_name,
            'level': level,
            'last_seen': date,
            'first_seen': date,
            'time_spent_total': time_spent or 0,
            'time_spent_count': time_spent and 1 or 0,
        })

        if release:
            release = Release.get_or_create(
                project=project,
                version=release,
                date_added=date,
            )

            group_kwargs['first_release'] = release

        group, is_new, is_regression, is_sample = self._save_aggregate(
            event=event, hashes=hashes, release=release, **group_kwargs)

        event.group = group
        # store a reference to the group id to guarantee validation of isolation
        event.data.bind_ref(event)

        try:
            with transaction.atomic(using=router.db_for_write(EventMapping)):
                EventMapping.objects.create(project=project,
                                            group=group,
                                            event_id=event_id)
        except IntegrityError:
            self.logger.info('Duplicate EventMapping found for event_id=%s',
                             event_id,
                             exc_info=True)
            return event

        UserReport.objects.filter(
            project=project,
            event_id=event_id,
        ).update(group=group)

        # save the event unless its been sampled
        if not is_sample:
            try:
                with transaction.atomic(using=router.db_for_write(Event)):
                    event.save()
            except IntegrityError:
                self.logger.info('Duplicate Event found for event_id=%s',
                                 event_id,
                                 exc_info=True)
                return event

        if event_user:
            tsdb.record_multi((
                (tsdb.models.users_affected_by_group, group.id,
                 (event_user.tag_value, )),
                (tsdb.models.users_affected_by_project, project.id,
                 (event_user.tag_value, )),
            ),
                              timestamp=event.datetime)

        if is_new and release:
            buffer.incr(Release, {'new_groups': 1}, {
                'id': release.id,
            })

        safe_execute(Group.objects.add_tags,
                     group,
                     tags,
                     _with_transaction=False)

        if not raw:
            if not project.first_event:
                project.update(first_event=date)

            post_process_group.delay(
                group=group,
                event=event,
                is_new=is_new,
                is_sample=is_sample,
                is_regression=is_regression,
            )
        else:
            self.logger.info(
                'Raw event passed; skipping post process for event_id=%s',
                event_id)

        # TODO: move this to the queue
        if is_regression and not raw:
            regression_signal.send_robust(sender=Group, instance=group)

        return event
Example #8
0
    def save(self, project_id, raw=False, assume_normalized=False):
        # Normalize if needed
        if not self._normalized:
            if not assume_normalized:
                self.normalize()
            self._normalized = True

        data = self._data

        project = Project.objects.get_from_cache(id=project_id)
        project._organization_cache = Organization.objects.get_from_cache(
            id=project.organization_id)

        # Check to make sure we're not about to do a bunch of work that's
        # already been done if we've processed an event with this ID. (This
        # isn't a perfect solution -- this doesn't handle ``EventMapping`` and
        # there's a race condition between here and when the event is actually
        # saved, but it's an improvement. See GH-7677.)
        try:
            event = Event.objects.get(
                project_id=project.id,
                event_id=data['event_id'],
            )
        except Event.DoesNotExist:
            pass
        else:
            # Make sure we cache on the project before returning
            event._project_cache = project
            logger.info(
                'duplicate.found',
                exc_info=True,
                extra={
                    'event_uuid': data['event_id'],
                    'project_id': project.id,
                    'model': Event.__name__,
                }
            )
            return event

        # Pull out the culprit
        culprit = self.get_culprit()

        # Pull the toplevel data we're interested in
        level = data.get('level')

        # TODO(mitsuhiko): this code path should be gone by July 2018.
        # This is going to be fine because no code actually still depends
        # on integers here.  When we need an integer it will be converted
        # into one later.  Old workers used to send integers here.
        if level is not None and isinstance(level, six.integer_types):
            level = LOG_LEVELS[level]

        transaction_name = data.get('transaction')
        logger_name = data.get('logger')
        release = data.get('release')
        dist = data.get('dist')
        environment = data.get('environment')
        recorded_timestamp = data.get('timestamp')

        # We need to swap out the data with the one internal to the newly
        # created event object
        event = self._get_event_instance(project_id=project_id)
        self._data = data = event.data.data

        event._project_cache = project

        date = event.datetime
        platform = event.platform
        event_id = event.event_id

        if transaction_name:
            transaction_name = force_text(transaction_name)

        # Some of the data that are toplevel attributes are duplicated
        # into tags (logger, level, environment, transaction).  These are
        # different from legacy attributes which are normalized into tags
        # ahead of time (site, server_name).
        setdefault_path(data, 'tags', value=[])
        set_tag(data, 'level', level)
        if logger_name:
            set_tag(data, 'logger', logger_name)
        if environment:
            set_tag(data, 'environment', environment)
        if transaction_name:
            set_tag(data, 'transaction', transaction_name)

        if release:
            # dont allow a conflicting 'release' tag
            pop_tag(data, 'release')
            release = Release.get_or_create(
                project=project,
                version=release,
                date_added=date,
            )
            set_tag(data, 'sentry:release', release.version)

        if dist and release:
            dist = release.add_dist(dist, date)
            # dont allow a conflicting 'dist' tag
            pop_tag(data, 'dist')
            set_tag(data, 'sentry:dist', dist.name)
        else:
            dist = None

        event_user = self._get_event_user(project, data)
        if event_user:
            # dont allow a conflicting 'user' tag
            pop_tag(data, 'user')
            set_tag(data, 'sentry:user', event_user.tag_value)

        # At this point we want to normalize the in_app values in case the
        # clients did not set this appropriately so far.
        grouping_config = load_grouping_config(
            get_grouping_config_dict_for_event_data(data, project))
        normalize_stacktraces_for_grouping(data, grouping_config)

        for plugin in plugins.for_project(project, version=None):
            added_tags = safe_execute(plugin.get_tags, event, _with_transaction=False)
            if added_tags:
                # plugins should not override user provided tags
                for key, value in added_tags:
                    if get_tag(data, key) is None:
                        set_tag(data, key, value)

        for path, iface in six.iteritems(event.interfaces):
            for k, v in iface.iter_tags():
                set_tag(data, k, v)
            # Get rid of ephemeral interface data
            if iface.ephemeral:
                data.pop(iface.path, None)

        # The active grouping config was put into the event in the
        # normalize step before.  We now also make sure that the
        # fingerprint was set to `'{{ default }}' just in case someone
        # removed it from the payload.  The call to get_hashes will then
        # look at `grouping_config` to pick the right paramters.
        data['fingerprint'] = data.get('fingerprint') or ['{{ default }}']
        apply_server_fingerprinting(data, get_fingerprinting_config_for_project(project))

        # Here we try to use the grouping config that was requested in the
        # event.  If that config has since been deleted (because it was an
        # experimental grouping config) we fall back to the default.
        try:
            hashes = event.get_hashes()
        except GroupingConfigNotFound:
            data['grouping_config'] = get_grouping_config_dict_for_project(project)
            hashes = event.get_hashes()

        data['hashes'] = hashes

        # we want to freeze not just the metadata and type in but also the
        # derived attributes.  The reason for this is that we push this
        # data into kafka for snuba processing and our postprocessing
        # picks up the data right from the snuba topic.  For most usage
        # however the data is dynamically overriden by Event.title and
        # Event.location (See Event.as_dict)
        materialized_metadata = self.materialize_metadata()
        event_metadata = materialized_metadata['metadata']
        data.update(materialized_metadata)
        data['culprit'] = culprit

        # index components into ``Event.message``
        # See GH-3248
        event.message = self.get_search_message(event_metadata, culprit)
        received_timestamp = event.data.get('received') or float(event.datetime.strftime('%s'))

        # The group gets the same metadata as the event when it's flushed but
        # additionally the `last_received` key is set.  This key is used by
        # _save_aggregate.
        group_metadata = dict(materialized_metadata)
        group_metadata['last_received'] = received_timestamp
        kwargs = {
            'platform': platform,
            'message': event.message,
            'culprit': culprit,
            'logger': logger_name,
            'level': LOG_LEVELS_MAP.get(level),
            'last_seen': date,
            'first_seen': date,
            'active_at': date,
            'data': group_metadata,
        }

        if release:
            kwargs['first_release'] = release

        try:
            group, is_new, is_regression, is_sample = self._save_aggregate(
                event=event, hashes=hashes, release=release, **kwargs
            )
        except HashDiscarded:
            event_discarded.send_robust(
                project=project,
                sender=EventManager,
            )

            metrics.incr(
                'events.discarded',
                skip_internal=True,
                tags={
                    'organization_id': project.organization_id,
                    'platform': platform,
                },
            )
            raise
        else:
            event_saved.send_robust(
                project=project,
                event_size=event.size,
                sender=EventManager,
            )

        event.group = group
        # store a reference to the group id to guarantee validation of isolation
        event.data.bind_ref(event)

        # When an event was sampled, the canonical source of truth
        # is the EventMapping table since we aren't going to be writing out an actual
        # Event row. Otherwise, if the Event isn't being sampled, we can safely
        # rely on the Event table itself as the source of truth and ignore
        # EventMapping since it's redundant information.
        if is_sample:
            try:
                with transaction.atomic(using=router.db_for_write(EventMapping)):
                    EventMapping.objects.create(project=project, group=group, event_id=event_id)
            except IntegrityError:
                logger.info(
                    'duplicate.found',
                    exc_info=True,
                    extra={
                        'event_uuid': event_id,
                        'project_id': project.id,
                        'group_id': group.id,
                        'model': EventMapping.__name__,
                    }
                )
                return event

        environment = Environment.get_or_create(
            project=project,
            name=environment,
        )

        group_environment, is_new_group_environment = GroupEnvironment.get_or_create(
            group_id=group.id,
            environment_id=environment.id,
            defaults={
                'first_release': release if release else None,
            },
        )

        if release:
            ReleaseEnvironment.get_or_create(
                project=project,
                release=release,
                environment=environment,
                datetime=date,
            )

            ReleaseProjectEnvironment.get_or_create(
                project=project,
                release=release,
                environment=environment,
                datetime=date,
            )

            grouprelease = GroupRelease.get_or_create(
                group=group,
                release=release,
                environment=environment,
                datetime=date,
            )

        counters = [
            (tsdb.models.group, group.id),
            (tsdb.models.project, project.id),
        ]

        if release:
            counters.append((tsdb.models.release, release.id))

        tsdb.incr_multi(counters, timestamp=event.datetime, environment_id=environment.id)

        frequencies = [
            # (tsdb.models.frequent_projects_by_organization, {
            #     project.organization_id: {
            #         project.id: 1,
            #     },
            # }),
            # (tsdb.models.frequent_issues_by_project, {
            #     project.id: {
            #         group.id: 1,
            #     },
            # })
            (tsdb.models.frequent_environments_by_group, {
                group.id: {
                    environment.id: 1,
                },
            })
        ]

        if release:
            frequencies.append(
                (tsdb.models.frequent_releases_by_group, {
                    group.id: {
                        grouprelease.id: 1,
                    },
                })
            )

        tsdb.record_frequency_multi(frequencies, timestamp=event.datetime)

        UserReport.objects.filter(
            project=project,
            event_id=event_id,
        ).update(
            group=group,
            environment=environment,
        )

        # Update any event attachment that arrived before the event group was defined.
        EventAttachment.objects.filter(
            project_id=project.id,
            event_id=event_id,
        ).update(
            group_id=group.id,
        )

        # save the event unless its been sampled
        if not is_sample:
            try:
                with transaction.atomic(using=router.db_for_write(Event)):
                    event.save()
            except IntegrityError:
                logger.info(
                    'duplicate.found',
                    exc_info=True,
                    extra={
                        'event_uuid': event_id,
                        'project_id': project.id,
                        'group_id': group.id,
                        'model': Event.__name__,
                    }
                )
                return event

            tagstore.delay_index_event_tags(
                organization_id=project.organization_id,
                project_id=project.id,
                group_id=group.id,
                environment_id=environment.id,
                event_id=event.id,
                tags=event.tags,
                date_added=event.datetime,
            )

        if event_user:
            tsdb.record_multi(
                (
                    (tsdb.models.users_affected_by_group, group.id, (event_user.tag_value, )),
                    (tsdb.models.users_affected_by_project, project.id, (event_user.tag_value, )),
                ),
                timestamp=event.datetime,
                environment_id=environment.id,
            )
        if release:
            if is_new:
                buffer.incr(
                    ReleaseProject, {'new_groups': 1}, {
                        'release_id': release.id,
                        'project_id': project.id,
                    }
                )
            if is_new_group_environment:
                buffer.incr(
                    ReleaseProjectEnvironment, {'new_issues_count': 1}, {
                        'project_id': project.id,
                        'release_id': release.id,
                        'environment_id': environment.id,
                    }
                )

        safe_execute(
            Group.objects.add_tags,
            group,
            environment,
            event.get_tags(),
            _with_transaction=False)

        if not raw:
            if not project.first_event:
                project.update(first_event=date)
                first_event_received.send_robust(project=project, group=group, sender=Project)

        eventstream.insert(
            group=group,
            event=event,
            is_new=is_new,
            is_sample=is_sample,
            is_regression=is_regression,
            is_new_group_environment=is_new_group_environment,
            primary_hash=hashes[0],
            # We are choosing to skip consuming the event back
            # in the eventstream if it's flagged as raw.
            # This means that we want to publish the event
            # through the event stream, but we don't care
            # about post processing and handling the commit.
            skip_consume=raw,
        )

        metrics.timing(
            'events.latency',
            received_timestamp - recorded_timestamp,
            tags={
                'project_id': project.id,
            },
        )

        metrics.timing(
            'events.size.data.post_save',
            event.size,
            tags={'project_id': project.id}
        )

        return event
Example #9
0
        def create_message_event(template, parameters, environment, release):
            i = next(sequence)

            event_id = uuid.UUID(fields=(
                i,
                0x0,
                0x1000,
                0x80,
                0x80,
                0x808080808080,
            ), ).hex

            tags = [['color', next(tag_values)]]

            if environment:
                tags.append(['environment', environment])

            if release:
                tags.append(['sentry:release', release])

            event = Event.objects.create(
                project_id=project.id,
                group_id=source.id,
                event_id=event_id,
                message='%s' % (id, ),
                datetime=now + shift(i),
                data={
                    'environment': environment,
                    'type': 'default',
                    'metadata': {
                        'title': template % parameters,
                    },
                    'logentry': {
                        'message': template,
                        'params': parameters,
                        'formatted': template % parameters,
                    },
                    'user': next(user_values),
                    'tags': tags,
                },
            )

            with self.tasks():
                Group.objects.add_tags(
                    source,
                    Environment.objects.get(
                        organization_id=project.organization_id,
                        name=environment),
                    tags=event.tags,
                )

            EventMapping.objects.create(
                project_id=project.id,
                group_id=source.id,
                event_id=event_id,
                date_added=event.datetime,
            )

            UserReport.objects.create(
                project_id=project.id,
                group_id=source.id,
                event_id=event_id,
                name='Log Hat',
                email='*****@*****.**',
                comments='Quack',
            )

            if release:
                Release.get_or_create(
                    project=project,
                    version=event.get_tag('sentry:release'),
                    date_added=event.datetime,
                )

            features.record([event])

            return event
Example #10
0
        def create_message_event(template, parameters, environment, release):
            i = next(sequence)

            event_id = uuid.UUID(fields=(i, 0x0, 0x1000, 0x80, 0x80,
                                         0x808080808080)).hex

            tags = [["color", next(tag_values)]]

            if environment:
                tags.append(["environment", environment])

            if release:
                tags.append(["sentry:release", release])

            event = Event.objects.create(
                project_id=project.id,
                group_id=source.id,
                event_id=event_id,
                message="%s" % (id, ),
                datetime=now + shift(i),
                data={
                    "environment": environment,
                    "type": "default",
                    "metadata": {
                        "title": template % parameters
                    },
                    "logentry": {
                        "message": template,
                        "params": parameters,
                        "formatted": template % parameters,
                    },
                    "user": next(user_values),
                    "tags": tags,
                },
            )

            with self.tasks():
                Group.objects.add_tags(
                    source,
                    Environment.objects.get(
                        organization_id=project.organization_id,
                        name=environment),
                    tags=event.tags,
                )

            UserReport.objects.create(
                project_id=project.id,
                group_id=source.id,
                event_id=event_id,
                name="Log Hat",
                email="*****@*****.**",
                comments="Quack",
            )

            if release:
                Release.get_or_create(
                    project=project,
                    version=event.get_tag("sentry:release"),
                    date_added=event.datetime,
                )

            features.record([event])

            return event
Example #11
0
    def save(self, project, raw=False):
        from sentry.tasks.post_process import index_event_tags

        project = Project.objects.get_from_cache(id=project)

        data = self.data.copy()

        # First we pull out our top-level (non-data attr) kwargs
        event_id = data.pop('event_id')
        level = data.pop('level')

        culprit = data.pop('transaction', None)
        if not culprit:
            culprit = data.pop('culprit', None)
        logger_name = data.pop('logger', None)
        server_name = data.pop('server_name', None)
        site = data.pop('site', None)
        checksum = data.pop('checksum', None)
        fingerprint = data.pop('fingerprint', None)
        platform = data.pop('platform', None)
        release = data.pop('release', None)
        dist = data.pop('dist', None)
        environment = data.pop('environment', None)

        # unused
        time_spent = data.pop('time_spent', None)
        message = data.pop('message', '')

        if not culprit:
            # if we generate an implicit culprit, lets not call it a
            # transaction
            transaction_name = None
            culprit = generate_culprit(data, platform=platform)
        else:
            transaction_name = culprit

        date = datetime.fromtimestamp(data.pop('timestamp'))
        date = date.replace(tzinfo=timezone.utc)

        kwargs = {
            'platform': platform,
        }

        event = Event(
            project_id=project.id,
            event_id=event_id,
            data=data,
            time_spent=time_spent,
            datetime=date,
            **kwargs
        )
        event._project_cache = project

        # convert this to a dict to ensure we're only storing one value per key
        # as most parts of Sentry dont currently play well with multiple values
        tags = dict(data.get('tags') or [])
        tags['level'] = LOG_LEVELS[level]
        if logger_name:
            tags['logger'] = logger_name
        if server_name:
            tags['server_name'] = server_name
        if site:
            tags['site'] = site
        if environment:
            tags['environment'] = environment
        if transaction_name:
            tags['transaction'] = transaction_name

        if release:
            # dont allow a conflicting 'release' tag
            if 'release' in tags:
                del tags['release']
            release = Release.get_or_create(
                project=project,
                version=release,
                date_added=date,
            )

            tags['sentry:release'] = release.version

        if dist and release:
            dist = release.add_dist(dist, date)
            tags['sentry:dist'] = dist.name
        else:
            dist = None

        event_user = self._get_event_user(project, data)
        if event_user:
            # dont allow a conflicting 'user' tag
            if 'user' in tags:
                del tags['user']
            tags['sentry:user'] = event_user.tag_value

        # At this point we want to normalize the in_app values in case the
        # clients did not set this appropriately so far.
        normalize_in_app(data)

        for plugin in plugins.for_project(project, version=None):
            added_tags = safe_execute(plugin.get_tags, event, _with_transaction=False)
            if added_tags:
                # plugins should not override user provided tags
                for key, value in added_tags:
                    tags.setdefault(key, value)

        # tags are stored as a tuple
        tags = tags.items()

        # XXX(dcramer): we're relying on mutation of the data object to ensure
        # this propagates into Event
        data['tags'] = tags

        data['fingerprint'] = fingerprint or ['{{ default }}']

        for path, iface in six.iteritems(event.interfaces):
            data['tags'].extend(iface.iter_tags())
            # Get rid of ephemeral interface data
            if iface.ephemeral:
                data.pop(iface.get_path(), None)

        # prioritize fingerprint over checksum as its likely the client defaulted
        # a checksum whereas the fingerprint was explicit
        if fingerprint:
            hashes = [md5_from_hash(h) for h in get_hashes_from_fingerprint(event, fingerprint)]
        elif checksum:
            hashes = [checksum]
            data['checksum'] = checksum
        else:
            hashes = [md5_from_hash(h) for h in get_hashes_for_event(event)]

        # TODO(dcramer): temp workaround for complexity
        data['message'] = message
        event_type = eventtypes.get(data.get('type', 'default'))(data)
        event_metadata = event_type.get_metadata()
        # TODO(dcramer): temp workaround for complexity
        del data['message']

        data['type'] = event_type.key
        data['metadata'] = event_metadata

        # index components into ``Event.message``
        # See GH-3248
        if event_type.key != 'default':
            if 'sentry.interfaces.Message' in data and \
                    data['sentry.interfaces.Message']['message'] != message:
                message = u'{} {}'.format(
                    message,
                    data['sentry.interfaces.Message']['message'],
                )

        if not message:
            message = ''
        elif not isinstance(message, six.string_types):
            message = force_text(message)

        for value in six.itervalues(event_metadata):
            value_u = force_text(value, errors='replace')
            if value_u not in message:
                message = u'{} {}'.format(message, value_u)

        if culprit and culprit not in message:
            culprit_u = force_text(culprit, errors='replace')
            message = u'{} {}'.format(message, culprit_u)

        message = trim(message.strip(), settings.SENTRY_MAX_MESSAGE_LENGTH)

        event.message = message
        kwargs['message'] = message

        group_kwargs = kwargs.copy()
        group_kwargs.update(
            {
                'culprit': culprit,
                'logger': logger_name,
                'level': level,
                'last_seen': date,
                'first_seen': date,
                'active_at': date,
                'data': {
                    'last_received':
                    event.data.get('received') or float(event.datetime.strftime('%s')),
                    'type':
                    event_type.key,
                    # we cache the events metadata on the group to ensure its
                    # accessible in the stream
                    'metadata':
                    event_metadata,
                },
            }
        )

        if release:
            group_kwargs['first_release'] = release

        group, is_new, is_regression, is_sample = self._save_aggregate(
            event=event, hashes=hashes, release=release, **group_kwargs
        )

        event.group = group
        # store a reference to the group id to guarantee validation of isolation
        event.data.bind_ref(event)

        try:
            with transaction.atomic(using=router.db_for_write(EventMapping)):
                EventMapping.objects.create(project=project, group=group, event_id=event_id)
        except IntegrityError:
            self.logger.info(
                'duplicate.found',
                exc_info=True,
                extra={
                    'event_uuid': event_id,
                    'project_id': project.id,
                    'group_id': group.id,
                    'model': EventMapping.__name__,
                }
            )
            return event

        environment = Environment.get_or_create(
            project=project,
            name=environment,
        )

        if release:
            ReleaseEnvironment.get_or_create(
                project=project,
                release=release,
                environment=environment,
                datetime=date,
            )

            grouprelease = GroupRelease.get_or_create(
                group=group,
                release=release,
                environment=environment,
                datetime=date,
            )

        counters = [
            (tsdb.models.group, group.id),
            (tsdb.models.project, project.id),
        ]

        if release:
            counters.append((tsdb.models.release, release.id))

        tsdb.incr_multi(counters, timestamp=event.datetime)

        frequencies = [
            # (tsdb.models.frequent_projects_by_organization, {
            #     project.organization_id: {
            #         project.id: 1,
            #     },
            # }),
            # (tsdb.models.frequent_issues_by_project, {
            #     project.id: {
            #         group.id: 1,
            #     },
            # })
            (tsdb.models.frequent_environments_by_group, {
                group.id: {
                    environment.id: 1,
                },
            })
        ]

        if release:
            frequencies.append(
                (tsdb.models.frequent_releases_by_group, {
                    group.id: {
                        grouprelease.id: 1,
                    },
                })
            )

        tsdb.record_frequency_multi(frequencies, timestamp=event.datetime)

        UserReport.objects.filter(
            project=project,
            event_id=event_id,
        ).update(group=group)

        # save the event unless its been sampled
        if not is_sample:
            try:
                with transaction.atomic(using=router.db_for_write(Event)):
                    event.save()
            except IntegrityError:
                self.logger.info(
                    'duplicate.found',
                    exc_info=True,
                    extra={
                        'event_uuid': event_id,
                        'project_id': project.id,
                        'group_id': group.id,
                        'model': Event.__name__,
                    }
                )
                return event

            index_event_tags.delay(
                organization_id=project.organization_id,
                project_id=project.id,
                group_id=group.id,
                event_id=event.id,
                tags=tags,
            )

        if event_user:
            tsdb.record_multi(
                (
                    (tsdb.models.users_affected_by_group, group.id, (event_user.tag_value, )),
                    (tsdb.models.users_affected_by_project, project.id, (event_user.tag_value, )),
                ),
                timestamp=event.datetime
            )

        if is_new and release:
            buffer.incr(
                ReleaseProject, {'new_groups': 1}, {
                    'release_id': release.id,
                    'project_id': project.id,
                }
            )

        safe_execute(Group.objects.add_tags, group, tags, _with_transaction=False)

        if not raw:
            if not project.first_event:
                project.update(first_event=date)
                first_event_received.send(project=project, group=group, sender=Project)

            post_process_group.delay(
                group=group,
                event=event,
                is_new=is_new,
                is_sample=is_sample,
                is_regression=is_regression,
            )
        else:
            self.logger.info('post_process.skip.raw_event', extra={'event_id': event.id})

        # TODO: move this to the queue
        if is_regression and not raw:
            regression_signal.send_robust(sender=Group, instance=group)

        return event
Example #12
0
    def save(self, project, raw=False):
        from sentry.tasks.post_process import index_event_tags

        project = Project.objects.get_from_cache(id=project)

        data = self.data.copy()

        # First we pull out our top-level (non-data attr) kwargs
        event_id = data.pop('event_id')
        level = data.pop('level')

        culprit = data.pop('culprit', None)
        logger_name = data.pop('logger', None)
        server_name = data.pop('server_name', None)
        site = data.pop('site', None)
        checksum = data.pop('checksum', None)
        fingerprint = data.pop('fingerprint', None)
        platform = data.pop('platform', None)
        release = data.pop('release', None)
        environment = data.pop('environment', None)

        # unused
        time_spent = data.pop('time_spent', None)
        message = data.pop('message', '')

        if not culprit:
            # if we generate an implicit culprit, lets not call it a
            # transaction
            transaction_name = None
            culprit = generate_culprit(data, platform=platform)
        else:
            transaction_name = culprit

        date = datetime.fromtimestamp(data.pop('timestamp'))
        date = date.replace(tzinfo=timezone.utc)

        kwargs = {
            'platform': platform,
        }

        event = Event(
            project_id=project.id,
            event_id=event_id,
            data=data,
            time_spent=time_spent,
            datetime=date,
            **kwargs
        )

        # convert this to a dict to ensure we're only storing one value per key
        # as most parts of Sentry dont currently play well with multiple values
        tags = dict(data.get('tags') or [])
        tags['level'] = LOG_LEVELS[level]
        if logger_name:
            tags['logger'] = logger_name
        if server_name:
            tags['server_name'] = server_name
        if site:
            tags['site'] = site
        if environment:
            tags['environment'] = environment
        if transaction_name:
            tags['transaction'] = transaction_name

        if release:
            # dont allow a conflicting 'release' tag
            if 'release' in tags:
                del tags['release']
            tags['sentry:release'] = release

        event_user = self._get_event_user(project, data)
        if event_user:
            # dont allow a conflicting 'user' tag
            if 'user' in tags:
                del tags['user']
            tags['sentry:user'] = event_user.tag_value

        for plugin in plugins.for_project(project, version=None):
            added_tags = safe_execute(plugin.get_tags, event,
                                      _with_transaction=False)
            if added_tags:
                # plugins should not override user provided tags
                for key, value in added_tags:
                    tags.setdefault(key, value)

        # tags are stored as a tuple
        tags = tags.items()

        # XXX(dcramer): we're relying on mutation of the data object to ensure
        # this propagates into Event
        data['tags'] = tags

        data['fingerprint'] = fingerprint or ['{{ default }}']

        for path, iface in six.iteritems(event.interfaces):
            data['tags'].extend(iface.iter_tags())
            # Get rid of ephemeral interface data
            if iface.ephemeral:
                data.pop(iface.get_path(), None)

        # prioritize fingerprint over checksum as its likely the client defaulted
        # a checksum whereas the fingerprint was explicit
        if fingerprint:
            hashes = [
                md5_from_hash(h)
                for h in get_hashes_from_fingerprint(event, fingerprint)
            ]
        elif checksum:
            hashes = [checksum]
            data['checksum'] = checksum
        else:
            hashes = [
                md5_from_hash(h)
                for h in get_hashes_for_event(event)
            ]

        # TODO(dcramer): temp workaround for complexity
        data['message'] = message
        event_type = eventtypes.get(data.get('type', 'default'))(data)
        event_metadata = event_type.get_metadata()
        # TODO(dcramer): temp workaround for complexity
        del data['message']

        data['type'] = event_type.key
        data['metadata'] = event_metadata

        # index components into ``Event.message``
        # See GH-3248
        if event_type.key != 'default':
            if 'sentry.interfaces.Message' in data and \
                    data['sentry.interfaces.Message']['message'] != message:
                message = u'{} {}'.format(
                    message,
                    data['sentry.interfaces.Message']['message'],
                )

        if not message:
            message = ''
        elif not isinstance(message, six.string_types):
            message = force_text(message)

        for value in six.itervalues(event_metadata):
            value_u = force_text(value, errors='replace')
            if value_u not in message:
                message = u'{} {}'.format(message, value_u)

        if culprit and culprit not in message:
            culprit_u = force_text(culprit, errors='replace')
            message = u'{} {}'.format(message, culprit_u)

        message = trim(message.strip(), settings.SENTRY_MAX_MESSAGE_LENGTH)

        event.message = message
        kwargs['message'] = message

        group_kwargs = kwargs.copy()
        group_kwargs.update({
            'culprit': culprit,
            'logger': logger_name,
            'level': level,
            'last_seen': date,
            'first_seen': date,
            'active_at': date,
            'data': {
                'last_received': event.data.get('received') or float(event.datetime.strftime('%s')),
                'type': event_type.key,
                # we cache the events metadata on the group to ensure its
                # accessible in the stream
                'metadata': event_metadata,
            },
        })

        if release:
            release = Release.get_or_create(
                project=project,
                version=release,
                date_added=date,
            )

            group_kwargs['first_release'] = release

        group, is_new, is_regression, is_sample = self._save_aggregate(
            event=event,
            hashes=hashes,
            release=release,
            **group_kwargs
        )

        event.group = group
        # store a reference to the group id to guarantee validation of isolation
        event.data.bind_ref(event)

        try:
            with transaction.atomic(using=router.db_for_write(EventMapping)):
                EventMapping.objects.create(
                    project=project, group=group, event_id=event_id)
        except IntegrityError:
            self.logger.info('duplicate.found', extra={'event_id': event.id}, exc_info=True)
            return event

        environment = Environment.get_or_create(
            project=project,
            name=environment,
        )

        if release:
            ReleaseEnvironment.get_or_create(
                project=project,
                release=release,
                environment=environment,
                datetime=date,
            )

            grouprelease = GroupRelease.get_or_create(
                group=group,
                release=release,
                environment=environment,
                datetime=date,
            )

        counters = [
            (tsdb.models.group, group.id),
            (tsdb.models.project, project.id),
        ]

        if release:
            counters.append((tsdb.models.release, release.id))

        tsdb.incr_multi(counters, timestamp=event.datetime)

        frequencies = [
            # (tsdb.models.frequent_projects_by_organization, {
            #     project.organization_id: {
            #         project.id: 1,
            #     },
            # }),
            # (tsdb.models.frequent_issues_by_project, {
            #     project.id: {
            #         group.id: 1,
            #     },
            # })
            (tsdb.models.frequent_environments_by_group, {
                group.id: {
                    environment.id: 1,
                },
            })
        ]

        if release:
            frequencies.append(
                (tsdb.models.frequent_releases_by_group, {
                    group.id: {
                        grouprelease.id: 1,
                    },
                })
            )

        tsdb.record_frequency_multi(frequencies, timestamp=event.datetime)

        UserReport.objects.filter(
            project=project, event_id=event_id,
        ).update(group=group)

        # save the event unless its been sampled
        if not is_sample:
            try:
                with transaction.atomic(using=router.db_for_write(Event)):
                    event.save()
            except IntegrityError:
                self.logger.info('duplicate.found', extra={'event_id': event.id}, exc_info=True)
                return event

            index_event_tags.delay(
                project_id=project.id,
                group_id=group.id,
                event_id=event.id,
                tags=tags,
            )

        if event_user:
            tsdb.record_multi((
                (tsdb.models.users_affected_by_group, group.id, (event_user.tag_value,)),
                (tsdb.models.users_affected_by_project, project.id, (event_user.tag_value,)),
            ), timestamp=event.datetime)

        if is_new and release:
            buffer.incr(Release, {'new_groups': 1}, {
                'id': release.id,
            })

        safe_execute(Group.objects.add_tags, group, tags,
                     _with_transaction=False)

        if not raw:
            if not project.first_event:
                project.update(first_event=date)
                first_event_received.send(project=project, group=group, sender=Project)

            post_process_group.delay(
                group=group,
                event=event,
                is_new=is_new,
                is_sample=is_sample,
                is_regression=is_regression,
            )
        else:
            self.logger.info('post_process.skip.raw_event', extra={'event_id': event.id})

        # TODO: move this to the queue
        if is_regression and not raw:
            regression_signal.send_robust(sender=Group, instance=group)

        return event
Example #13
0
    def save(self,
             project_id,
             raw=False,
             assume_normalized=False,
             cache_key=None):
        """
        We re-insert events with duplicate IDs into Snuba, which is responsible
        for deduplicating events. Since deduplication in Snuba is on the primary
        key (based on event ID, project ID and day), events with same IDs are only
        deduplicated if their timestamps fall on the same day. The latest event
        always wins and overwrites the value of events received earlier in that day.

        Since we increment counters and frequencies here before events get inserted
        to eventstream these numbers may be larger than the total number of
        events if we receive duplicate event IDs that fall on the same day
        (that do not hit cache first).
        """

        # Normalize if needed
        if not self._normalized:
            if not assume_normalized:
                self.normalize()
            self._normalized = True

        data = self._data

        project = Project.objects.get_from_cache(id=project_id)
        project._organization_cache = Organization.objects.get_from_cache(
            id=project.organization_id)

        # Pull out the culprit
        culprit = self.get_culprit()

        # Pull the toplevel data we're interested in
        level = data.get("level")

        # TODO(mitsuhiko): this code path should be gone by July 2018.
        # This is going to be fine because no code actually still depends
        # on integers here.  When we need an integer it will be converted
        # into one later.  Old workers used to send integers here.
        if level is not None and isinstance(level, six.integer_types):
            level = LOG_LEVELS[level]

        transaction_name = data.get("transaction")
        logger_name = data.get("logger")
        release = data.get("release")
        dist = data.get("dist")
        environment = data.get("environment")
        recorded_timestamp = data.get("timestamp")

        # We need to swap out the data with the one internal to the newly
        # created event object
        event = self._get_event_instance(project_id=project_id)
        self._data = data = event.data.data

        event._project_cache = project

        date = event.datetime
        platform = event.platform
        event_id = event.event_id

        if transaction_name:
            transaction_name = force_text(transaction_name)

        # Right now the event type is the signal to skip the group. This
        # is going to change a lot.
        if event.get_event_type() == "transaction":
            issueless_event = True
        else:
            issueless_event = False

        # Some of the data that are toplevel attributes are duplicated
        # into tags (logger, level, environment, transaction).  These are
        # different from legacy attributes which are normalized into tags
        # ahead of time (site, server_name).
        setdefault_path(data, "tags", value=[])
        set_tag(data, "level", level)
        if logger_name:
            set_tag(data, "logger", logger_name)
        if environment:
            set_tag(data, "environment", environment)
        if transaction_name:
            set_tag(data, "transaction", transaction_name)

        if release:
            # dont allow a conflicting 'release' tag
            pop_tag(data, "release")
            release = Release.get_or_create(project=project,
                                            version=release,
                                            date_added=date)
            set_tag(data, "sentry:release", release.version)

        if dist and release:
            dist = release.add_dist(dist, date)
            # dont allow a conflicting 'dist' tag
            pop_tag(data, "dist")
            set_tag(data, "sentry:dist", dist.name)
        else:
            dist = None

        event_user = self._get_event_user(project, data)
        if event_user:
            # dont allow a conflicting 'user' tag
            pop_tag(data, "user")
            set_tag(data, "sentry:user", event_user.tag_value)

        # At this point we want to normalize the in_app values in case the
        # clients did not set this appropriately so far.
        grouping_config = load_grouping_config(
            get_grouping_config_dict_for_event_data(data, project))
        normalize_stacktraces_for_grouping(data, grouping_config)

        for plugin in plugins.for_project(project, version=None):
            added_tags = safe_execute(plugin.get_tags,
                                      event,
                                      _with_transaction=False)
            if added_tags:
                # plugins should not override user provided tags
                for key, value in added_tags:
                    if get_tag(data, key) is None:
                        set_tag(data, key, value)

        for path, iface in six.iteritems(event.interfaces):
            for k, v in iface.iter_tags():
                set_tag(data, k, v)
            # Get rid of ephemeral interface data
            if iface.ephemeral:
                data.pop(iface.path, None)

        # The active grouping config was put into the event in the
        # normalize step before.  We now also make sure that the
        # fingerprint was set to `'{{ default }}' just in case someone
        # removed it from the payload.  The call to get_hashes will then
        # look at `grouping_config` to pick the right parameters.
        data["fingerprint"] = data.get("fingerprint") or ["{{ default }}"]
        apply_server_fingerprinting(
            data, get_fingerprinting_config_for_project(project))

        # Here we try to use the grouping config that was requested in the
        # event.  If that config has since been deleted (because it was an
        # experimental grouping config) we fall back to the default.
        try:
            hashes = event.get_hashes()
        except GroupingConfigNotFound:
            data["grouping_config"] = get_grouping_config_dict_for_project(
                project)
            hashes = event.get_hashes()

        data["hashes"] = hashes

        # we want to freeze not just the metadata and type in but also the
        # derived attributes.  The reason for this is that we push this
        # data into kafka for snuba processing and our postprocessing
        # picks up the data right from the snuba topic.  For most usage
        # however the data is dynamically overridden by Event.title and
        # Event.location (See Event.as_dict)
        materialized_metadata = self.materialize_metadata()
        data.update(materialized_metadata)
        data["culprit"] = culprit

        received_timestamp = event.data.get("received") or float(
            event.datetime.strftime("%s"))

        if not issueless_event:
            # The group gets the same metadata as the event when it's flushed but
            # additionally the `last_received` key is set.  This key is used by
            # _save_aggregate.
            group_metadata = dict(materialized_metadata)
            group_metadata["last_received"] = received_timestamp
            kwargs = {
                "platform": platform,
                "message": event.search_message,
                "culprit": culprit,
                "logger": logger_name,
                "level": LOG_LEVELS_MAP.get(level),
                "last_seen": date,
                "first_seen": date,
                "active_at": date,
                "data": group_metadata,
            }

            if release:
                kwargs["first_release"] = release

            try:
                group, is_new, is_regression = self._save_aggregate(
                    event=event, hashes=hashes, release=release, **kwargs)
            except HashDiscarded:
                event_discarded.send_robust(project=project,
                                            sender=EventManager)

                metrics.incr(
                    "events.discarded",
                    skip_internal=True,
                    tags={
                        "organization_id": project.organization_id,
                        "platform": platform
                    },
                )
                raise
            else:
                event_saved.send_robust(project=project,
                                        event_size=event.size,
                                        sender=EventManager)
            event.group = group
        else:
            group = None
            is_new = False
            is_regression = False
            event_saved.send_robust(project=project,
                                    event_size=event.size,
                                    sender=EventManager)

        # store a reference to the group id to guarantee validation of isolation
        event.data.bind_ref(event)

        environment = Environment.get_or_create(project=project,
                                                name=environment)

        if group:
            group_environment, is_new_group_environment = GroupEnvironment.get_or_create(
                group_id=group.id,
                environment_id=environment.id,
                defaults={"first_release": release if release else None},
            )
        else:
            is_new_group_environment = False

        if release:
            ReleaseEnvironment.get_or_create(project=project,
                                             release=release,
                                             environment=environment,
                                             datetime=date)

            ReleaseProjectEnvironment.get_or_create(project=project,
                                                    release=release,
                                                    environment=environment,
                                                    datetime=date)

            if group:
                grouprelease = GroupRelease.get_or_create(
                    group=group,
                    release=release,
                    environment=environment,
                    datetime=date)

        counters = [(tsdb.models.project, project.id)]

        if group:
            counters.append((tsdb.models.group, group.id))

        if release:
            counters.append((tsdb.models.release, release.id))

        tsdb.incr_multi(counters,
                        timestamp=event.datetime,
                        environment_id=environment.id)

        frequencies = []

        if group:
            frequencies.append((tsdb.models.frequent_environments_by_group, {
                group.id: {
                    environment.id: 1
                }
            }))

            if release:
                frequencies.append((tsdb.models.frequent_releases_by_group, {
                    group.id: {
                        grouprelease.id: 1
                    }
                }))
        if frequencies:
            tsdb.record_frequency_multi(frequencies, timestamp=event.datetime)

        if group:
            UserReport.objects.filter(project=project,
                                      event_id=event_id).update(
                                          group=group, environment=environment)

        # Enusre the _metrics key exists. This is usually created during
        # and prefilled with ingestion sizes.
        event_metrics = event.data.get("_metrics") or {}
        event.data["_metrics"] = event_metrics

        # Capture the actual size that goes into node store.
        event_metrics["bytes.stored.event"] = len(
            json.dumps(dict(event.data.items())))

        # Load attachments first, but persist them at the very last after
        # posting to eventstream to make sure all counters and eventstream are
        # incremented for sure.
        attachments = self.get_attachments(cache_key, event)
        for attachment in attachments:
            key = "bytes.stored.%s" % (attachment.type, )
            event_metrics[key] = (event_metrics.get(key) or 0) + len(
                attachment.data)

        # Write the event to Nodestore
        event.data.save()

        if event_user:
            counters = [(tsdb.models.users_affected_by_project, project.id,
                         (event_user.tag_value, ))]

            if group:
                counters.append((tsdb.models.users_affected_by_group, group.id,
                                 (event_user.tag_value, )))

            tsdb.record_multi(counters,
                              timestamp=event.datetime,
                              environment_id=environment.id)

        if release:
            if is_new:
                buffer.incr(
                    ReleaseProject,
                    {"new_groups": 1},
                    {
                        "release_id": release.id,
                        "project_id": project.id
                    },
                )
            if is_new_group_environment:
                buffer.incr(
                    ReleaseProjectEnvironment,
                    {"new_issues_count": 1},
                    {
                        "project_id": project.id,
                        "release_id": release.id,
                        "environment_id": environment.id,
                    },
                )

        if not raw:
            if not project.first_event:
                project.update(first_event=date)
                first_event_received.send_robust(project=project,
                                                 event=event,
                                                 sender=Project)

        eventstream.insert(
            group=group,
            event=event,
            is_new=is_new,
            is_regression=is_regression,
            is_new_group_environment=is_new_group_environment,
            primary_hash=hashes[0],
            # We are choosing to skip consuming the event back
            # in the eventstream if it's flagged as raw.
            # This means that we want to publish the event
            # through the event stream, but we don't care
            # about post processing and handling the commit.
            skip_consume=raw,
        )

        # Do this last to ensure signals get emitted even if connection to the
        # file store breaks temporarily.
        self.save_attachments(attachments, event)

        metric_tags = {"from_relay": "_relay_processed" in self._data}

        metrics.timing("events.latency",
                       received_timestamp - recorded_timestamp,
                       tags=metric_tags)
        metrics.timing("events.size.data.post_save",
                       event.size,
                       tags=metric_tags)
        metrics.incr(
            "events.post_save.normalize.errors",
            amount=len(self._data.get("errors") or ()),
            tags=metric_tags,
        )

        return event
Example #14
0
    def save(self, project, raw=False):
        from sentry.tasks.post_process import index_event_tags
        data = self.data

        project = Project.objects.get_from_cache(id=project)

        # Check to make sure we're not about to do a bunch of work that's
        # already been done if we've processed an event with this ID. (This
        # isn't a perfect solution -- this doesn't handle ``EventMapping`` and
        # there's a race condition between here and when the event is actually
        # saved, but it's an improvement. See GH-7677.)
        try:
            event = Event.objects.get(
                project_id=project.id,
                event_id=data['event_id'],
            )
        except Event.DoesNotExist:
            pass
        else:
            self.logger.info('duplicate.found',
                             exc_info=True,
                             extra={
                                 'event_uuid': data['event_id'],
                                 'project_id': project.id,
                                 'model': Event.__name__,
                             })
            return event

        # First we pull out our top-level (non-data attr) kwargs
        event_id = data.pop('event_id')
        level = data.pop('level')
        culprit = data.pop('transaction', None)
        if not culprit:
            culprit = data.pop('culprit', None)
        logger_name = data.pop('logger', None)
        server_name = data.pop('server_name', None)
        site = data.pop('site', None)
        checksum = data.pop('checksum', None)
        fingerprint = data.pop('fingerprint', None)
        platform = data.pop('platform', None)
        release = data.pop('release', None)
        dist = data.pop('dist', None)
        environment = data.pop('environment', None)

        # unused
        time_spent = data.pop('time_spent', None)
        message = data.pop('message', '')

        if not culprit:
            # if we generate an implicit culprit, lets not call it a
            # transaction
            transaction_name = None
            culprit = generate_culprit(data, platform=platform)
        else:
            transaction_name = culprit

        culprit = force_text(culprit)

        recorded_timestamp = data.pop('timestamp')
        date = datetime.fromtimestamp(recorded_timestamp)
        date = date.replace(tzinfo=timezone.utc)

        kwargs = {
            'platform': platform,
        }

        event = Event(project_id=project.id,
                      event_id=event_id,
                      data=data,
                      time_spent=time_spent,
                      datetime=date,
                      **kwargs)
        event._project_cache = project

        # convert this to a dict to ensure we're only storing one value per key
        # as most parts of Sentry dont currently play well with multiple values
        tags = dict(data.get('tags') or [])
        tags['level'] = LOG_LEVELS[level]
        if logger_name:
            tags['logger'] = logger_name
        if server_name:
            tags['server_name'] = server_name
        if site:
            tags['site'] = site
        if environment:
            tags['environment'] = environment
        if transaction_name:
            tags['transaction'] = transaction_name

        if release:
            # dont allow a conflicting 'release' tag
            if 'release' in tags:
                del tags['release']
            release = Release.get_or_create(
                project=project,
                version=release,
                date_added=date,
            )

            tags['sentry:release'] = release.version

        if dist and release:
            dist = release.add_dist(dist, date)
            tags['sentry:dist'] = dist.name
        else:
            dist = None

        event_user = self._get_event_user(project, data)
        if event_user:
            # dont allow a conflicting 'user' tag
            if 'user' in tags:
                del tags['user']
            tags['sentry:user'] = event_user.tag_value

        # At this point we want to normalize the in_app values in case the
        # clients did not set this appropriately so far.
        normalize_in_app(data)

        for plugin in plugins.for_project(project, version=None):
            added_tags = safe_execute(plugin.get_tags,
                                      event,
                                      _with_transaction=False)
            if added_tags:
                # plugins should not override user provided tags
                for key, value in added_tags:
                    tags.setdefault(key, value)

        for path, iface in six.iteritems(event.interfaces):
            for k, v in iface.iter_tags():
                tags[k] = v
            # Get rid of ephemeral interface data
            if iface.ephemeral:
                data.pop(iface.get_path(), None)

        # tags are stored as a tuple
        tags = tags.items()

        data['tags'] = tags
        data['fingerprint'] = fingerprint or ['{{ default }}']

        # prioritize fingerprint over checksum as its likely the client defaulted
        # a checksum whereas the fingerprint was explicit
        if fingerprint:
            hashes = [
                md5_from_hash(h)
                for h in get_hashes_from_fingerprint(event, fingerprint)
            ]
        elif checksum:
            if HASH_RE.match(checksum):
                hashes = [checksum]
            else:
                hashes = [md5_from_hash([checksum]), checksum]
            data['checksum'] = checksum
        else:
            hashes = [md5_from_hash(h) for h in get_hashes_for_event(event)]

        # TODO(dcramer): temp workaround for complexity
        data['message'] = message
        event_type = eventtypes.get(data.get('type', 'default'))(data)
        event_metadata = event_type.get_metadata()
        # TODO(dcramer): temp workaround for complexity
        del data['message']

        data['type'] = event_type.key
        data['metadata'] = event_metadata

        # index components into ``Event.message``
        # See GH-3248
        if event_type.key != 'default':
            if 'sentry.interfaces.Message' in data and \
                    data['sentry.interfaces.Message']['message'] != message:
                message = u'{} {}'.format(
                    message,
                    data['sentry.interfaces.Message']['message'],
                )

        if not message:
            message = ''
        elif not isinstance(message, six.string_types):
            message = force_text(message)

        for value in six.itervalues(event_metadata):
            value_u = force_text(value, errors='replace')
            if value_u not in message:
                message = u'{} {}'.format(message, value_u)

        if culprit and culprit not in message:
            culprit_u = force_text(culprit, errors='replace')
            message = u'{} {}'.format(message, culprit_u)

        message = trim(message.strip(), settings.SENTRY_MAX_MESSAGE_LENGTH)

        event.message = message
        kwargs['message'] = message

        received_timestamp = event.data.get('received') or float(
            event.datetime.strftime('%s'))
        group_kwargs = kwargs.copy()
        group_kwargs.update({
            'culprit': culprit,
            'logger': logger_name,
            'level': level,
            'last_seen': date,
            'first_seen': date,
            'active_at': date,
            'data': {
                'last_received': received_timestamp,
                'type': event_type.key,
                # we cache the events metadata on the group to ensure its
                # accessible in the stream
                'metadata': event_metadata,
            },
        })

        if release:
            group_kwargs['first_release'] = release

        try:
            group, is_new, is_regression, is_sample = self._save_aggregate(
                event=event, hashes=hashes, release=release, **group_kwargs)
        except HashDiscarded:
            event_discarded.send_robust(
                project=project,
                sender=EventManager,
            )

            metrics.incr(
                'events.discarded',
                skip_internal=True,
                tags={
                    'organization_id': project.organization_id,
                    'platform': platform,
                },
            )
            raise
        else:
            event_saved.send_robust(
                project=project,
                sender=EventManager,
            )

        event.group = group
        # store a reference to the group id to guarantee validation of isolation
        event.data.bind_ref(event)

        # When an event was sampled, the canonical source of truth
        # is the EventMapping table since we aren't going to be writing out an actual
        # Event row. Otherwise, if the Event isn't being sampled, we can safely
        # rely on the Event table itself as the source of truth and ignore
        # EventMapping since it's redundant information.
        if is_sample:
            try:
                with transaction.atomic(
                        using=router.db_for_write(EventMapping)):
                    EventMapping.objects.create(project=project,
                                                group=group,
                                                event_id=event_id)
            except IntegrityError:
                self.logger.info('duplicate.found',
                                 exc_info=True,
                                 extra={
                                     'event_uuid': event_id,
                                     'project_id': project.id,
                                     'group_id': group.id,
                                     'model': EventMapping.__name__,
                                 })
                return event

        environment = Environment.get_or_create(
            project=project,
            name=environment,
        )

        group_environment, is_new_group_environment = GroupEnvironment.get_or_create(
            group_id=group.id,
            environment_id=environment.id,
            defaults={
                'first_release_id': release.id if release else None,
            },
        )

        if release:
            ReleaseEnvironment.get_or_create(
                project=project,
                release=release,
                environment=environment,
                datetime=date,
            )

            ReleaseProjectEnvironment.get_or_create(
                project=project,
                release=release,
                environment=environment,
                datetime=date,
            )

            grouprelease = GroupRelease.get_or_create(
                group=group,
                release=release,
                environment=environment,
                datetime=date,
            )

        counters = [
            (tsdb.models.group, group.id),
            (tsdb.models.project, project.id),
        ]

        if release:
            counters.append((tsdb.models.release, release.id))

        tsdb.incr_multi(counters,
                        timestamp=event.datetime,
                        environment_id=environment.id)

        frequencies = [
            # (tsdb.models.frequent_projects_by_organization, {
            #     project.organization_id: {
            #         project.id: 1,
            #     },
            # }),
            # (tsdb.models.frequent_issues_by_project, {
            #     project.id: {
            #         group.id: 1,
            #     },
            # })
            (tsdb.models.frequent_environments_by_group, {
                group.id: {
                    environment.id: 1,
                },
            })
        ]

        if release:
            frequencies.append((tsdb.models.frequent_releases_by_group, {
                group.id: {
                    grouprelease.id: 1,
                },
            }))

        tsdb.record_frequency_multi(frequencies, timestamp=event.datetime)

        UserReport.objects.filter(
            project=project,
            event_id=event_id,
        ).update(
            group=group,
            environment=environment,
        )

        # save the event unless its been sampled
        if not is_sample:
            try:
                with transaction.atomic(using=router.db_for_write(Event)):
                    event.save()
            except IntegrityError:
                self.logger.info('duplicate.found',
                                 exc_info=True,
                                 extra={
                                     'event_uuid': event_id,
                                     'project_id': project.id,
                                     'group_id': group.id,
                                     'model': Event.__name__,
                                 })
                return event

            index_event_tags.delay(
                organization_id=project.organization_id,
                project_id=project.id,
                group_id=group.id,
                environment_id=environment.id,
                event_id=event.id,
                tags=tags,
                date_added=event.datetime,
            )

        if event_user:
            tsdb.record_multi(
                (
                    (tsdb.models.users_affected_by_group, group.id,
                     (event_user.tag_value, )),
                    (tsdb.models.users_affected_by_project, project.id,
                     (event_user.tag_value, )),
                ),
                timestamp=event.datetime,
                environment_id=environment.id,
            )
        if release:
            if is_new:
                buffer.incr(ReleaseProject, {'new_groups': 1}, {
                    'release_id': release.id,
                    'project_id': project.id,
                })
            if is_new_group_environment:
                buffer.incr(ReleaseProjectEnvironment, {'new_issues_count': 1},
                            {
                                'project_id': project.id,
                                'release_id': release.id,
                                'environment_id': environment.id,
                            })

        safe_execute(Group.objects.add_tags,
                     group,
                     environment,
                     tags,
                     _with_transaction=False)

        if not raw:
            if not project.first_event:
                project.update(first_event=date)
                first_event_received.send(project=project,
                                          group=group,
                                          sender=Project)

            post_process_group.delay(
                group=group,
                event=event,
                is_new=is_new,
                is_sample=is_sample,
                is_regression=is_regression,
                is_new_group_environment=is_new_group_environment,
                primary_hash=hashes[0],
            )
        else:
            self.logger.info('post_process.skip.raw_event',
                             extra={'event_id': event.id})

        metrics.timing(
            'events.latency',
            received_timestamp - recorded_timestamp,
            tags={
                'project_id': project.id,
            },
        )

        return event
    def save(self, project, raw=False):
        # TODO: culprit should default to "most recent" frame in stacktraces when
        # it's not provided.
        project = Project.objects.get_from_cache(id=project)

        data = self.data.copy()

        # First we pull out our top-level (non-data attr) kwargs
        event_id = data.pop('event_id')
        message = data.pop('message')
        level = data.pop('level')

        culprit = data.pop('culprit', None) or ''
        time_spent = data.pop('time_spent', None)
        logger_name = data.pop('logger', None)
        server_name = data.pop('server_name', None)
        site = data.pop('site', None)
        checksum = data.pop('checksum', None)
        fingerprint = data.pop('fingerprint', None)
        platform = data.pop('platform', None)
        release = data.pop('release', None)

        date = datetime.fromtimestamp(data.pop('timestamp'))
        date = date.replace(tzinfo=timezone.utc)

        kwargs = {
            'message': message,
            'platform': platform,
        }

        event = Event(project=project,
                      event_id=event_id,
                      data=data,
                      time_spent=time_spent,
                      datetime=date,
                      **kwargs)

        tags = data.get('tags') or []
        tags.append(('level', LOG_LEVELS[level]))
        if logger_name:
            tags.append(('logger', logger_name))
        if server_name:
            tags.append(('server_name', server_name))
        if site:
            tags.append(('site', site))
        if release:
            # TODO(dcramer): we should ensure we create Release objects
            tags.append(('sentry:release', release))

        for plugin in plugins.for_project(project, version=None):
            added_tags = safe_execute(plugin.get_tags,
                                      event,
                                      _with_transaction=False)
            if added_tags:
                tags.extend(added_tags)
        # XXX(dcramer): we're relying on mutation of the data object to ensure
        # this propagates into Event
        data['tags'] = tags

        # Calculate the checksum from the first highest scoring interface
        if checksum:
            hashes = [checksum]
        elif fingerprint:
            hashes = map(md5_from_hash,
                         get_hashes_from_fingerprint(event, fingerprint))
        else:
            hashes = map(md5_from_hash, get_hashes_for_event(event))

        group_kwargs = kwargs.copy()
        group_kwargs.update({
            'culprit': culprit,
            'logger': logger_name,
            'level': level,
            'last_seen': date,
            'first_seen': date,
            'time_spent_total': time_spent or 0,
            'time_spent_count': time_spent and 1 or 0,
        })

        if release:
            release = Release.get_or_create(
                project=project,
                version=release,
                date_added=date,
            )

            group_kwargs['first_release'] = release

            Activity.objects.create(
                type=Activity.RELEASE,
                project=project,
                ident=release,
                data={'version': release},
                datetime=date,
            )

        group, is_new, is_regression, is_sample = safe_execute(
            self._save_aggregate, event=event, hashes=hashes, **group_kwargs)

        using = group._state.db

        event.group = group

        try:
            with transaction.atomic():
                EventMapping.objects.create(project=project,
                                            group=group,
                                            event_id=event_id)
        except IntegrityError:
            self.logger.info('Duplicate EventMapping found for event_id=%s',
                             event_id)
            return event

        UserReport.objects.filter(
            project=project,
            event_id=event_id,
        ).update(group=group)

        # save the event unless its been sampled
        if not is_sample:
            try:
                with transaction.atomic():
                    event.save()
            except IntegrityError:
                self.logger.info('Duplicate Event found for event_id=%s',
                                 event_id)
                return event

        if is_new and release:
            buffer.incr(Release, {'new_groups': 1}, {
                'id': release.id,
            })

        safe_execute(Group.objects.add_tags,
                     group,
                     tags,
                     _with_transaction=False)

        if not raw:
            post_process_group.delay(
                group=group,
                event=event,
                is_new=is_new,
                is_sample=is_sample,
                is_regression=is_regression,
            )
        else:
            self.logger.info(
                'Raw event passed; skipping post process for event_id=%s',
                event_id)

        index_event.delay(event)

        # TODO: move this to the queue
        if is_regression and not raw:
            regression_signal.send_robust(sender=Group, instance=group)

        return event
Example #16
0
    def save(self, project, raw=False):
        from sentry.tasks.post_process import index_event_tags

        project = Project.objects.get_from_cache(id=project)

        data = self.data.copy()

        # First we pull out our top-level (non-data attr) kwargs
        event_id = data.pop("event_id")
        message = data.pop("message")
        level = data.pop("level")

        culprit = data.pop("culprit", None)
        logger_name = data.pop("logger", None)
        server_name = data.pop("server_name", None)
        site = data.pop("site", None)
        checksum = data.pop("checksum", None)
        fingerprint = data.pop("fingerprint", None)
        platform = data.pop("platform", None)
        release = data.pop("release", None)
        environment = data.pop("environment", None)

        # unused
        time_spent = data.pop("time_spent", None)

        if not culprit:
            culprit = generate_culprit(data, platform=platform)

        date = datetime.fromtimestamp(data.pop("timestamp"))
        date = date.replace(tzinfo=timezone.utc)

        kwargs = {"message": message, "platform": platform}

        event = Event(
            project_id=project.id, event_id=event_id, data=data, time_spent=time_spent, datetime=date, **kwargs
        )

        tags = data.get("tags") or []
        tags.append(("level", LOG_LEVELS[level]))
        if logger_name:
            tags.append(("logger", logger_name))
        if server_name:
            tags.append(("server_name", server_name))
        if site:
            tags.append(("site", site))
        if release:
            # TODO(dcramer): we should ensure we create Release objects
            tags.append(("sentry:release", release))
        if environment:
            tags.append(("environment", environment))

        for plugin in plugins.for_project(project, version=None):
            added_tags = safe_execute(plugin.get_tags, event, _with_transaction=False)
            if added_tags:
                tags.extend(added_tags)

        event_user = self._get_event_user(project, data)
        if event_user:
            tags.append(("sentry:user", event_user.tag_value))

        # XXX(dcramer): we're relying on mutation of the data object to ensure
        # this propagates into Event
        data["tags"] = tags

        data["fingerprint"] = fingerprint or ["{{ default }}"]

        # Get rid of ephemeral interface data
        for interface_class, _ in iter_interfaces():
            interface = interface_class()
            if interface.ephemeral:
                data.pop(interface.get_path(), None)

        # prioritize fingerprint over checksum as its likely the client defaulted
        # a checksum whereas the fingerprint was explicit
        if fingerprint:
            hashes = map(md5_from_hash, get_hashes_from_fingerprint(event, fingerprint))
        elif checksum:
            hashes = [checksum]
        else:
            hashes = map(md5_from_hash, get_hashes_for_event(event))

        # TODO(dcramer): temp workaround for complexity
        data["message"] = message
        event_type = eventtypes.get(data.get("type", "default"))(data)

        group_kwargs = kwargs.copy()
        group_kwargs.update(
            {
                "culprit": culprit,
                "logger": logger_name,
                "level": level,
                "last_seen": date,
                "first_seen": date,
                "data": {
                    "last_received": event.data.get("received") or float(event.datetime.strftime("%s")),
                    "type": event_type.key,
                    # we cache the events metadata on the group to ensure its
                    # accessible in the stream
                    "metadata": event_type.get_metadata(),
                },
            }
        )

        # TODO(dcramer): temp workaround for complexity
        del data["message"]

        if release:
            release = Release.get_or_create(project=project, version=release, date_added=date)

            group_kwargs["first_release"] = release

        group, is_new, is_regression, is_sample = self._save_aggregate(
            event=event, hashes=hashes, release=release, **group_kwargs
        )

        event.group = group
        # store a reference to the group id to guarantee validation of isolation
        event.data.bind_ref(event)

        try:
            with transaction.atomic(using=router.db_for_write(EventMapping)):
                EventMapping.objects.create(project=project, group=group, event_id=event_id)
        except IntegrityError:
            self.logger.info("Duplicate EventMapping found for event_id=%s", event_id, exc_info=True)
            return event

        UserReport.objects.filter(project=project, event_id=event_id).update(group=group)

        # save the event unless its been sampled
        if not is_sample:
            try:
                with transaction.atomic(using=router.db_for_write(Event)):
                    event.save()
            except IntegrityError:
                self.logger.info("Duplicate Event found for event_id=%s", event_id, exc_info=True)
                return event

            index_event_tags.delay(project_id=project.id, event_id=event.id, tags=tags)

        if event_user:
            tsdb.record_multi(
                (
                    (tsdb.models.users_affected_by_group, group.id, (event_user.tag_value,)),
                    (tsdb.models.users_affected_by_project, project.id, (event_user.tag_value,)),
                ),
                timestamp=event.datetime,
            )

        if is_new and release:
            buffer.incr(Release, {"new_groups": 1}, {"id": release.id})

        safe_execute(Group.objects.add_tags, group, tags, _with_transaction=False)

        if not raw:
            if not project.first_event:
                project.update(first_event=date)
                first_event_received.send(project=project, group=group, sender=Project)

            post_process_group.delay(
                group=group, event=event, is_new=is_new, is_sample=is_sample, is_regression=is_regression
            )
        else:
            self.logger.info("Raw event passed; skipping post process for event_id=%s", event_id)

        # TODO: move this to the queue
        if is_regression and not raw:
            regression_signal.send_robust(sender=Group, instance=group)

        return event
Example #17
0
    def save(self, project, raw=False):
        from sentry.tasks.post_process import index_event_tags

        project = Project.objects.get_from_cache(id=project)

        data = self.data.copy()

        # First we pull out our top-level (non-data attr) kwargs
        event_id = data.pop('event_id')
        level = data.pop('level')

        culprit = data.pop('culprit', None)
        logger_name = data.pop('logger', None)
        server_name = data.pop('server_name', None)
        site = data.pop('site', None)
        checksum = data.pop('checksum', None)
        fingerprint = data.pop('fingerprint', None)
        platform = data.pop('platform', None)
        release = data.pop('release', None)
        environment = data.pop('environment', None)

        # unused
        time_spent = data.pop('time_spent', None)
        message = data.pop('message', '')

        if not culprit:
            culprit = generate_culprit(data, platform=platform)

        date = datetime.fromtimestamp(data.pop('timestamp'))
        date = date.replace(tzinfo=timezone.utc)

        kwargs = {
            'platform': platform,
        }

        event = Event(project_id=project.id,
                      event_id=event_id,
                      data=data,
                      time_spent=time_spent,
                      datetime=date,
                      **kwargs)

        tags = data.get('tags') or []
        tags.append(('level', LOG_LEVELS[level]))
        if logger_name:
            tags.append(('logger', logger_name))
        if server_name:
            tags.append(('server_name', server_name))
        if site:
            tags.append(('site', site))
        if release:
            # TODO(dcramer): we should ensure we create Release objects
            tags.append(('sentry:release', release))
        if environment:
            tags.append(('environment', environment))

        for plugin in plugins.for_project(project, version=None):
            added_tags = safe_execute(plugin.get_tags,
                                      event,
                                      _with_transaction=False)
            if added_tags:
                tags.extend(added_tags)

        event_user = self._get_event_user(project, data)
        if event_user:
            tags.append(('sentry:user', event_user.tag_value))

        # XXX(dcramer): we're relying on mutation of the data object to ensure
        # this propagates into Event
        data['tags'] = tags

        data['fingerprint'] = fingerprint or ['{{ default }}']

        # Get rid of ephemeral interface data
        for interface_class, _ in iter_interfaces():
            interface = interface_class()
            if interface.ephemeral:
                data.pop(interface.get_path(), None)

        # prioritize fingerprint over checksum as its likely the client defaulted
        # a checksum whereas the fingerprint was explicit
        if fingerprint:
            hashes = map(md5_from_hash,
                         get_hashes_from_fingerprint(event, fingerprint))
        elif checksum:
            hashes = [checksum]
        else:
            hashes = map(md5_from_hash, get_hashes_for_event(event))

        # TODO(dcramer): temp workaround for complexity
        data['message'] = message
        event_type = eventtypes.get(data.get('type', 'default'))(data)
        event_metadata = event_type.get_metadata()
        # TODO(dcramer): temp workaround for complexity
        del data['message']

        data['type'] = event_type.key
        data['metadata'] = event_metadata

        # index components into ``Event.message``
        # See GH-3248
        if event_type.key != 'default':
            if 'sentry.interfaces.Message' in data and \
                    data['sentry.interfaces.Message']['message'] != message:
                message = u'{} {}'.format(
                    message,
                    data['sentry.interfaces.Message']['message'],
                )

        if not message:
            message = ''
        elif not isinstance(message, basestring):
            message = force_text(message)

        for value in event_metadata.itervalues():
            value_u = force_text(value, errors='replace')
            if value_u not in message:
                message = u'{} {}'.format(message, value_u)

        message = trim(message.strip(), settings.SENTRY_MAX_MESSAGE_LENGTH)

        event.message = message
        kwargs['message'] = message

        group_kwargs = kwargs.copy()
        group_kwargs.update({
            'culprit': culprit,
            'logger': logger_name,
            'level': level,
            'last_seen': date,
            'first_seen': date,
            'data': {
                'last_received':
                event.data.get('received')
                or float(event.datetime.strftime('%s')),
                'type':
                event_type.key,
                # we cache the events metadata on the group to ensure its
                # accessible in the stream
                'metadata':
                event_metadata,
            },
        })

        if release:
            release = Release.get_or_create(
                project=project,
                version=release,
                date_added=date,
            )

            group_kwargs['first_release'] = release

        group, is_new, is_regression, is_sample = self._save_aggregate(
            event=event, hashes=hashes, release=release, **group_kwargs)

        event.group = group
        # store a reference to the group id to guarantee validation of isolation
        event.data.bind_ref(event)

        try:
            with transaction.atomic(using=router.db_for_write(EventMapping)):
                EventMapping.objects.create(project=project,
                                            group=group,
                                            event_id=event_id)
        except IntegrityError:
            self.logger.info('Duplicate EventMapping found for event_id=%s',
                             event_id,
                             exc_info=True)
            return event

        UserReport.objects.filter(
            project=project,
            event_id=event_id,
        ).update(group=group)

        # save the event unless its been sampled
        if not is_sample:
            try:
                with transaction.atomic(using=router.db_for_write(Event)):
                    event.save()
            except IntegrityError:
                self.logger.info('Duplicate Event found for event_id=%s',
                                 event_id,
                                 exc_info=True)
                return event

            index_event_tags.delay(
                project_id=project.id,
                group_id=group.id,
                event_id=event.id,
                tags=tags,
            )

        if event_user:
            tsdb.record_multi((
                (tsdb.models.users_affected_by_group, group.id,
                 (event_user.tag_value, )),
                (tsdb.models.users_affected_by_project, project.id,
                 (event_user.tag_value, )),
            ),
                              timestamp=event.datetime)

        if is_new and release:
            buffer.incr(Release, {'new_groups': 1}, {
                'id': release.id,
            })

        safe_execute(Group.objects.add_tags,
                     group,
                     tags,
                     _with_transaction=False)

        if not raw:
            if not project.first_event:
                project.update(first_event=date)
                first_event_received.send(project=project,
                                          group=group,
                                          sender=Project)

            post_process_group.delay(
                group=group,
                event=event,
                is_new=is_new,
                is_sample=is_sample,
                is_regression=is_regression,
            )
        else:
            self.logger.info(
                'Raw event passed; skipping post process for event_id=%s',
                event_id)

        # TODO: move this to the queue
        if is_regression and not raw:
            regression_signal.send_robust(sender=Group, instance=group)

        return event
Example #18
0
    def save(self, project, raw=False):
        project = Project.objects.get_from_cache(id=project)

        data = self.data.copy()

        # First we pull out our top-level (non-data attr) kwargs
        event_id = data.pop('event_id')
        message = data.pop('message')
        level = data.pop('level')

        culprit = data.pop('culprit', None)
        time_spent = data.pop('time_spent', None)
        logger_name = data.pop('logger', None)
        server_name = data.pop('server_name', None)
        site = data.pop('site', None)
        checksum = data.pop('checksum', None)
        fingerprint = data.pop('fingerprint', None)
        platform = data.pop('platform', None)
        release = data.pop('release', None)
        environment = data.pop('environment', None)

        if not culprit:
            culprit = generate_culprit(data)

        date = datetime.fromtimestamp(data.pop('timestamp'))
        date = date.replace(tzinfo=timezone.utc)

        kwargs = {
            'message': message,
            'platform': platform,
        }

        event = Event(
            project=project,
            event_id=event_id,
            data=data,
            time_spent=time_spent,
            datetime=date,
            **kwargs
        )

        tags = data.get('tags') or []
        tags.append(('level', LOG_LEVELS[level]))
        if logger_name:
            tags.append(('logger', logger_name))
        if server_name:
            tags.append(('server_name', server_name))
        if site:
            tags.append(('site', site))
        if release:
            # TODO(dcramer): we should ensure we create Release objects
            tags.append(('sentry:release', release))
        if environment:
            tags.append(('environment', environment))

        for plugin in plugins.for_project(project, version=None):
            added_tags = safe_execute(plugin.get_tags, event,
                                      _with_transaction=False)
            if added_tags:
                tags.extend(added_tags)

        event_user = self._get_event_user(project, data)
        if event_user:
            tags.append(('sentry:user', event_user.tag_value))

        # XXX(dcramer): we're relying on mutation of the data object to ensure
        # this propagates into Event
        data['tags'] = tags

        data['fingerprint'] = fingerprint or '{{ default }}'

        # prioritize fingerprint over checksum as its likely the client defaulted
        # a checksum whereas the fingerprint was explicit
        if fingerprint:
            hashes = map(md5_from_hash, get_hashes_from_fingerprint(event, fingerprint))
        elif checksum:
            hashes = [checksum]
        else:
            hashes = map(md5_from_hash, get_hashes_for_event(event))

        group_kwargs = kwargs.copy()
        group_kwargs.update({
            'culprit': culprit,
            'logger': logger_name,
            'level': level,
            'last_seen': date,
            'first_seen': date,
            'time_spent_total': time_spent or 0,
            'time_spent_count': time_spent and 1 or 0,
        })

        if release:
            release = Release.get_or_create(
                project=project,
                version=release,
                date_added=date,
            )

            group_kwargs['first_release'] = release

            Activity.objects.create(
                type=Activity.RELEASE,
                project=project,
                ident=release,
                data={'version': release},
                datetime=date,
            )

        group, is_new, is_regression, is_sample = self._save_aggregate(
            event=event,
            hashes=hashes,
            release=release,
            **group_kwargs
        )

        event.group = group
        event.group_id = group.id
        # store a reference to the group id to guarantee validation of isolation
        event.data.bind_ref(event)

        try:
            with transaction.atomic():
                EventMapping.objects.create(
                    project=project, group=group, event_id=event_id)
        except IntegrityError:
            self.logger.info('Duplicate EventMapping found for event_id=%s', event_id)
            return event

        UserReport.objects.filter(
            project=project, event_id=event_id,
        ).update(group=group)

        # save the event unless its been sampled
        if not is_sample:
            try:
                with transaction.atomic():
                    event.save()
            except IntegrityError:
                self.logger.info('Duplicate Event found for event_id=%s', event_id)
                return event

        if event_user:
            tsdb.record_multi((
                (tsdb.models.users_affected_by_group, group.id, (event_user.tag_value,)),
                (tsdb.models.users_affected_by_project, project.id, (event_user.tag_value,)),
            ), timestamp=event.datetime)

        if is_new and release:
            buffer.incr(Release, {'new_groups': 1}, {
                'id': release.id,
            })

        safe_execute(Group.objects.add_tags, group, tags,
                     _with_transaction=False)

        if not raw:
            if not project.first_event:
                project.update(first_event=date)

            post_process_group.delay(
                group=group,
                event=event,
                is_new=is_new,
                is_sample=is_sample,
                is_regression=is_regression,
            )
        else:
            self.logger.info('Raw event passed; skipping post process for event_id=%s', event_id)

        index_event.delay(event)

        # TODO: move this to the queue
        if is_regression and not raw:
            regression_signal.send_robust(sender=Group, instance=group)

        return event
Example #19
0
 def setUp(self):
     self.project = self.create_project()
     self.release = Release.get_or_create(self.project, '1.0')
     self.environment1 = Environment.get_or_create(self.project, 'prod')
     self.environment2 = Environment.get_or_create(self.project, 'staging')
     self.timestamp = 1403007314
Example #20
0
    def save(self, project, raw=False):
        from sentry.tasks.post_process import index_event_tags
        data = self.data

        project = Project.objects.get_from_cache(id=project)

        # Check to make sure we're not about to do a bunch of work that's
        # already been done if we've processed an event with this ID. (This
        # isn't a perfect solution -- this doesn't handle ``EventMapping`` and
        # there's a race condition between here and when the event is actually
        # saved, but it's an improvement. See GH-7677.)
        try:
            event = Event.objects.get(
                project_id=project.id,
                event_id=data['event_id'],
            )
        except Event.DoesNotExist:
            pass
        else:
            self.logger.info(
                'duplicate.found',
                exc_info=True,
                extra={
                    'event_uuid': data['event_id'],
                    'project_id': project.id,
                    'model': Event.__name__,
                }
            )
            return event

        # First we pull out our top-level (non-data attr) kwargs
        event_id = data.pop('event_id')
        level = data.pop('level')
        transaction_name = data.pop('transaction', None)
        culprit = data.pop('culprit', None)
        logger_name = data.pop('logger', None)
        server_name = data.pop('server_name', None)
        site = data.pop('site', None)
        checksum = data.pop('checksum', None)
        fingerprint = data.pop('fingerprint', None)
        platform = data.pop('platform', None)
        release = data.pop('release', None)
        dist = data.pop('dist', None)
        environment = data.pop('environment', None)

        # unused
        time_spent = data.pop('time_spent', None)
        message = data.pop('message', '')

        if not culprit:
            if transaction_name:
                culprit = transaction_name
            else:
                culprit = generate_culprit(data, platform=platform)

        culprit = force_text(culprit)
        if transaction_name:
            transaction_name = force_text(transaction_name)

        recorded_timestamp = data.pop('timestamp')
        date = datetime.fromtimestamp(recorded_timestamp)
        date = date.replace(tzinfo=timezone.utc)

        kwargs = {
            'platform': platform,
        }

        event = Event(
            project_id=project.id,
            event_id=event_id,
            data=data,
            time_spent=time_spent,
            datetime=date,
            **kwargs
        )
        event._project_cache = project
        data = event.data.data

        # convert this to a dict to ensure we're only storing one value per key
        # as most parts of Sentry dont currently play well with multiple values
        tags = dict(data.get('tags') or [])
        tags['level'] = LOG_LEVELS[level]
        if logger_name:
            tags['logger'] = logger_name
        if server_name:
            tags['server_name'] = server_name
        if site:
            tags['site'] = site
        if environment:
            tags['environment'] = environment
        if transaction_name:
            tags['transaction'] = transaction_name

        if release:
            # dont allow a conflicting 'release' tag
            if 'release' in tags:
                del tags['release']
            release = Release.get_or_create(
                project=project,
                version=release,
                date_added=date,
            )

            tags['sentry:release'] = release.version

        if dist and release:
            dist = release.add_dist(dist, date)
            tags['sentry:dist'] = dist.name
        else:
            dist = None

        event_user = self._get_event_user(project, data)
        if event_user:
            # dont allow a conflicting 'user' tag
            if 'user' in tags:
                del tags['user']
            tags['sentry:user'] = event_user.tag_value

        # At this point we want to normalize the in_app values in case the
        # clients did not set this appropriately so far.
        normalize_in_app(data)

        for plugin in plugins.for_project(project, version=None):
            added_tags = safe_execute(plugin.get_tags, event, _with_transaction=False)
            if added_tags:
                # plugins should not override user provided tags
                for key, value in added_tags:
                    tags.setdefault(key, value)

        for path, iface in six.iteritems(event.interfaces):
            for k, v in iface.iter_tags():
                tags[k] = v
            # Get rid of ephemeral interface data
            if iface.ephemeral:
                data.pop(iface.get_path(), None)

        # tags are stored as a tuple
        tags = tags.items()

        data['tags'] = tags
        data['fingerprint'] = fingerprint or ['{{ default }}']

        # prioritize fingerprint over checksum as its likely the client defaulted
        # a checksum whereas the fingerprint was explicit
        if fingerprint:
            hashes = [md5_from_hash(h) for h in get_hashes_from_fingerprint(event, fingerprint)]
        elif checksum:
            if HASH_RE.match(checksum):
                hashes = [checksum]
            else:
                hashes = [md5_from_hash([checksum]), checksum]
            data['checksum'] = checksum
        else:
            hashes = [md5_from_hash(h) for h in get_hashes_for_event(event)]

        # TODO(dcramer): temp workaround for complexity
        data['message'] = message
        event_type = eventtypes.get(data.get('type', 'default'))(data)
        event_metadata = event_type.get_metadata()
        # TODO(dcramer): temp workaround for complexity
        del data['message']

        data['type'] = event_type.key
        data['metadata'] = event_metadata

        # index components into ``Event.message``
        # See GH-3248
        if event_type.key != 'default':
            if 'sentry.interfaces.Message' in data and \
                    data['sentry.interfaces.Message']['message'] != message:
                message = u'{} {}'.format(
                    message,
                    data['sentry.interfaces.Message']['message'],
                )

        if not message:
            message = ''
        elif not isinstance(message, six.string_types):
            message = force_text(message)

        for value in six.itervalues(event_metadata):
            value_u = force_text(value, errors='replace')
            if value_u not in message:
                message = u'{} {}'.format(message, value_u)

        if culprit and culprit not in message:
            culprit_u = force_text(culprit, errors='replace')
            message = u'{} {}'.format(message, culprit_u)

        message = trim(message.strip(), settings.SENTRY_MAX_MESSAGE_LENGTH)

        event.message = message
        kwargs['message'] = message

        received_timestamp = event.data.get('received') or float(event.datetime.strftime('%s'))
        group_kwargs = kwargs.copy()
        group_kwargs.update(
            {
                'culprit': culprit,
                'logger': logger_name,
                'level': level,
                'last_seen': date,
                'first_seen': date,
                'active_at': date,
                'data': {
                    'last_received': received_timestamp,
                    'type':
                    event_type.key,
                    # we cache the events metadata on the group to ensure its
                    # accessible in the stream
                    'metadata':
                    event_metadata,
                },
            }
        )

        if release:
            group_kwargs['first_release'] = release

        try:
            group, is_new, is_regression, is_sample = self._save_aggregate(
                event=event, hashes=hashes, release=release, **group_kwargs
            )
        except HashDiscarded:
            event_discarded.send_robust(
                project=project,
                sender=EventManager,
            )

            metrics.incr(
                'events.discarded',
                skip_internal=True,
                tags={
                    'organization_id': project.organization_id,
                    'platform': platform,
                },
            )
            raise
        else:
            event_saved.send_robust(
                project=project,
                event_size=event.size,
                sender=EventManager,
            )

        event.group = group
        # store a reference to the group id to guarantee validation of isolation
        event.data.bind_ref(event)

        # When an event was sampled, the canonical source of truth
        # is the EventMapping table since we aren't going to be writing out an actual
        # Event row. Otherwise, if the Event isn't being sampled, we can safely
        # rely on the Event table itself as the source of truth and ignore
        # EventMapping since it's redundant information.
        if is_sample:
            try:
                with transaction.atomic(using=router.db_for_write(EventMapping)):
                    EventMapping.objects.create(project=project, group=group, event_id=event_id)
            except IntegrityError:
                self.logger.info(
                    'duplicate.found',
                    exc_info=True,
                    extra={
                        'event_uuid': event_id,
                        'project_id': project.id,
                        'group_id': group.id,
                        'model': EventMapping.__name__,
                    }
                )
                return event

        environment = Environment.get_or_create(
            project=project,
            name=environment,
        )

        group_environment, is_new_group_environment = GroupEnvironment.get_or_create(
            group_id=group.id,
            environment_id=environment.id,
            defaults={
                'first_release_id': release.id if release else None,
            },
        )

        if release:
            ReleaseEnvironment.get_or_create(
                project=project,
                release=release,
                environment=environment,
                datetime=date,
            )

            ReleaseProjectEnvironment.get_or_create(
                project=project,
                release=release,
                environment=environment,
                datetime=date,
            )

            grouprelease = GroupRelease.get_or_create(
                group=group,
                release=release,
                environment=environment,
                datetime=date,
            )

        counters = [
            (tsdb.models.group, group.id),
            (tsdb.models.project, project.id),
        ]

        if release:
            counters.append((tsdb.models.release, release.id))

        tsdb.incr_multi(counters, timestamp=event.datetime, environment_id=environment.id)

        frequencies = [
            # (tsdb.models.frequent_projects_by_organization, {
            #     project.organization_id: {
            #         project.id: 1,
            #     },
            # }),
            # (tsdb.models.frequent_issues_by_project, {
            #     project.id: {
            #         group.id: 1,
            #     },
            # })
            (tsdb.models.frequent_environments_by_group, {
                group.id: {
                    environment.id: 1,
                },
            })
        ]

        if release:
            frequencies.append(
                (tsdb.models.frequent_releases_by_group, {
                    group.id: {
                        grouprelease.id: 1,
                    },
                })
            )

        tsdb.record_frequency_multi(frequencies, timestamp=event.datetime)

        UserReport.objects.filter(
            project=project,
            event_id=event_id,
        ).update(
            group=group,
            environment=environment,
        )

        # save the event unless its been sampled
        if not is_sample:
            try:
                with transaction.atomic(using=router.db_for_write(Event)):
                    event.save()
            except IntegrityError:
                self.logger.info(
                    'duplicate.found',
                    exc_info=True,
                    extra={
                        'event_uuid': event_id,
                        'project_id': project.id,
                        'group_id': group.id,
                        'model': Event.__name__,
                    }
                )
                return event

            index_event_tags.delay(
                organization_id=project.organization_id,
                project_id=project.id,
                group_id=group.id,
                environment_id=environment.id,
                event_id=event.id,
                tags=tags,
                date_added=event.datetime,
            )

        if event_user:
            tsdb.record_multi(
                (
                    (tsdb.models.users_affected_by_group, group.id, (event_user.tag_value, )),
                    (tsdb.models.users_affected_by_project, project.id, (event_user.tag_value, )),
                ),
                timestamp=event.datetime,
                environment_id=environment.id,
            )
        if release:
            if is_new:
                buffer.incr(
                    ReleaseProject, {'new_groups': 1}, {
                        'release_id': release.id,
                        'project_id': project.id,
                    }
                )
            if is_new_group_environment:
                buffer.incr(
                    ReleaseProjectEnvironment, {'new_issues_count': 1}, {
                        'project_id': project.id,
                        'release_id': release.id,
                        'environment_id': environment.id,
                    }
                )

        safe_execute(Group.objects.add_tags, group, environment, tags, _with_transaction=False)

        if not raw:
            if not project.first_event:
                project.update(first_event=date)
                first_event_received.send_robust(project=project, group=group, sender=Project)

        eventstream.insert(
            group=group,
            event=event,
            is_new=is_new,
            is_sample=is_sample,
            is_regression=is_regression,
            is_new_group_environment=is_new_group_environment,
            primary_hash=hashes[0],
            # We are choosing to skip consuming the event back
            # in the eventstream if it's flagged as raw.
            # This means that we want to publish the event
            # through the event stream, but we don't care
            # about post processing and handling the commit.
            skip_consume=raw,
        )

        metrics.timing(
            'events.latency',
            received_timestamp - recorded_timestamp,
            tags={
                'project_id': project.id,
            },
        )

        return event
Example #21
0
 def setUp(self):
     self.project = self.create_project()
     self.release = Release.get_or_create(self.project, "1.0")
     self.environment1 = Environment.get_or_create(self.project, "prod")
     self.environment2 = Environment.get_or_create(self.project, "staging")
     self.timestamp = float(int(time() - 300))
Example #22
0
def debounce_update_release_health_data(organization, project_ids):
    """This causes a flush of snuba health data to the postgres tables once
    per minute for the given projects.
    """
    # Figure out which projects need to get updates from the snuba.
    should_update = {}
    cache_keys = ["debounce-health:%d" % id for id in project_ids]
    cache_data = cache.get_many(cache_keys)
    for project_id, cache_key in izip(project_ids, cache_keys):
        if cache_data.get(cache_key) is None:
            should_update[project_id] = cache_key

    if not should_update:
        return

    projects = {
        p.id: p
        for p in Project.objects.get_many_from_cache(should_update.keys())
    }

    # This gives us updates for all release-projects which have seen new
    # health data over the last days. It will miss releases where the last
    # date is longer than what `get_changed_project_release_model_adoptions`
    # considers recent.
    project_releases = release_health.get_changed_project_release_model_adoptions(
        should_update.keys())

    # Check which we already have rows for.
    existing = set(
        ReleaseProject.objects.filter(
            project_id__in=[x[0] for x in project_releases],
            release__version__in=[x[1] for x in project_releases],
        ).values_list("project_id", "release__version"))
    to_upsert = []
    for key in project_releases:
        if key not in existing:
            to_upsert.append(key)

    if to_upsert:
        dates = release_health.get_oldest_health_data_for_releases(to_upsert)

        for project_id, version in to_upsert:
            project = projects.get(project_id)
            if project is None:
                # should not happen
                continue

            # We might have never observed the release.  This for instance can
            # happen if the release only had health data so far.  For these cases
            # we want to create the release the first time we observed it on the
            # health side.
            release = Release.get_or_create(project=project,
                                            version=version,
                                            date_added=dates.get(
                                                (project_id, version)))

            # Make sure that the release knows about this project.  Like we had before
            # the project might not have been associated with this release yet.
            release.add_project(project)

    # Debounce updates for a minute
    cache.set_many(
        dict(izip(should_update.values(), [True] * len(should_update))), 60)
Example #23
0
    def save(self, project, raw=False):
        from sentry.tasks.post_process import index_event_tags

        project = Project.objects.get_from_cache(id=project)

        data = self.data.copy()

        # First we pull out our top-level (non-data attr) kwargs
        event_id = data.pop('event_id')
        level = data.pop('level')

        culprit = data.pop('culprit', None)
        logger_name = data.pop('logger', None)
        server_name = data.pop('server_name', None)
        site = data.pop('site', None)
        checksum = data.pop('checksum', None)
        fingerprint = data.pop('fingerprint', None)
        platform = data.pop('platform', None)
        release = data.pop('release', None)
        environment = data.pop('environment', None)

        # unused
        time_spent = data.pop('time_spent', None)
        message = data.pop('message', '')

        if not culprit:
            culprit = generate_culprit(data, platform=platform)

        date = datetime.fromtimestamp(data.pop('timestamp'))
        date = date.replace(tzinfo=timezone.utc)

        kwargs = {
            'platform': platform,
        }

        event = Event(
            project_id=project.id,
            event_id=event_id,
            data=data,
            time_spent=time_spent,
            datetime=date,
            **kwargs
        )

        tags = data.get('tags') or []
        tags.append(('level', LOG_LEVELS[level]))
        if logger_name:
            tags.append(('logger', logger_name))
        if server_name:
            tags.append(('server_name', server_name))
        if site:
            tags.append(('site', site))
        if release:
            # TODO(dcramer): we should ensure we create Release objects
            tags.append(('sentry:release', release))
        if environment:
            tags.append(('environment', environment))

        for plugin in plugins.for_project(project, version=None):
            added_tags = safe_execute(plugin.get_tags, event,
                                      _with_transaction=False)
            if added_tags:
                tags.extend(added_tags)

        event_user = self._get_event_user(project, data)
        if event_user:
            tags.append(('sentry:user', event_user.tag_value))

        # XXX(dcramer): we're relying on mutation of the data object to ensure
        # this propagates into Event
        data['tags'] = tags

        data['fingerprint'] = fingerprint or ['{{ default }}']

        for path, iface in event.interfaces.iteritems():
            data['tags'].extend(iface.iter_tags())
            # Get rid of ephemeral interface data
            if iface.ephemeral:
                data.pop(iface.get_path(), None)

        # prioritize fingerprint over checksum as its likely the client defaulted
        # a checksum whereas the fingerprint was explicit
        if fingerprint:
            hashes = map(md5_from_hash, get_hashes_from_fingerprint(event, fingerprint))
        elif checksum:
            hashes = [checksum]
        else:
            hashes = map(md5_from_hash, get_hashes_for_event(event))

        # TODO(dcramer): temp workaround for complexity
        data['message'] = message
        event_type = eventtypes.get(data.get('type', 'default'))(data)
        event_metadata = event_type.get_metadata()
        # TODO(dcramer): temp workaround for complexity
        del data['message']

        data['type'] = event_type.key
        data['metadata'] = event_metadata

        # index components into ``Event.message``
        # See GH-3248
        if event_type.key != 'default':
            if 'sentry.interfaces.Message' in data and \
                    data['sentry.interfaces.Message']['message'] != message:
                message = u'{} {}'.format(
                    message,
                    data['sentry.interfaces.Message']['message'],
                )

        if not message:
            message = ''
        elif not isinstance(message, basestring):
            message = force_text(message)

        for value in event_metadata.itervalues():
            value_u = force_text(value, errors='replace')
            if value_u not in message:
                message = u'{} {}'.format(message, value_u)

        message = trim(message.strip(), settings.SENTRY_MAX_MESSAGE_LENGTH)

        event.message = message
        kwargs['message'] = message

        group_kwargs = kwargs.copy()
        group_kwargs.update({
            'culprit': culprit,
            'logger': logger_name,
            'level': level,
            'last_seen': date,
            'first_seen': date,
            'data': {
                'last_received': event.data.get('received') or float(event.datetime.strftime('%s')),
                'type': event_type.key,
                # we cache the events metadata on the group to ensure its
                # accessible in the stream
                'metadata': event_metadata,
            },
        })

        if release:
            release = Release.get_or_create(
                project=project,
                version=release,
                date_added=date,
            )

            group_kwargs['first_release'] = release

        group, is_new, is_regression, is_sample = self._save_aggregate(
            event=event,
            hashes=hashes,
            release=release,
            **group_kwargs
        )

        event.group = group
        # store a reference to the group id to guarantee validation of isolation
        event.data.bind_ref(event)

        try:
            with transaction.atomic(using=router.db_for_write(EventMapping)):
                EventMapping.objects.create(
                    project=project, group=group, event_id=event_id)
        except IntegrityError:
            self.logger.info('Duplicate EventMapping found for event_id=%s', event_id,
                             exc_info=True)
            return event

        if release:
            grouprelease = GroupRelease.get_or_create(
                group=group,
                release=release,
                environment=environment,
                datetime=date,
            )

        tsdb.incr_multi([
            (tsdb.models.group, group.id),
            (tsdb.models.project, project.id),
        ], timestamp=event.datetime)

        frequencies = [
            (tsdb.models.frequent_projects_by_organization, {
                project.organization_id: {
                    project.id: 1,
                },
            }),
            (tsdb.models.frequent_issues_by_project, {
                project.id: {
                    group.id: 1,
                },
            })
        ]
        if release:
            frequencies.append(
                (tsdb.models.frequent_releases_by_groups, {
                    group.id: {
                        grouprelease.id: 1,
                    },
                })
            )

        tsdb.record_frequency_multi(frequencies, timestamp=event.datetime)

        UserReport.objects.filter(
            project=project, event_id=event_id,
        ).update(group=group)

        # save the event unless its been sampled
        if not is_sample:
            try:
                with transaction.atomic(using=router.db_for_write(Event)):
                    event.save()
            except IntegrityError:
                self.logger.info('Duplicate Event found for event_id=%s', event_id,
                                 exc_info=True)
                return event

            index_event_tags.delay(
                project_id=project.id,
                group_id=group.id,
                event_id=event.id,
                tags=tags,
            )

        if event_user:
            tsdb.record_multi((
                (tsdb.models.users_affected_by_group, group.id, (event_user.tag_value,)),
                (tsdb.models.users_affected_by_project, project.id, (event_user.tag_value,)),
            ), timestamp=event.datetime)

        if is_new and release:
            buffer.incr(Release, {'new_groups': 1}, {
                'id': release.id,
            })

        safe_execute(Group.objects.add_tags, group, tags,
                     _with_transaction=False)

        if not raw:
            if not project.first_event:
                project.update(first_event=date)
                first_event_received.send(project=project, group=group, sender=Project)

            post_process_group.delay(
                group=group,
                event=event,
                is_new=is_new,
                is_sample=is_sample,
                is_regression=is_regression,
            )
        else:
            self.logger.info('Raw event passed; skipping post process for event_id=%s', event_id)

        # TODO: move this to the queue
        if is_regression and not raw:
            regression_signal.send_robust(sender=Group, instance=group)

        return event
Example #24
0
 def setUp(self):
     self.project = self.create_project()
     self.release = Release.get_or_create(self.project, '1.0')
     self.environment1 = Environment.get_or_create(self.project, 'prod')
     self.environment2 = Environment.get_or_create(self.project, 'staging')
     self.timestamp = 1403007314
Example #25
0
    def save(self, project, raw=False):
        # TODO: culprit should default to "most recent" frame in stacktraces when
        # it's not provided.
        project = Project.objects.get_from_cache(id=project)

        data = self.data.copy()

        # First we pull out our top-level (non-data attr) kwargs
        event_id = data.pop('event_id')
        message = data.pop('message')
        level = data.pop('level')

        culprit = data.pop('culprit', None) or ''
        time_spent = data.pop('time_spent', None)
        logger_name = data.pop('logger', None)
        server_name = data.pop('server_name', None)
        site = data.pop('site', None)
        checksum = data.pop('checksum', None)
        platform = data.pop('platform', None)
        release = data.pop('release', None)

        date = datetime.fromtimestamp(data.pop('timestamp'))
        date = date.replace(tzinfo=timezone.utc)

        kwargs = {
            'message': message,
            'platform': platform,
        }

        event = Event(
            project=project,
            event_id=event_id,
            data=data,
            time_spent=time_spent,
            datetime=date,
            **kwargs
        )

        tags = data.get('tags') or []
        tags.append(('level', LOG_LEVELS[level]))
        if logger_name:
            tags.append(('logger', logger_name))
        if server_name:
            tags.append(('server_name', server_name))
        if site:
            tags.append(('site', site))
        if release:
            # TODO(dcramer): we should ensure we create Release objects
            tags.append(('sentry:release', release))

        for plugin in plugins.for_project(project, version=None):
            added_tags = safe_execute(plugin.get_tags, event,
                                      _with_transaction=False)
            if added_tags:
                tags.extend(added_tags)
        # XXX(dcramer): we're relying on mutation of the data object to ensure
        # this propagates into Event
        data['tags'] = tags

        # Calculate the checksum from the first highest scoring interface
        if checksum:
            hashes = [checksum]
        else:
            hashes = get_hashes_for_event(event)

        group_kwargs = kwargs.copy()
        group_kwargs.update({
            'culprit': culprit,
            'logger': logger_name,
            'level': level,
            'last_seen': date,
            'first_seen': date,
            'time_spent_total': time_spent or 0,
            'time_spent_count': time_spent and 1 or 0,
        })

        if release:
            group_kwargs['first_release'] = Release.get_or_create(
                project=project,
                version=release,
                date_added=date,
            )

            Activity.objects.create(
                type=Activity.RELEASE,
                project=project,
                ident=release,
                data={'version': release},
                datetime=date,
            )

        group, is_new, is_regression, is_sample = safe_execute(
            self._save_aggregate,
            event=event,
            hashes=hashes,
            **group_kwargs
        )

        using = group._state.db

        event.group = group

        try:
            with transaction.atomic():
                EventMapping.objects.create(
                    project=project, group=group, event_id=event_id)
        except IntegrityError:
            self.logger.info('Duplicate EventMapping found for event_id=%s', event_id)
            return event

        UserReport.objects.filter(
            project=project, event_id=event_id,
        ).update(group=group)

        # save the event unless its been sampled
        if not is_sample:
            try:
                with transaction.atomic():
                    event.save()
            except IntegrityError:
                self.logger.info('Duplicate Event found for event_id=%s', event_id)
                return event

        safe_execute(Group.objects.add_tags, group, tags,
                     _with_transaction=False)

        if not raw:
            post_process_group.delay(
                group=group,
                event=event,
                is_new=is_new,
                is_sample=is_sample,
                is_regression=is_regression,
            )
        else:
            self.logger.info('Raw event passed; skipping post process for event_id=%s', event_id)

        index_event.delay(event)

        # TODO: move this to the queue
        if is_regression and not raw:
            regression_signal.send_robust(sender=Group, instance=group)

        return event
Example #26
0
        def create_message_event(template, parameters, environment, release):
            i = next(sequence)

            event_id = uuid.UUID(
                fields=(i, 0x0, 0x1000, 0x80, 0x80, 0x808080808080, ),
            ).hex

            tags = [['color', next(tag_values)]]

            if environment:
                tags.append(['environment', environment])

            if release:
                tags.append(['sentry:release', release])

            event = Event.objects.create(
                project_id=project.id,
                group_id=source.id,
                event_id=event_id,
                message='%s' % (id, ),
                datetime=now + shift(i),
                data={
                    'environment': environment,
                    'type': 'default',
                    'metadata': {
                        'title': template % parameters,
                    },
                    'logentry': {
                        'message': template,
                        'params': parameters,
                        'formatted': template % parameters,
                    },
                    'user': next(user_values),
                    'tags': tags,
                },
            )

            with self.tasks():
                Group.objects.add_tags(
                    source,
                    Environment.objects.get(
                        organization_id=project.organization_id,
                        name=environment
                    ),
                    tags=event.get_tags(),
                )

            EventMapping.objects.create(
                project_id=project.id,
                group_id=source.id,
                event_id=event_id,
                date_added=event.datetime,
            )

            UserReport.objects.create(
                project_id=project.id,
                group_id=source.id,
                event_id=event_id,
                name='Log Hat',
                email='*****@*****.**',
                comments='Quack',
            )

            if release:
                Release.get_or_create(
                    project=project,
                    version=event.get_tag('sentry:release'),
                    date_added=event.datetime,
                )

            features.record([event])

            return event
Example #27
0
    def save(self, project_id, raw=False, assume_normalized=False):
        # Normalize if needed
        if not self._normalized:
            if not assume_normalized:
                self.normalize()
            self._normalized = True

        data = self._data

        project = Project.objects.get_from_cache(id=project_id)
        project._organization_cache = Organization.objects.get_from_cache(
            id=project.organization_id)

        # Check to make sure we're not about to do a bunch of work that's
        # already been done if we've processed an event with this ID. (This
        # isn't a perfect solution -- this doesn't handle ``EventMapping`` and
        # there's a race condition between here and when the event is actually
        # saved, but it's an improvement. See GH-7677.)
        try:
            event = Event.objects.get(
                project_id=project.id,
                event_id=data['event_id'],
            )
        except Event.DoesNotExist:
            pass
        else:
            # Make sure we cache on the project before returning
            event._project_cache = project
            logger.info(
                'duplicate.found',
                exc_info=True,
                extra={
                    'event_uuid': data['event_id'],
                    'project_id': project.id,
                    'model': Event.__name__,
                }
            )
            return event

        # Pull out the culprit
        culprit = self.get_culprit()

        # Pull the toplevel data we're interested in
        level = data.get('level')

        # TODO(mitsuhiko): this code path should be gone by July 2018.
        # This is going to be fine because no code actually still depends
        # on integers here.  When we need an integer it will be converted
        # into one later.  Old workers used to send integers here.
        if level is not None and isinstance(level, six.integer_types):
            level = LOG_LEVELS[level]

        transaction_name = data.get('transaction')
        logger_name = data.get('logger')
        release = data.get('release')
        dist = data.get('dist')
        environment = data.get('environment')
        recorded_timestamp = data.get('timestamp')

        # We need to swap out the data with the one internal to the newly
        # created event object
        event = self._get_event_instance(project_id=project_id)
        self._data = data = event.data.data

        event._project_cache = project

        date = event.datetime
        platform = event.platform
        event_id = event.event_id

        if transaction_name:
            transaction_name = force_text(transaction_name)

        # Some of the data that are toplevel attributes are duplicated
        # into tags (logger, level, environment, transaction).  These are
        # different from legacy attributes which are normalized into tags
        # ahead of time (site, server_name).
        setdefault_path(data, 'tags', value=[])
        set_tag(data, 'level', level)
        if logger_name:
            set_tag(data, 'logger', logger_name)
        if environment:
            set_tag(data, 'environment', environment)
        if transaction_name:
            set_tag(data, 'transaction', transaction_name)

        if release:
            # dont allow a conflicting 'release' tag
            pop_tag(data, 'release')
            release = Release.get_or_create(
                project=project,
                version=release,
                date_added=date,
            )
            set_tag(data, 'sentry:release', release.version)

        if dist and release:
            dist = release.add_dist(dist, date)
            # dont allow a conflicting 'dist' tag
            pop_tag(data, 'dist')
            set_tag(data, 'sentry:dist', dist.name)
        else:
            dist = None

        event_user = self._get_event_user(project, data)
        if event_user:
            # dont allow a conflicting 'user' tag
            pop_tag(data, 'user')
            set_tag(data, 'sentry:user', event_user.tag_value)

        # At this point we want to normalize the in_app values in case the
        # clients did not set this appropriately so far.
        grouping_config = load_grouping_config(
            get_grouping_config_dict_for_event_data(data, project))
        normalize_stacktraces_for_grouping(data, grouping_config)

        for plugin in plugins.for_project(project, version=None):
            added_tags = safe_execute(plugin.get_tags, event, _with_transaction=False)
            if added_tags:
                # plugins should not override user provided tags
                for key, value in added_tags:
                    if get_tag(data, key) is None:
                        set_tag(data, key, value)

        for path, iface in six.iteritems(event.interfaces):
            for k, v in iface.iter_tags():
                set_tag(data, k, v)
            # Get rid of ephemeral interface data
            if iface.ephemeral:
                data.pop(iface.path, None)

        # The active grouping config was put into the event in the
        # normalize step before.  We now also make sure that the
        # fingerprint was set to `'{{ default }}' just in case someone
        # removed it from the payload.  The call to get_hashes will then
        # look at `grouping_config` to pick the right paramters.
        data['fingerprint'] = data.get('fingerprint') or ['{{ default }}']
        apply_server_fingerprinting(data, get_fingerprinting_config_for_project(project))
        hashes = event.get_hashes()
        data['hashes'] = hashes

        # we want to freeze not just the metadata and type in but also the
        # derived attributes.  The reason for this is that we push this
        # data into kafka for snuba processing and our postprocessing
        # picks up the data right from the snuba topic.  For most usage
        # however the data is dynamically overriden by Event.title and
        # Event.location (See Event.as_dict)
        materialized_metadata = self.materialize_metadata()
        event_metadata = materialized_metadata['metadata']
        data.update(materialized_metadata)
        data['culprit'] = culprit

        # index components into ``Event.message``
        # See GH-3248
        event.message = self.get_search_message(event_metadata, culprit)
        received_timestamp = event.data.get('received') or float(event.datetime.strftime('%s'))

        # The group gets the same metadata as the event when it's flushed but
        # additionally the `last_received` key is set.  This key is used by
        # _save_aggregate.
        group_metadata = dict(materialized_metadata)
        group_metadata['last_received'] = received_timestamp
        kwargs = {
            'platform': platform,
            'message': event.message,
            'culprit': culprit,
            'logger': logger_name,
            'level': LOG_LEVELS_MAP.get(level),
            'last_seen': date,
            'first_seen': date,
            'active_at': date,
            'data': group_metadata,
        }

        if release:
            kwargs['first_release'] = release

        try:
            group, is_new, is_regression, is_sample = self._save_aggregate(
                event=event, hashes=hashes, release=release, **kwargs
            )
        except HashDiscarded:
            event_discarded.send_robust(
                project=project,
                sender=EventManager,
            )

            metrics.incr(
                'events.discarded',
                skip_internal=True,
                tags={
                    'organization_id': project.organization_id,
                    'platform': platform,
                },
            )
            raise
        else:
            event_saved.send_robust(
                project=project,
                event_size=event.size,
                sender=EventManager,
            )

        event.group = group
        # store a reference to the group id to guarantee validation of isolation
        event.data.bind_ref(event)

        # When an event was sampled, the canonical source of truth
        # is the EventMapping table since we aren't going to be writing out an actual
        # Event row. Otherwise, if the Event isn't being sampled, we can safely
        # rely on the Event table itself as the source of truth and ignore
        # EventMapping since it's redundant information.
        if is_sample:
            try:
                with transaction.atomic(using=router.db_for_write(EventMapping)):
                    EventMapping.objects.create(project=project, group=group, event_id=event_id)
            except IntegrityError:
                logger.info(
                    'duplicate.found',
                    exc_info=True,
                    extra={
                        'event_uuid': event_id,
                        'project_id': project.id,
                        'group_id': group.id,
                        'model': EventMapping.__name__,
                    }
                )
                return event

        environment = Environment.get_or_create(
            project=project,
            name=environment,
        )

        group_environment, is_new_group_environment = GroupEnvironment.get_or_create(
            group_id=group.id,
            environment_id=environment.id,
            defaults={
                'first_release': release if release else None,
            },
        )

        if release:
            ReleaseEnvironment.get_or_create(
                project=project,
                release=release,
                environment=environment,
                datetime=date,
            )

            ReleaseProjectEnvironment.get_or_create(
                project=project,
                release=release,
                environment=environment,
                datetime=date,
            )

            grouprelease = GroupRelease.get_or_create(
                group=group,
                release=release,
                environment=environment,
                datetime=date,
            )

        counters = [
            (tsdb.models.group, group.id),
            (tsdb.models.project, project.id),
        ]

        if release:
            counters.append((tsdb.models.release, release.id))

        tsdb.incr_multi(counters, timestamp=event.datetime, environment_id=environment.id)

        frequencies = [
            # (tsdb.models.frequent_projects_by_organization, {
            #     project.organization_id: {
            #         project.id: 1,
            #     },
            # }),
            # (tsdb.models.frequent_issues_by_project, {
            #     project.id: {
            #         group.id: 1,
            #     },
            # })
            (tsdb.models.frequent_environments_by_group, {
                group.id: {
                    environment.id: 1,
                },
            })
        ]

        if release:
            frequencies.append(
                (tsdb.models.frequent_releases_by_group, {
                    group.id: {
                        grouprelease.id: 1,
                    },
                })
            )

        tsdb.record_frequency_multi(frequencies, timestamp=event.datetime)

        UserReport.objects.filter(
            project=project,
            event_id=event_id,
        ).update(
            group=group,
            environment=environment,
        )

        # save the event unless its been sampled
        if not is_sample:
            try:
                with transaction.atomic(using=router.db_for_write(Event)):
                    event.save()
            except IntegrityError:
                logger.info(
                    'duplicate.found',
                    exc_info=True,
                    extra={
                        'event_uuid': event_id,
                        'project_id': project.id,
                        'group_id': group.id,
                        'model': Event.__name__,
                    }
                )
                return event

            tagstore.delay_index_event_tags(
                organization_id=project.organization_id,
                project_id=project.id,
                group_id=group.id,
                environment_id=environment.id,
                event_id=event.id,
                tags=event.tags,
                date_added=event.datetime,
            )

        if event_user:
            tsdb.record_multi(
                (
                    (tsdb.models.users_affected_by_group, group.id, (event_user.tag_value, )),
                    (tsdb.models.users_affected_by_project, project.id, (event_user.tag_value, )),
                ),
                timestamp=event.datetime,
                environment_id=environment.id,
            )
        if release:
            if is_new:
                buffer.incr(
                    ReleaseProject, {'new_groups': 1}, {
                        'release_id': release.id,
                        'project_id': project.id,
                    }
                )
            if is_new_group_environment:
                buffer.incr(
                    ReleaseProjectEnvironment, {'new_issues_count': 1}, {
                        'project_id': project.id,
                        'release_id': release.id,
                        'environment_id': environment.id,
                    }
                )

        safe_execute(
            Group.objects.add_tags,
            group,
            environment,
            event.get_tags(),
            _with_transaction=False)

        if not raw:
            if not project.first_event:
                project.update(first_event=date)
                first_event_received.send_robust(project=project, group=group, sender=Project)

        eventstream.insert(
            group=group,
            event=event,
            is_new=is_new,
            is_sample=is_sample,
            is_regression=is_regression,
            is_new_group_environment=is_new_group_environment,
            primary_hash=hashes[0],
            # We are choosing to skip consuming the event back
            # in the eventstream if it's flagged as raw.
            # This means that we want to publish the event
            # through the event stream, but we don't care
            # about post processing and handling the commit.
            skip_consume=raw,
        )

        metrics.timing(
            'events.latency',
            received_timestamp - recorded_timestamp,
            tags={
                'project_id': project.id,
            },
        )

        metrics.timing(
            'events.size.data.post_save',
            event.size,
            tags={'project_id': project.id}
        )

        return event