def save_event(cache_key=None,
               data=None,
               start_time=None,
               event_id=None,
               project_id=None,
               **kwargs):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas, tsdb
    from sentry.models import ProjectKey

    if cache_key:
        data = default_cache.get(cache_key)

    if data is not None:
        data = CanonicalKeyDict(data)

    if event_id is None and data is not None:
        event_id = data['event_id']

    # Only when we come from reprocessing do we get a project_id sent into
    # the task.
    if project_id is None:
        project_id = data.pop('project')

    delete_raw_event(project_id, event_id, allow_hint_clear=True)

    # This covers two cases: where data is None because we did not manage
    # to fetch it from the default cache or the empty dictionary was
    # stored in the default cache.  The former happens if the event
    # expired while being on the queue, the second happens on reprocessing
    # if the raw event was deleted concurrently while we held on to
    # it.  This causes the node store to delete the data and we end up
    # fetching an empty dict.  We could in theory not invoke `save_event`
    # in those cases but it's important that we always clean up the
    # reprocessing reports correctly or they will screw up the UI.  So
    # to future proof this correctly we just handle this case here.
    if not data:
        metrics.incr('events.failed',
                     tags={
                         'reason': 'cache',
                         'stage': 'post'
                     })
        return

    Raven.tags_context({
        'project': project_id,
    })

    try:
        manager = EventManager(data)
        event = manager.save(project_id)

        # Always load attachments from the cache so we can later prune them.
        # Only save them if the event-attachments feature is active, though.
        if features.has('organizations:event-attachments',
                        event.project.organization,
                        actor=None):
            attachments = attachment_cache.get(cache_key) or []
            for attachment in attachments:
                save_attachment(event, attachment)

    except HashDiscarded:
        increment_list = [
            (tsdb.models.project_total_received_discarded, project_id),
        ]

        try:
            project = Project.objects.get_from_cache(id=project_id)
        except Project.DoesNotExist:
            pass
        else:
            increment_list.extend([
                (tsdb.models.project_total_blacklisted, project.id),
                (tsdb.models.organization_total_blacklisted,
                 project.organization_id),
            ])

            project_key = None
            if data.get('key_id') is not None:
                try:
                    project_key = ProjectKey.objects.get_from_cache(
                        id=data['key_id'])
                except ProjectKey.DoesNotExist:
                    pass
                else:
                    increment_list.append(
                        (tsdb.models.key_total_blacklisted, project_key.id))

            quotas.refund(
                project,
                key=project_key,
                timestamp=start_time,
            )

        tsdb.incr_multi(
            increment_list,
            timestamp=to_datetime(start_time)
            if start_time is not None else None,
        )

    finally:
        if cache_key:
            default_cache.delete(cache_key)
            attachment_cache.delete(cache_key)

        if start_time:
            metrics.timing('events.time-to-process',
                           time() - start_time,
                           instance=data['platform'])
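A note on the difference between this variant and the next one: here the cache is consulted whenever a cache_key is provided, so a payload passed directly as data is silently dropped if the cache entry has expired, whereas Example #2 tightens the guard to `if cache_key and data is None`. The following sketch is illustrative only, with a plain dict standing in for default_cache; it is not Sentry code.

# Hypothetical stand-ins: an expired cache entry and the two guard styles.
expired_cache = {}

def fetch_v1(cache_key, data):
    # This example's guard: the cache always wins, even when data was passed in.
    if cache_key:
        data = expired_cache.get(cache_key)
    return data

def fetch_v2(cache_key, data):
    # Example #2's guard: only hit the cache when no payload was provided.
    if cache_key and data is None:
        data = expired_cache.get(cache_key)
    return data

payload = {'event_id': 'abc123', 'project': 1}
assert fetch_v1('k', payload) is None       # the passed-in payload is lost
assert fetch_v2('k', payload) == payload    # the passed-in payload survives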
Example #2
def _do_save_event(cache_key=None,
                   data=None,
                   start_time=None,
                   event_id=None,
                   project_id=None,
                   **kwargs):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import HashDiscarded, EventManager, track_outcome
    from sentry import quotas
    from sentry.models import ProjectKey

    if cache_key and data is None:
        data = default_cache.get(cache_key)

    if data is not None:
        data = CanonicalKeyDict(data)

    if event_id is None and data is not None:
        event_id = data['event_id']

    # Only when we come from reprocessing do we get a project_id sent into
    # the task.
    if project_id is None:
        project_id = data.pop('project')

    key_id = None if data is None else data.get('key_id')
    timestamp = to_datetime(start_time) if start_time is not None else None

    delete_raw_event(project_id, event_id, allow_hint_clear=True)

    # This covers two cases: where data is None because we did not manage
    # to fetch it from the default cache or the empty dictionary was
    # stored in the default cache.  The former happens if the event
    # expired while being on the queue, the second happens on reprocessing
    # if the raw event was deleted concurrently while we held on to
    # it.  This causes the node store to delete the data and we end up
    # fetching an empty dict.  We could in theory not invoke `save_event`
    # in those cases but it's important that we always clean up the
    # reprocessing reports correctly or they will screw up the UI.  So
    # to future proof this correctly we just handle this case here.
    if not data:
        metrics.incr('events.failed',
                     tags={
                         'reason': 'cache',
                         'stage': 'post'
                     },
                     skip_internal=False)
        return

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    event = None
    try:
        manager = EventManager(data)
        event = manager.save(project_id, assume_normalized=True)

        # Always load attachments from the cache so we can later prune them.
        # Only save them if the event-attachments feature is active, though.
        if features.has('organizations:event-attachments',
                        event.project.organization,
                        actor=None):
            attachments = attachment_cache.get(cache_key) or []
            for attachment in attachments:
                save_attachment(event, attachment)

        # This is where we can finally say that we have accepted the event.
        track_outcome(event.project.organization_id, event.project.id, key_id,
                      'accepted', None, timestamp)

    except HashDiscarded:
        project = Project.objects.get_from_cache(id=project_id)
        reason = FilterStatKeys.DISCARDED_HASH
        project_key = None
        try:
            if key_id is not None:
                project_key = ProjectKey.objects.get_from_cache(id=key_id)
        except ProjectKey.DoesNotExist:
            pass

        quotas.refund(project, key=project_key, timestamp=start_time)
        track_outcome(project.organization_id, project_id, key_id, 'filtered',
                      reason, timestamp)

    finally:
        if cache_key:
            default_cache.delete(cache_key)

            # In the unlikely case that we did not manage to persist the
            # event, we always delete the key as well.
            if event is None or \
               features.has('organizations:event-attachments', event.project.organization, actor=None):
                attachment_cache.delete(cache_key)

        if start_time:
            metrics.timing('events.time-to-process',
                           time() - start_time,
                           instance=data['platform'])
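The long comment above explains why the guard is `if not data` rather than `if data is None`: both a cache miss (None) and a raw event deleted concurrently during reprocessing (which leaves an empty dict in the cache) must short-circuit the save. A minimal illustration, not Sentry code:

for stale_payload in (None, {}):
    # A cache miss yields None; a concurrently deleted raw event leaves an
    # empty dict behind. Either way the falsiness check fires, the task bumps
    # the events.failed counter and returns early.
    assert not stale_payload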
Example #3
def _do_save_event(cache_key=None, data=None, start_time=None, event_id=None,
                   project_id=None, **kwargs):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas
    from sentry.models import ProjectKey
    from sentry.utils.outcomes import Outcome, track_outcome

    if cache_key and data is None:
        data = default_cache.get(cache_key)

    if data is not None:
        data = CanonicalKeyDict(data)

    if event_id is None and data is not None:
        event_id = data['event_id']

    # Only when we come from reprocessing do we get a project_id sent into
    # the task.
    if project_id is None:
        project_id = data.pop('project')

    key_id = None if data is None else data.get('key_id')
    if key_id is not None:
        key_id = int(key_id)
    timestamp = to_datetime(start_time) if start_time is not None else None

    delete_raw_event(project_id, event_id, allow_hint_clear=True)

    # This covers two cases: where data is None because we did not manage
    # to fetch it from the default cache or the empty dictionary was
    # stored in the default cache.  The former happens if the event
    # expired while being on the queue, the second happens on reprocessing
    # if the raw event was deleted concurrently while we held on to
    # it.  This causes the node store to delete the data and we end up
    # fetching an empty dict.  We could in theory not invoke `save_event`
    # in those cases but it's important that we always clean up the
    # reprocessing reports correctly or they will screw up the UI.  So
    # to future proof this correctly we just handle this case here.
    if not data:
        metrics.incr(
            'events.failed',
            tags={
                'reason': 'cache',
                'stage': 'post'},
            skip_internal=False)
        return

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    event = None
    try:
        manager = EventManager(data)
        event = manager.save(project_id, assume_normalized=True)

        # Always load attachments from the cache so we can later prune them.
        # Only save them if the event-attachments feature is active, though.
        if features.has('organizations:event-attachments', event.project.organization, actor=None):
            attachments = attachment_cache.get(cache_key) or []
            for attachment in attachments:
                save_attachment(event, attachment)

        # This is where we can finally say that we have accepted the event.
        track_outcome(
            event.project.organization_id,
            event.project.id,
            key_id,
            Outcome.ACCEPTED,
            None,
            timestamp,
            event_id
        )

    except HashDiscarded:
        project = Project.objects.get_from_cache(id=project_id)
        reason = FilterStatKeys.DISCARDED_HASH
        project_key = None
        try:
            if key_id is not None:
                project_key = ProjectKey.objects.get_from_cache(id=key_id)
        except ProjectKey.DoesNotExist:
            pass

        quotas.refund(project, key=project_key, timestamp=start_time)
        track_outcome(
            project.organization_id,
            project_id,
            key_id,
            Outcome.FILTERED,
            reason,
            timestamp,
            event_id
        )

    finally:
        if cache_key:
            default_cache.delete(cache_key)

            # In the unlikely case that we did not manage to persist the
            # event, we always delete the key as well.
            if event is None or \
               features.has('organizations:event-attachments', event.project.organization, actor=None):
                attachment_cache.delete(cache_key)

        if start_time:
            metrics.timing(
                'events.time-to-process',
                time() - start_time,
                instance=data['platform'])
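Compared with Example #2, this variant passes Outcome.ACCEPTED / Outcome.FILTERED constants from sentry.utils.outcomes instead of the bare strings 'accepted' and 'filtered', and also forwards the event_id. A rough sketch of what such an enum-based call might look like; the member values and the stub below are assumptions for illustration, not taken from the source.

from enum import IntEnum

# Hypothetical Outcome-style enum; only the ACCEPTED/FILTERED names matter here.
class OutcomeSketch(IntEnum):
    ACCEPTED = 0
    FILTERED = 1

def track_outcome_stub(org_id, project_id, key_id, outcome, reason, timestamp, event_id):
    # Logging stand-in for track_outcome: one record per event outcome.
    print(org_id, project_id, key_id, outcome.name, reason, timestamp, event_id)

track_outcome_stub(1, 42, None, OutcomeSketch.ACCEPTED, None, None, 'abc123')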
Example #4
def _do_save_event(cache_key=None,
                   data=None,
                   start_time=None,
                   event_id=None,
                   project_id=None,
                   **kwargs):
    """
    Saves an event to the database.
    """

    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas
    from sentry.models import ProjectKey
    from sentry.utils.outcomes import Outcome, track_outcome
    from sentry.ingest.outcomes_consumer import mark_signal_sent

    event_type = "none"

    if cache_key and data is None:
        with metrics.timer(
                "tasks.store.do_save_event.get_cache") as metric_tags:
            data = default_cache.get(cache_key)
            if data is not None:
                metric_tags["event_type"] = event_type = data.get(
                    "type") or "none"

    with metrics.global_tags(event_type=event_type):
        if data is not None:
            data = CanonicalKeyDict(data)

        if event_id is None and data is not None:
            event_id = data["event_id"]

        # Only when we come from reprocessing do we get a project_id sent into
        # the task.
        if project_id is None:
            project_id = data.pop("project")

        key_id = None if data is None else data.get("key_id")
        if key_id is not None:
            key_id = int(key_id)
        timestamp = to_datetime(start_time) if start_time is not None else None

        # We only need to delete raw events for events that support
        # reprocessing.  If the data cannot be found we want to assume
        # that we need to delete the raw event.
        if not data or reprocessing.event_supports_reprocessing(data):
            with metrics.timer("tasks.store.do_save_event.delete_raw_event"):
                delete_raw_event(project_id, event_id, allow_hint_clear=True)

        # This covers two cases: where data is None because we did not manage
        # to fetch it from the default cache or the empty dictionary was
        # stored in the default cache.  The former happens if the event
        # expired while being on the queue, the second happens on reprocessing
        # if the raw event was deleted concurrently while we held on to
        # it.  This causes the node store to delete the data and we end up
        # fetching an empty dict.  We could in theory not invoke `save_event`
        # in those cases but it's important that we always clean up the
        # reprocessing reports correctly or they will screw up the UI.  So
        # to future proof this correctly we just handle this case here.
        if not data:
            metrics.incr("events.failed",
                         tags={
                             "reason": "cache",
                             "stage": "post"
                         },
                         skip_internal=False)
            return

        with configure_scope() as scope:
            scope.set_tag("project", project_id)

        event = None
        try:
            with metrics.timer("tasks.store.do_save_event.event_manager.save"):
                manager = EventManager(data)
                # event.project.organization is populated after this statement.
                event = manager.save(project_id,
                                     assume_normalized=True,
                                     cache_key=cache_key)

            with metrics.timer("tasks.store.do_save_event.track_outcome"):
                # This is where we can finally say that we have accepted the event.
                track_outcome(
                    event.project.organization_id,
                    event.project.id,
                    key_id,
                    Outcome.ACCEPTED,
                    None,
                    timestamp,
                    event_id,
                )

        except HashDiscarded:
            project = Project.objects.get_from_cache(id=project_id)
            reason = FilterStatKeys.DISCARDED_HASH
            project_key = None
            try:
                if key_id is not None:
                    project_key = ProjectKey.objects.get_from_cache(id=key_id)
            except ProjectKey.DoesNotExist:
                pass

            quotas.refund(project, key=project_key, timestamp=start_time)
            # No signal is supposed to be sent for this particular
            # outcome-reason combination. Prevent the outcome consumer from
            # emitting it for now.
            #
            # XXX(markus): Revisit decision about signals once outcomes consumer is stable.
            mark_signal_sent(project_id, event_id)
            track_outcome(
                project.organization_id,
                project_id,
                key_id,
                Outcome.FILTERED,
                reason,
                timestamp,
                event_id,
            )

        finally:
            if cache_key:
                with metrics.timer("tasks.store.do_save_event.delete_cache"):
                    default_cache.delete(cache_key)

                with metrics.timer(
                        "tasks.store.do_save_event.delete_attachment_cache"):
                    # In the unlikely case that we did not manage to persist the
                    # event, we always delete the key as well.
                    if event is None or features.has(
                            "organizations:event-attachments",
                            event.project.organization,
                            actor=None):
                        attachment_cache.delete(cache_key)

            if start_time:
                metrics.timing("events.time-to-process",
                               time() - start_time,
                               instance=data["platform"])
Example #5
    def save(self, project_id, raw=False, assume_normalized=False, start_time=None, cache_key=None):
        """
        After normalizing and processing an event, save adjacent models such as
        releases and environments to postgres and write the event into
        eventstream. From there it will be picked up by Snuba and
        post-processing.

        We re-insert events with duplicate IDs into Snuba, which is responsible
        for deduplicating events. Since deduplication in Snuba is on the primary
        key (based on event ID, project ID and day), events with the same ID are only
        deduplicated if their timestamps fall on the same day. The latest event
        always wins and overwrites the value of events received earlier in that day.

        Since we increment counters and frequencies here before events get inserted
        into the eventstream, these numbers may be larger than the total number of
        events if we receive duplicate event IDs that fall on the same day
        (that do not hit cache first).
        """

        # Normalize if needed
        if not self._normalized:
            if not assume_normalized:
                self.normalize(project_id=project_id)
            self._normalized = True

        with metrics.timer("event_manager.save.project.get_from_cache"):
            project = Project.objects.get_from_cache(id=project_id)

        projects = {project.id: project}

        if self._data.get("type") == "transaction":
            self._data["project"] = int(project_id)
            job = {"data": self._data, "start_time": start_time}
            jobs = save_transaction_events([job], projects)
            return jobs[0]["event"]

        with metrics.timer("event_manager.save.organization.get_from_cache"):
            project._organization_cache = Organization.objects.get_from_cache(
                id=project.organization_id
            )

        job = {"data": self._data, "project_id": project_id, "raw": raw, "start_time": start_time}
        jobs = [job]

        _pull_out_data(jobs, projects)
        _get_or_create_release_many(jobs, projects)
        _get_event_user_many(jobs, projects)

        with metrics.timer("event_manager.load_grouping_config"):
            # At this point we want to normalize the in_app values in case the
            # clients did not set this appropriately so far.
            grouping_config = load_grouping_config(
                get_grouping_config_dict_for_event_data(job["data"], project)
            )

        with metrics.timer("event_manager.normalize_stacktraces_for_grouping"):
            normalize_stacktraces_for_grouping(job["data"], grouping_config)

        _derive_plugin_tags_many(jobs, projects)
        _derive_interface_tags_many(jobs)

        with metrics.timer("event_manager.apply_server_fingerprinting"):
            # The active grouping config was put into the event in the
            # normalize step before.  We now also make sure that the
            # fingerprint was set to `'{{ default }}'` just in case someone
            # removed it from the payload.  The call to get_hashes will then
            # look at `grouping_config` to pick the right parameters.
            job["data"]["fingerprint"] = job["data"].get("fingerprint") or ["{{ default }}"]
            apply_server_fingerprinting(job["data"], get_fingerprinting_config_for_project(project))

        with metrics.timer("event_manager.event.get_hashes"):
            # Here we try to use the grouping config that was requested in the
            # event.  If that config has since been deleted (because it was an
            # experimental grouping config) we fall back to the default.
            try:
                hashes = job["event"].get_hashes()
            except GroupingConfigNotFound:
                job["data"]["grouping_config"] = get_grouping_config_dict_for_project(project)
                hashes = job["event"].get_hashes()

        job["data"]["hashes"] = hashes

        _materialize_metadata_many(jobs)

        # The group gets the same metadata as the event when it's flushed but
        # additionally the `last_received` key is set.  This key is used by
        # _save_aggregate.
        group_metadata = dict(job["materialized_metadata"])
        group_metadata["last_received"] = job["received_timestamp"]
        kwargs = {
            "platform": job["platform"],
            "message": job["event"].search_message,
            "culprit": job["culprit"],
            "logger": job["logger_name"],
            "level": LOG_LEVELS_MAP.get(job["level"]),
            "last_seen": job["event"].datetime,
            "first_seen": job["event"].datetime,
            "active_at": job["event"].datetime,
            "data": group_metadata,
        }

        if job["release"]:
            kwargs["first_release"] = job["release"]

        try:
            job["group"], job["is_new"], job["is_regression"] = _save_aggregate(
                event=job["event"], hashes=hashes, release=job["release"], **kwargs
            )
        except HashDiscarded:
            project_key = None
            if job["key_id"] is not None:
                try:
                    project_key = ProjectKey.objects.get_from_cache(id=job["key_id"])
                except ProjectKey.DoesNotExist:
                    pass

            quotas.refund(project, key=project_key, timestamp=start_time)

            track_outcome(
                org_id=project.organization_id,
                project_id=project_id,
                key_id=job["key_id"],
                outcome=Outcome.FILTERED,
                reason=FilterStatKeys.DISCARDED_HASH,
                timestamp=to_datetime(job["start_time"]),
                event_id=job["event"].event_id,
                category=job["category"],
            )

            metrics.incr(
                "events.discarded",
                skip_internal=True,
                tags={"organization_id": project.organization_id, "platform": job["platform"]},
            )
            raise

        job["event"].group = job["group"]

        # store a reference to the group id to guarantee validation of isolation
        # XXX(markus): No clue what this does
        job["event"].data.bind_ref(job["event"])

        _get_or_create_environment_many(jobs, projects)

        if job["group"]:
            group_environment, job["is_new_group_environment"] = GroupEnvironment.get_or_create(
                group_id=job["group"].id,
                environment_id=job["environment"].id,
                defaults={"first_release": job["release"] or None},
            )
        else:
            job["is_new_group_environment"] = False

        _get_or_create_release_associated_models(jobs, projects)

        if job["release"] and job["group"]:
            job["grouprelease"] = GroupRelease.get_or_create(
                group=job["group"],
                release=job["release"],
                environment=job["environment"],
                datetime=job["event"].datetime,
            )

        _tsdb_record_all_metrics(jobs)

        if job["group"]:
            UserReport.objects.filter(project=project, event_id=job["event"].event_id).update(
                group=job["group"], environment=job["environment"]
            )

        _materialize_event_metrics(jobs)

        # Load attachments first, but persist them at the very last after
        # posting to eventstream to make sure all counters and eventstream are
        # incremented for sure.
        attachments = get_attachments(cache_key, job["event"])
        for attachment in attachments:
            key = "bytes.stored.%s" % (attachment.type,)
            job["event_metrics"][key] = (job["event_metrics"].get(key) or 0) + len(attachment.data)

        _nodestore_save_many(jobs)

        if job["release"]:
            if job["is_new"]:
                buffer.incr(
                    ReleaseProject,
                    {"new_groups": 1},
                    {"release_id": job["release"].id, "project_id": project.id},
                )
            if job["is_new_group_environment"]:
                buffer.incr(
                    ReleaseProjectEnvironment,
                    {"new_issues_count": 1},
                    {
                        "project_id": project.id,
                        "release_id": job["release"].id,
                        "environment_id": job["environment"].id,
                    },
                )

        if not raw:
            if not project.first_event:
                project.update(first_event=job["event"].datetime)
                first_event_received.send_robust(
                    project=project, event=job["event"], sender=Project
                )

        _eventstream_insert_many(jobs)

        # Do this last to ensure signals get emitted even if connection to the
        # file store breaks temporarily.
        save_attachments(attachments, job["event"])

        metric_tags = {"from_relay": "_relay_processed" in job["data"]}

        metrics.timing(
            "events.latency",
            job["received_timestamp"] - job["recorded_timestamp"],
            tags=metric_tags,
        )
        metrics.timing("events.size.data.post_save", job["event"].size, tags=metric_tags)
        metrics.incr(
            "events.post_save.normalize.errors",
            amount=len(job["data"].get("errors") or ()),
            tags=metric_tags,
        )

        _track_outcome_accepted_many(jobs)

        self._data = job["event"].data.data
        return job["event"]
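The deduplication rule in the docstring is worth spelling out: Snuba's effective primary key is (project ID, event ID, day), so duplicate event IDs only collapse when they land on the same calendar day, and the latest write wins. A toy illustration with a plain dict, not the real Snuba schema:

from datetime import datetime, timezone

def dedup_key(project_id, event_id, timestamp):
    # Effective primary key described in the docstring: same day => same row.
    return (project_id, event_id, timestamp.date())

store = {}
first = datetime(2019, 7, 1, 10, 0, tzinfo=timezone.utc)
later = datetime(2019, 7, 1, 23, 0, tzinfo=timezone.utc)
store[dedup_key(1, 'abc', first)] = {'version': 1}
store[dedup_key(1, 'abc', later)] = {'version': 2}                 # same day: overwritten
store[dedup_key(1, 'abc', first.replace(day=2))] = {'version': 3}  # next day: new row
assert len(store) == 2
assert store[(1, 'abc', first.date())]['version'] == 2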
Example #6
def save_event(cache_key=None,
               data=None,
               start_time=None,
               event_id=None,
               **kwargs):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas, tsdb
    from sentry.models import ProjectKey

    if cache_key:
        data = default_cache.get(cache_key)

    if event_id is None and data is not None:
        event_id = data['event_id']

    if data is None:
        metrics.incr('events.failed',
                     tags={
                         'reason': 'cache',
                         'stage': 'post'
                     })
        return

    project_id = data.pop('project')

    delete_raw_event(project_id, event_id, allow_hint_clear=True)

    Raven.tags_context({
        'project': project_id,
    })

    try:
        manager = EventManager(data)
        manager.save(project_id)
    except HashDiscarded as exc:
        # TODO(jess): remove this before it goes out to a wider audience
        info_logger.info('discarded.hash',
                         extra={
                             'project_id': project_id,
                             'description': exc.message,
                         })

        tsdb.incr(
            tsdb.models.project_total_received_discarded,
            project_id,
            timestamp=to_datetime(start_time)
            if start_time is not None else None,
        )

        try:
            project = Project.objects.get_from_cache(id=project_id)
        except Project.DoesNotExist:
            pass
        else:
            project_key = None
            if data.get('key_id') is not None:
                try:
                    project_key = ProjectKey.objects.get_from_cache(
                        id=data['key_id'])
                except ProjectKey.DoesNotExist:
                    pass

            quotas.refund(
                project,
                key=project_key,
                timestamp=start_time,
            )

    finally:
        if cache_key:
            default_cache.delete(cache_key)
        if start_time:
            metrics.timing('events.time-to-process',
                           time() - start_time,
                           instance=data['platform'])
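The except-branch here (and in the other variants) handles the case where EventManager.save raises HashDiscarded because the event's group hash has been marked as discarded; the task then refunds quota and records the discard instead of storing the event. A simplified illustration of that control flow, using stand-in names rather than the real classes:

class HashDiscardedSketch(Exception):
    pass

def save_sketch(discarded_hashes, event_hash):
    # Stand-in for EventManager.save: refuse events whose hash was discarded.
    if event_hash in discarded_hashes:
        raise HashDiscardedSketch('discarded hash %s' % event_hash)
    return 'stored'

refunded = False
try:
    save_sketch({'deadbeef'}, 'deadbeef')
except HashDiscardedSketch:
    refunded = True   # in the task: quotas.refund(...) and discard counters
assert refunded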
Example #7
def save_event(cache_key=None, data=None, start_time=None, event_id=None,
               project_id=None, **kwargs):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas, tsdb
    from sentry.models import ProjectKey

    if cache_key:
        data = default_cache.get(cache_key)

    if event_id is None and data is not None:
        event_id = data['event_id']

    # Only when we come from reprocessing do we get a project_id sent into
    # the task.
    if project_id is None:
        project_id = data.pop('project')

    delete_raw_event(project_id, event_id, allow_hint_clear=True)

    # This covers two cases: where data is None because we did not manage
    # to fetch it from the default cache or the empty dictionary was
    # stored in the default cache.  The former happens if the event
    # expired while being on the queue, the second happens on reprocessing
    # if the raw event was deleted concurrently while we held on to
    # it.  This causes the node store to delete the data and we end up
    # fetching an empty dict.  We could in theory not invoke `save_event`
    # in those cases but it's important that we always clean up the
    # reprocessing reports correctly or they will screw up the UI.  So
    # to future proof this correctly we just handle this case here.
    if not data:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'post'})
        return

    Raven.tags_context({
        'project': project_id,
    })

    try:
        manager = EventManager(data)
        manager.save(project_id)
    except HashDiscarded:
        increment_list = [
            (tsdb.models.project_total_received_discarded, project_id),
        ]

        try:
            project = Project.objects.get_from_cache(id=project_id)
        except Project.DoesNotExist:
            pass
        else:
            increment_list.extend([
                (tsdb.models.project_total_blacklisted, project.id),
                (tsdb.models.organization_total_blacklisted, project.organization_id),
            ])

            project_key = None
            if data.get('key_id') is not None:
                try:
                    project_key = ProjectKey.objects.get_from_cache(id=data['key_id'])
                except ProjectKey.DoesNotExist:
                    pass
                else:
                    increment_list.append((tsdb.models.key_total_blacklisted, project_key.id))

            quotas.refund(
                project,
                key=project_key,
                timestamp=start_time,
            )

        tsdb.incr_multi(
            increment_list,
            timestamp=to_datetime(start_time) if start_time is not None else None,
        )

    finally:
        if cache_key:
            default_cache.delete(cache_key)
        if start_time:
            metrics.timing(
                'events.time-to-process',
                time() - start_time,
                instance=data['platform'])
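This variant batches its discard counters into increment_list and bumps them with a single tsdb.incr_multi call rather than issuing one tsdb.incr per counter. A rough in-memory equivalent; the model names below are just the labels used above, not real tsdb model objects:

from collections import Counter

counters = Counter()

def incr_multi_sketch(items, timestamp=None):
    # Each (model, key) pair is incremented by one, like tsdb.incr_multi above.
    for model, key in items:
        counters[(model, key)] += 1

increment_list = [
    ('project_total_received_discarded', 42),
    ('project_total_blacklisted', 42),
    ('organization_total_blacklisted', 7),
]
incr_multi_sketch(increment_list)
assert counters[('project_total_blacklisted', 42)] == 1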
Example #8
def filter_attachments_for_group(attachments, job):
    """
    Removes crash reports exceeding the group-limit.

    If the project or organization is configured to limit the number of crash
    reports per group, the number of stored crashes is limited. This requires
    `event.group` to be set.

    Emits one outcome per removed attachment.

    :param attachments: The full list of attachments to filter.
    :param job:         The job context container.
    """
    if not attachments:
        return attachments

    event = job["event"]
    project = event.project

    # The setting is both an organization and project setting. The project
    # setting strictly overrides the organization setting, unless set to the
    # default.
    max_crashreports = get_max_crashreports(project, allow_none=True)
    if max_crashreports is None:
        max_crashreports = get_max_crashreports(project.organization)

    # The number of crash reports is cached per group
    crashreports_key = get_crashreport_key(event.group_id)

    # Only fetch the number of stored crash reports if there is a crash report
    # in the list of attachments. Otherwise, we won't require this number.
    if any(attachment.type in CRASH_REPORT_TYPES for attachment in attachments):
        cached_reports = get_stored_crashreports(crashreports_key, event, max_crashreports)
    else:
        cached_reports = 0
    stored_reports = cached_reports

    filtered = []
    refund_quantity = 0
    for attachment in attachments:
        # If the attachment is a crash report (e.g. minidump), we need to honor
        # the store_crash_reports setting. Otherwise, we assume that the client
        # has already verified PII and just store the attachment.
        if attachment.type in CRASH_REPORT_TYPES:
            if crashreports_exceeded(stored_reports, max_crashreports):
                # Indicate that the crash report has been removed due to a limit
                # on the maximum number of crash reports. If this flag is True,
                # it indicates that there are *other* events in the same group
                # that store a crash report. This flag will therefore *not* be
                # set if storage of crash reports is completely disabled.
                if max_crashreports > 0:
                    job["data"]["metadata"]["stripped_crash"] = True

                track_outcome(
                    org_id=event.project.organization_id,
                    project_id=job["project_id"],
                    key_id=job["key_id"],
                    outcome=Outcome.FILTERED,
                    reason=FilterStatKeys.CRASH_REPORT_LIMIT,
                    timestamp=to_datetime(job["start_time"]),
                    event_id=event.event_id,
                    category=DataCategory.ATTACHMENT,
                    quantity=attachment.size,
                )

                # Quotas are counted with at least ``1`` for attachments.
                refund_quantity += attachment.size or 1
                continue
            stored_reports += 1

        filtered.append(attachment)

    # Check if we have exceeded the stored crash reports count. If so, we
    # persist the current maximum (not the actual number!) into the cache. Next
    # time when loading from the cache, we will validate that this number has
    # not changed, or otherwise re-fetch from the database.
    if crashreports_exceeded(stored_reports, max_crashreports) and stored_reports > cached_reports:
        cache.set(crashreports_key, max_crashreports, CRASH_REPORT_TIMEOUT)

    if refund_quantity:
        quotas.refund(
            project,
            key=job["project_key"],
            timestamp=job["start_time"],
            category=DataCategory.ATTACHMENT,
            quantity=refund_quantity,
        )

    return filtered
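The crash-report limit described in the docstring is applied per group: crash-report attachments (minidumps and the like) are only kept while the stored count is below max_crashreports, and every dropped one emits a FILTERED outcome and is refunded. A stripped-down illustration of the counting logic; the names are stand-ins, not the real helpers:

def crashreports_exceeded_sketch(stored, maximum):
    # Mirrors the check above: the limit is inclusive, and a maximum of zero
    # disables crash-report storage entirely.
    return stored >= maximum

max_crashreports = 1
kept, stored = [], 0
for attachment_type in ['minidump', 'minidump', 'log']:
    if attachment_type == 'minidump':                        # a crash-report type
        if crashreports_exceeded_sketch(stored, max_crashreports):
            continue                                         # dropped; outcome emitted
        stored += 1
    kept.append(attachment_type)
assert kept == ['minidump', 'log']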
Example #9
def discard_event(job, attachments):
    """
    Refunds consumed quotas for an event and its attachments.

    For the event and each dropped attachment, an outcome
    FILTERED(discarded-hash) is emitted.

    :param job:         The job context container.
    :param attachments: The full list of attachments to filter.
    """

    project = job["event"].project

    quotas.refund(
        project,
        key=job["project_key"],
        timestamp=job["start_time"],
        category=job["category"],
        quantity=1,
    )

    track_outcome(
        org_id=project.organization_id,
        project_id=job["project_id"],
        key_id=job["key_id"],
        outcome=Outcome.FILTERED,
        reason=FilterStatKeys.DISCARDED_HASH,
        timestamp=to_datetime(job["start_time"]),
        event_id=job["event"].event_id,
        category=job["category"],
    )

    attachment_quantity = 0
    for attachment in attachments:
        # Quotas are counted with at least ``1`` for attachments.
        attachment_quantity += attachment.size or 1

        track_outcome(
            org_id=project.organization_id,
            project_id=job["project_id"],
            key_id=job["key_id"],
            outcome=Outcome.FILTERED,
            reason=FilterStatKeys.DISCARDED_HASH,
            timestamp=to_datetime(job["start_time"]),
            event_id=job["event"].event_id,
            category=DataCategory.ATTACHMENT,
            quantity=attachment.size,
        )

    if attachment_quantity:
        quotas.refund(
            project,
            key=job["project_key"],
            timestamp=job["start_time"],
            category=DataCategory.ATTACHMENT,
            quantity=attachment_quantity,
        )

    metrics.incr(
        "events.discarded", skip_internal=True, tags={"platform": job["platform"]},
    )
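One detail of the refund accounting above is easy to miss: attachments are refunded by their size in bytes, but an attachment with no payload still counts as at least one unit, per the "at least ``1``" comment. For example:

attachment_sizes = [2048, 0, 512]
attachment_quantity = sum(size or 1 for size in attachment_sizes)
assert attachment_quantity == 2048 + 1 + 512   # the empty attachment still counts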