コード例 #1
0
ファイル: reprocessing2.py プロジェクト: sugusbs/sentry
def spawn_capture_nodestore_stats(cache_key, project_id, event_id):
    """
    Schedule the ``capture_nodestore_stats`` task for an event, if selected.

    When ``_should_capture_nodestore_stats(event_id)`` is truthy the task is
    queued with the given ``cache_key``, ``project_id`` and ``event_id``.
    Otherwise the entry stored under the unprocessed key derived from
    ``cache_key`` is deleted from the event processing store and nothing is
    scheduled.
    """
    if _should_capture_nodestore_stats(event_id):
        # Imported lazily, and only on the path that actually needs the task.
        from sentry.tasks.reprocessing2 import capture_nodestore_stats

        capture_nodestore_stats.delay(
            cache_key=cache_key, project_id=project_id, event_id=event_id
        )
        return

    # Not selected: nobody will read the unprocessed copy, so remove it now.
    event_processing_store.delete_by_key(_get_unprocessed_key(cache_key))
コード例 #2
0
ファイル: post_process.py プロジェクト: blacknode/sentry
def post_process_group(
    is_new, is_regression, is_new_group_environment, cache_key, group_id=None, **kwargs
):
    """
    Fires post processing hooks for a group.

    The event payload is loaded from the event processing store under
    ``cache_key``. If nothing is stored there, ``post_process.skipped`` is
    logged and the function returns without doing any work.

    Events without a group id are handled as transactions: only the
    ``transaction_processed`` signal is sent and the cache entry is deleted.

    For every other event the project, organization and group are re-bound
    and stats are captured. Reprocessed events only get a ``REPROCESSED``
    inbox entry (when ``is_new``); all other events go through snoozes, inbox
    reasons, owner assignment, rules, suspect-commit scheduling, service
    hooks, resource-change hooks, plugins and similarity recording, followed
    by the ``event_processed`` signal. Attachments are patched and the cache
    entry is deleted in both cases.

    :param is_new: forwarded to ``_capture_stats``, ``RuleProcessor`` and the
        plugin hooks; also selects the ``NEW``/``REPROCESSED`` inbox reasons
        and triggers the "Group created" resource-change hook.
    :param is_regression: forwarded to ``RuleProcessor`` and the plugin
        hooks; selects the ``REGRESSION`` inbox reason when ``is_new`` is
        false.
    :param is_new_group_environment: forwarded to ``RuleProcessor``.
    :param cache_key: key of the event payload in the event processing store.
    :param group_id: group id bound to the constructed ``Event``; a falsy
        value makes the event take the transaction path.
    :param kwargs: only ``primary_hash`` is read; it is passed along with the
        ``event_processed`` signal.
    """
    from sentry.eventstore.models import Event
    from sentry.eventstore.processing import event_processing_store
    from sentry.reprocessing2 import is_reprocessed_event
    from sentry.utils import snuba

    with snuba.options_override({"consistent": True}):
        # We use the data being present/missing in the processing store
        # to ensure that we don't duplicate work should the forwarding consumers
        # need to rewind history.
        data = event_processing_store.get(cache_key)
        if not data:
            logger.info(
                "post_process.skipped",
                extra={"cache_key": cache_key, "reason": "missing_cache"},
            )
            return
        event = Event(
            project_id=data["project"], event_id=data["event_id"], group_id=group_id, data=data
        )

        set_current_event_project(event.project_id)

        # An event that carries no group id is treated as a transaction.
        is_transaction_event = not bool(event.group_id)

        from sentry.models import EventDict, Organization, Project

        # Re-bind node data to avoid renormalization. We only want to
        # renormalize when loading old data from the database.
        event.data = EventDict(event.data, skip_renormalization=True)

        # Re-bind Project and Org since we're reading the Event object
        # from cache which may contain stale parent models.
        event.project = Project.objects.get_from_cache(id=event.project_id)
        event.project.set_cached_field_value(
            "organization", Organization.objects.get_from_cache(id=event.project.organization_id)
        )

        # Simplified post processing for transaction events.
        # This should eventually be completely removed and transactions
        # will not go through any post processing.
        if is_transaction_event:
            transaction_processed.send_robust(
                sender=post_process_group,
                project=event.project,
                event=event,
            )

            event_processing_store.delete_by_key(cache_key)

            return

        is_reprocessed = is_reprocessed_event(event.data)

        # NOTE: we must pass through the full Event object, and not an
        # event_id since the Event object may not actually have been stored
        # in the database due to sampling.
        from sentry.models import Commit, GroupInboxReason
        from sentry.models.group import get_group_with_redirect
        from sentry.models.groupinbox import add_group_to_inbox
        from sentry.rules.processor import RuleProcessor
        from sentry.tasks.groupowner import process_suspect_commits
        from sentry.tasks.servicehooks import process_service_hook

        # Re-bind Group since we're reading the Event object
        # from cache, which may contain a stale group and project
        event.group, _ = get_group_with_redirect(event.group_id)
        # The resolved group may differ from the requested id, so keep the
        # event's group_id in sync with the group that was actually bound.
        event.group_id = event.group.id

        event.group.project = event.project
        event.group.project.set_cached_field_value("organization", event.project.organization)

        bind_organization_context(event.project.organization)

        _capture_stats(event, is_new)

        # Reprocessed events skip the regular pipeline below; the only thing
        # they get is an inbox entry when they produced a new group.
        if is_reprocessed and is_new:
            add_group_to_inbox(event.group, GroupInboxReason.REPROCESSED)

        if not is_reprocessed:
            # we process snoozes before rules as it might create a regression
            # but not if it's new because you can't immediately snooze a new group
            has_reappeared = False if is_new else process_snoozes(event.group)
            if not has_reappeared:  # If true, we added the .UNIGNORED reason already
                if is_new:
                    add_group_to_inbox(event.group, GroupInboxReason.NEW)
                elif is_regression:
                    add_group_to_inbox(event.group, GroupInboxReason.REGRESSION)

            handle_owner_assignment(event.project, event.group, event)

            rp = RuleProcessor(
                event, is_new, is_regression, is_new_group_environment, has_reappeared
            )
            # Becomes True as soon as the rule processor yields at least one
            # callback; used below to allow the "event.alert" service hook.
            has_alert = False
            # TODO(dcramer): ideally this would fanout, but serializing giant
            # objects back and forth isn't super efficient
            for callback, futures in rp.apply():
                has_alert = True
                safe_execute(callback, event, futures, _with_transaction=False)

            # Suspect-commit scheduling is best effort: failing to get the
            # lock is ignored and any other error is only logged.
            try:
                # Per-group lock (duration=10); presumably keeps concurrent
                # tasks for the same group from scheduling duplicate work.
                lock = locks.get(
                    f"w-o:{event.group_id}-d-l",
                    duration=10,
                )
                with lock.acquire():
                    # Cache, per organization, whether it has any commits so
                    # the query below is not repeated for every event.
                    has_commit_key = f"w-o:{event.project.organization_id}-h-c"
                    org_has_commit = cache.get(has_commit_key)
                    if org_has_commit is None:
                        org_has_commit = Commit.objects.filter(
                            organization_id=event.project.organization_id
                        ).exists()
                        # 3600 is presumably one hour in seconds, like the
                        # timeout used for the group key below -- confirm.
                        cache.set(has_commit_key, org_has_commit, 3600)

                    if org_has_commit:
                        # Debounce per group: while this key is cached the
                        # task is not scheduled again, only a metric is bumped.
                        group_cache_key = f"w-o-i:g-{event.group_id}"
                        if cache.get(group_cache_key):
                            metrics.incr(
                                "sentry.tasks.process_suspect_commits.debounce",
                                tags={"detail": "w-o-i:g debounce"},
                            )
                        else:
                            from sentry.utils.committers import get_frame_paths

                            cache.set(group_cache_key, True, 604800)  # 1 week in seconds
                            event_frames = get_frame_paths(event.data)
                            process_suspect_commits.delay(
                                event_id=event.event_id,
                                event_platform=event.platform,
                                event_frames=event_frames,
                                group_id=event.group_id,
                                project_id=event.project_id,
                            )
            except UnableToAcquireLock:
                pass
            except Exception:
                logger.exception("Failed to process suspect commits")

            if features.has("projects:servicehooks", project=event.project):
                allowed_events = {"event.created"}
                if has_alert:
                    allowed_events.add("event.alert")

                # NOTE(review): allowed_events always contains
                # "event.created" at this point, so this check is always true.
                if allowed_events:
                    for servicehook_id, events in _get_service_hooks(project_id=event.project_id):
                        if any(e in allowed_events for e in events):
                            process_service_hook.delay(servicehook_id=servicehook_id, event=event)

            from sentry.tasks.sentry_apps import process_resource_change_bound

            if event.get_event_type() == "error" and _should_send_error_created_hooks(
                event.project
            ):
                process_resource_change_bound.delay(
                    action="created", sender="Error", instance_id=event.event_id, instance=event
                )
            if is_new:
                process_resource_change_bound.delay(
                    action="created", sender="Group", instance_id=event.group_id
                )

            from sentry.plugins.base import plugins

            # NOTE(review): the keyword is spelled `is_regresion`; presumably
            # it matches the callee's parameter name -- confirm before
            # renaming it here.
            for plugin in plugins.for_project(event.project):
                plugin_post_process_group(
                    plugin_slug=plugin.slug, event=event, is_new=is_new, is_regresion=is_regression
                )

            from sentry import similarity

            safe_execute(similarity.record, event.project, [event], _with_transaction=False)

        # Patch attachments that were ingested on the standalone path.
        update_existing_attachments(event)

        if not is_reprocessed:
            event_processed.send_robust(
                sender=post_process_group,
                project=event.project,
                event=event,
                primary_hash=kwargs.get("primary_hash"),
            )

        # Removing the payload is what marks this event as post-processed:
        # a later call with the same cache_key hits the "missing_cache" path.
        with metrics.timer("tasks.post_process.delete_event_cache"):
            event_processing_store.delete_by_key(cache_key)
コード例 #3
0
ファイル: store.py プロジェクト: reevadere-codes/sentry
def _do_save_event(cache_key=None,
                   data=None,
                   start_time=None,
                   event_id=None,
                   project_id=None,
                   **kwargs):
    """
    Saves an event to the database.

    The payload is taken from ``data`` or, when only ``cache_key`` is given,
    loaded from the event processing store. The event is saved through
    ``EventManager`` and the updated payload is written back to the
    processing store. Whatever the outcome of the save, the event is marked
    as reprocessed, the attachment cache entry for ``cache_key`` is removed
    and timing metrics are emitted.

    If no payload is available (missing or empty), an ``events.failed``
    metric is recorded and the function returns without saving.

    :param cache_key: key of the payload in the event processing store; also
        used for the attachment cache cleanup.
    :param data: event payload; loaded from the store when ``None`` and a
        ``cache_key`` is given.
    :param start_time: passed to ``EventManager.save`` and used for the
        ``events.time-to-process`` timing when truthy.
    :param event_id: event id; read from ``data["event_id"]`` when ``None``.
    :param project_id: project id; popped from ``data["project"]`` when
        ``None``.
    :param kwargs: accepted but not used in this function.
    """

    set_current_event_project(project_id)

    from sentry.event_manager import EventManager, HashDiscarded

    # Default metric tag value, used when the payload is missing or has no type.
    event_type = "none"

    if cache_key and data is None:
        with metrics.timer(
                "tasks.store.do_save_event.get_cache") as metric_tags:
            data = event_processing_store.get(cache_key)
            if data is not None:
                metric_tags["event_type"] = event_type = data.get(
                    "type") or "none"

    with metrics.global_tags(event_type=event_type):
        if data is not None:
            data = CanonicalKeyDict(data)

        if event_id is None and data is not None:
            event_id = data["event_id"]

        # only when we come from reprocessing we get a project_id sent into
        # the task.
        # NOTE(review): if the cache lookup above returned None and no
        # project_id was passed, `data.pop` fails on None here -- confirm
        # callers always supply project_id when the payload can be missing.
        if project_id is None:
            project_id = data.pop("project")
            set_current_event_project(project_id)

        # We only need to delete raw events for events that support
        # reprocessing.  If the data cannot be found we want to assume
        # that we need to delete the raw event.
        if not data or reprocessing.event_supports_reprocessing(data):
            with metrics.timer("tasks.store.do_save_event.delete_raw_event"):
                delete_raw_event(project_id, event_id, allow_hint_clear=True)

        # This covers two cases: where data is None because we did not manage
        # to fetch it from the default cache or the empty dictionary was
        # stored in the default cache.  The former happens if the event
        # expired while being on the queue, the second happens on reprocessing
        # if the raw event was deleted concurrently while we held on to
        # it.  This causes the node store to delete the data and we end up
        # fetching an empty dict.  We could in theory not invoke `save_event`
        # in those cases but it's important that we always clean up the
        # reprocessing reports correctly or they will screw up the UI.  So
        # to future proof this correctly we just handle this case here.
        if not data:
            metrics.incr("events.failed",
                         tags={
                             "reason": "cache",
                             "stage": "post"
                         },
                         skip_internal=False)
            return

        try:
            with metrics.timer("tasks.store.do_save_event.event_manager.save"):
                manager = EventManager(data)
                # event.project.organization is populated after this statement.
                manager.save(project_id,
                             assume_normalized=True,
                             start_time=start_time,
                             cache_key=cache_key)
                # Put the updated event back into the cache so that post_process
                # has the most recent data.
                data = manager.get_data()
                # Canonical wrapper types are flattened to a plain dict
                # before being handed to the processing store.
                if isinstance(data, CANONICAL_TYPES):
                    data = dict(data.items())
                with metrics.timer(
                        "tasks.store.do_save_event.write_processing_cache"):
                    event_processing_store.store(data)
        except HashDiscarded:
            # Delete the event payload from cache since it won't show up in post-processing.
            if cache_key:
                with metrics.timer("tasks.store.do_save_event.delete_cache"):
                    event_processing_store.delete_by_key(cache_key)

        finally:
            # Runs whether the save succeeded, raised HashDiscarded or failed
            # with any other exception. `data` is the post-save payload only
            # if `manager.get_data()` was reached; otherwise it is still the
            # payload that was passed to the manager.
            reprocessing2.mark_event_reprocessed(data)
            if cache_key:
                with metrics.timer(
                        "tasks.store.do_save_event.delete_attachment_cache"):
                    attachment_cache.delete(cache_key)

            if start_time:
                metrics.timing("events.time-to-process",
                               time() - start_time,
                               instance=data["platform"])

            time_synthetic_monitoring_event(data, project_id, start_time)
コード例 #4
0
ファイル: store.py プロジェクト: reevadere-codes/sentry
def create_failed_event(
    cache_key, data, project_id, issues, event_id, start_time=None, reprocessing_rev=None
):
    """
    Store the original cached payload as a raw event after failed processing.

    Returns ``False`` when the event does not support reprocessing, has just
    been through reprocessing-v2, or reprocessing is not active for the
    project. Returns ``True`` once the raw-event path was taken; this includes
    the case where the cached payload was already gone and only a metric and
    an error log entry were produced.

    Raises ``RetryProcessing`` when reprocessing is active and the project's
    current reprocessing revision differs from ``reprocessing_rev``.
    """
    set_current_event_project(project_id)

    # Failed events only make sense for events that support reprocessing, and
    # an event that reprocessing-v2 just handled is not sent through
    # reprocessing-v1 again: v2 always shows the whole event, failed or not.
    if (not reprocessing.event_supports_reprocessing(data)
            or reprocessing2.is_reprocessed_event(data)):
        return False

    reprocessing_active = ProjectOption.objects.get_value(
        project_id, "sentry:reprocessing_active", REPROCESSING_DEFAULT
    )

    # A revision that changed since processing started means the debug
    # symbols were modified concurrently. Retry right away, otherwise the
    # processing issue could stay stuck in the project forever.
    if reprocessing_active:
        current_rev = reprocessing.get_reprocessing_revision(project_id, cached=False)
        if current_rev != reprocessing_rev:
            raise RetryProcessing()

    # Notify once: on the first failed event seen since the hint was cleared.
    hint_already_sent = ProjectOption.objects.get_value(
        project_id, "sentry:sent_failed_event_hint", False
    )
    if not hint_already_sent:
        project = Project.objects.get_from_cache(id=project_id)
        activity = Activity.objects.create(
            type=Activity.NEW_PROCESSING_ISSUES,
            project=project,
            datetime=to_datetime(start_time),
            data={"reprocessing_active": reprocessing_active, "issues": issues},
        )
        activity.send_notification()
        ProjectOption.objects.set_value(project, "sentry:sent_failed_event_hint", True)

    # Without active reprocessing no processing issues are created.
    if not reprocessing_active:
        return False

    # Read the payload from the cache again rather than using the argument:
    # the raw event must not contain changes made by the last processing step.
    delete_raw_event(project_id, event_id)
    cached_payload = event_processing_store.get(cache_key)

    if cached_payload is None:
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "raw"}, skip_internal=False
        )
        error_logger.error("process.failed_raw.empty", extra={"cache_key": cache_key})
        return True

    original = CanonicalKeyDict(cached_payload)
    from sentry.models import ProcessingIssue, RawEvent

    received_at = datetime.utcfromtimestamp(original["timestamp"]).replace(tzinfo=timezone.utc)
    raw_event = RawEvent.objects.create(
        project_id=project_id,
        event_id=event_id,
        datetime=received_at,
        data=original,
    )

    # Attach every reported processing issue to the raw event just created.
    for issue in issues:
        ProcessingIssue.objects.record_processing_issue(
            raw_event=raw_event,
            scope=issue["scope"],
            object=issue["object"],
            type=issue["type"],
            data=issue["data"],
        )

    event_processing_store.delete_by_key(cache_key)
    return True
コード例 #5
0
ファイル: post_process.py プロジェクト: tamert/sentry
def post_process_group(is_new,
                       is_regression,
                       is_new_group_environment,
                       cache_key,
                       group_id=None,
                       event=None,
                       **kwargs):
    """
    Fires post processing hooks for a group.

    The event is either built from the payload stored under ``cache_key`` in
    the event processing store, or taken from the legacy ``event`` argument.
    The call is skipped (with a ``post_process.skipped`` log entry) when the
    cached payload is missing, when a passed-in event was already post
    processed, or when the event is a reprocessed one.

    Otherwise the group (if any), project and organization are re-bound and
    stats are captured. Events with a group id additionally go through
    snoozes, owner assignment, rules, service hooks, resource-change hooks
    and plugins. Finally the ``event_processed`` signal is sent and the cache
    entry is deleted.

    :param is_new: forwarded to ``_capture_stats``, ``RuleProcessor`` and the
        plugin hooks; also triggers the "Group created" resource-change hook.
    :param is_regression: forwarded to ``RuleProcessor`` and the plugin hooks.
    :param is_new_group_environment: forwarded to ``RuleProcessor``.
    :param cache_key: key of the event payload in the event processing store.
    :param group_id: group id bound to an ``Event`` built from the cache.
    :param event: legacy way of passing the event object directly; only used
        when given (see the comment in the body).
    :param kwargs: only ``primary_hash`` is read; it is passed along with the
        ``event_processed`` signal.
    """
    from sentry.eventstore.models import Event
    from sentry.eventstore.processing import event_processing_store
    from sentry.utils import snuba
    from sentry.reprocessing2 import is_reprocessed_event

    with snuba.options_override({"consistent": True}):
        # We use the data being present/missing in the processing store
        # to ensure that we don't duplicate work should the forwarding consumers
        # need to rewind history.
        #
        # While we always send the cache_key and never send the event parameter now,
        # the code to handle `event` has to stick around for a self-hosted release cycle.
        # NOTE(review): if cache_key is falsy and event is None, neither
        # branch below binds an event and `event.data` further down fails on
        # None -- confirm callers always pass one of the two.
        if cache_key and event is None:
            data = event_processing_store.get(cache_key)
            if not data:
                logger.info(
                    "post_process.skipped",
                    extra={
                        "cache_key": cache_key,
                        "reason": "missing_cache"
                    },
                )
                return
            event = Event(project_id=data["project"],
                          event_id=data["event_id"],
                          group_id=group_id,
                          data=data)
        elif event and check_event_already_post_processed(event):
            if cache_key:
                event_processing_store.delete_by_key(cache_key)
            logger.info(
                "post_process.skipped",
                extra={
                    "reason": "duplicate",
                    "project_id": event.project_id,
                    "event_id": event.event_id,
                },
            )
            return

        # NOTE(review): this early return does not delete the cache entry for
        # cache_key, unlike the duplicate path above -- confirm that is
        # intended.
        if is_reprocessed_event(event.data):
            logger.info(
                "post_process.skipped",
                extra={
                    "project_id": event.project_id,
                    "event_id": event.event_id,
                    "reason": "reprocessed",
                },
            )
            return

        set_current_project(event.project_id)

        # NOTE: we must pass through the full Event object, and not an
        # event_id since the Event object may not actually have been stored
        # in the database due to sampling.
        from sentry.models import Project, Organization, EventDict
        from sentry.models.group import get_group_with_redirect
        from sentry.rules.processor import RuleProcessor
        from sentry.tasks.servicehooks import process_service_hook

        # Re-bind node data to avoid renormalization. We only want to
        # renormalize when loading old data from the database.
        event.data = EventDict(event.data, skip_renormalization=True)

        if event.group_id:
            # Re-bind Group since we're reading the Event object
            # from cache, which may contain a stale group and project
            event.group, _ = get_group_with_redirect(event.group_id)
            # Keep the event's group_id in sync with the group that was
            # actually bound, which may differ from the requested id.
            event.group_id = event.group.id

        # Re-bind Project and Org since we're reading the Event object
        # from cache which may contain stale parent models.
        event.project = Project.objects.get_from_cache(id=event.project_id)
        event.project._organization_cache = Organization.objects.get_from_cache(
            id=event.project.organization_id)
        bind_organization_context(event.project.organization)

        _capture_stats(event, is_new)

        # Everything in this block is group-specific and is skipped for
        # events that have no group id.
        if event.group_id:
            # we process snoozes before rules as it might create a regression
            # but not if it's new because you can't immediately snooze a new group
            has_reappeared = False if is_new else process_snoozes(event.group)

            handle_owner_assignment(event.project, event.group, event)

            rp = RuleProcessor(event, is_new, is_regression,
                               is_new_group_environment, has_reappeared)
            # Becomes True as soon as the rule processor yields at least one
            # callback; used below to allow the "event.alert" service hook.
            has_alert = False
            # TODO(dcramer): ideally this would fanout, but serializing giant
            # objects back and forth isn't super efficient
            for callback, futures in rp.apply():
                has_alert = True
                # Each rule callback runs inside its own, always-sampled
                # SDK transaction.
                with sentry_sdk.start_transaction(op="post_process_group",
                                                  name="rule_processor_apply",
                                                  sampled=True):
                    safe_execute(callback, event, futures)

            if features.has("projects:servicehooks", project=event.project):
                allowed_events = set(["event.created"])
                if has_alert:
                    allowed_events.add("event.alert")

                # NOTE(review): allowed_events always contains
                # "event.created" at this point, so this check is always true.
                if allowed_events:
                    for servicehook_id, events in _get_service_hooks(
                            project_id=event.project_id):
                        if any(e in allowed_events for e in events):
                            process_service_hook.delay(
                                servicehook_id=servicehook_id, event=event)

            from sentry.tasks.sentry_apps import process_resource_change_bound

            if event.get_event_type(
            ) == "error" and _should_send_error_created_hooks(event.project):
                process_resource_change_bound.delay(action="created",
                                                    sender="Error",
                                                    instance_id=event.event_id,
                                                    instance=event)
            if is_new:
                process_resource_change_bound.delay(action="created",
                                                    sender="Group",
                                                    instance_id=event.group_id)

            from sentry.plugins.base import plugins

            # NOTE(review): the keyword is spelled `is_regresion`; presumably
            # it matches the callee's parameter name -- confirm before
            # renaming it here.
            for plugin in plugins.for_project(event.project):
                plugin_post_process_group(plugin_slug=plugin.slug,
                                          event=event,
                                          is_new=is_new,
                                          is_regresion=is_regression)

        event_processed.send_robust(
            sender=post_process_group,
            project=event.project,
            event=event,
            primary_hash=kwargs.get("primary_hash"),
        )
        # Removing the payload is what marks this event as post-processed:
        # a later call with the same cache_key hits the "missing_cache" path.
        with metrics.timer("tasks.post_process.delete_event_cache"):
            event_processing_store.delete_by_key(cache_key)
コード例 #6
0
def capture_nodestore_stats(cache_key, project_id, event_id):
    """
    Emit size metrics comparing an event's stored payload with alternatives.

    The event is read from nodestore. If it is missing, an
    ``eventstore.compressor.error`` counter is incremented and nothing else
    happens. Otherwise the unprocessed payload kept under the key derived
    from ``cache_key`` is fetched and removed from the event processing
    store, and timings are recorded for:

    * the payload concatenated with the unprocessed payload (only when an
      unprocessed payload exists), and
    * the payload after ``deduplicate``, including how much would have to be
      written for the extracted keys not yet present in nodestore.

    When deduplication would write more than the original size, the details
    are logged.
    """
    set_current_project(project_id)

    from sentry.eventstore.compressor import deduplicate
    from sentry.eventstore.models import Event

    data = nodestore.get(Event.generate_node_id(project_id, event_id))
    if not data:
        metrics.incr("eventstore.compressor.error", tags={"reason": "no_data"})
        return

    # Baseline that every ratio below is measured against.
    old_event_size = _json_size(data)

    # Fetch the unprocessed copy, then remove it from the store right away.
    unprocessed_data = event_processing_store.get(_get_unprocessed_key(cache_key))
    event_processing_store.delete_by_key(_get_unprocessed_key(cache_key))

    tags = {
        "with_reprocessing": bool(unprocessed_data),
        "platform": data.get("platform") or "none",
        "is_minidump": is_minidump_event(data),
    }

    if not unprocessed_data:
        metrics.incr("nodestore_stats.without_reprocessing")
    else:
        metrics.incr("nodestore_stats.with_reprocessing")

        # Size of both payloads serialized together.
        concatenated_size = _json_size(data, unprocessed_data)
        metrics.timing("events.size.concatenated", concatenated_size, tags=tags)
        metrics.timing(
            "events.size.concatenated.ratio", concatenated_size / old_event_size, tags=tags
        )

        # Size when the unprocessed payload is nested inside a copy of the event.
        nested = dict(data, __nodestore_reprocessing=unprocessed_data)
        simple_concatenated_size = _json_size(nested)
        metrics.timing("events.size.simple_concatenated", simple_concatenated_size, tags=tags)
        metrics.timing(
            "events.size.simple_concatenated.ratio",
            simple_concatenated_size / old_event_size,
            tags=tags,
        )

    new_data, extra_keys = deduplicate(dict(data))
    event_size = _json_size(new_data)
    total_size = event_size

    for key, value in six.iteritems(extra_keys):
        if nodestore.get(key) is None:
            # Only content not yet stored counts towards the bytes written.
            metrics.incr("eventstore.compressor.misses", tags=tags)
            total_size += _json_size(value)
        else:
            # Hit: nothing to add; the nodestore.set() below should bump TTL.
            metrics.incr("eventstore.compressor.hits", tags=tags)

        # key is md5sum of content
        # do not store actual value to keep prod impact to a minimum
        nodestore.set(key, {})

    metrics.timing("events.size.deduplicated", event_size, tags=tags)
    metrics.timing("events.size.deduplicated.total_written", total_size, tags=tags)

    metrics.timing("events.size.deduplicated.ratio", event_size / old_event_size, tags=tags)
    metrics.timing(
        "events.size.deduplicated.total_written.ratio", total_size / old_event_size, tags=tags
    )

    if total_size > old_event_size:
        nodestore_stats_logger.info(
            "events.size.deduplicated.details",
            extra={
                "project_id": project_id,
                "event_id": event_id,
                "total_size": total_size,
                "old_event_size": old_event_size,
            },
        )