Code example #1
def reprocess_group(
    project_id,
    group_id,
    remaining_events="delete",
    new_group_id=None,
    query_state=None,
    start_time=None,
    max_events=None,
    acting_user_id=None,
):
    sentry_sdk.set_tag("project", project_id)
    from sentry.reprocessing2 import (
        CannotReprocess,
        logger,
        mark_event_reprocessed,
        reprocess_event,
        start_group_reprocessing,
    )

    if start_time is None:
        assert new_group_id is None
        start_time = time.time()
        new_group_id = start_group_reprocessing(
            project_id,
            group_id,
            max_events=max_events,
            acting_user_id=acting_user_id,
            remaining_events=remaining_events,
        )

    assert new_group_id is not None

    query_state, events = celery_run_batch_query(
        filter=eventstore.Filter(project_ids=[project_id],
                                 group_ids=[group_id]),
        batch_size=GROUP_REPROCESSING_CHUNK_SIZE,
        state=query_state,
        referrer="reprocessing2.reprocess_group",
    )

    if not events:
        # Need to delay this until we have enqueued all events and stopped
        # iterating over the batch query. If we took care of this in
        # finish_reprocessing it would not work, as for small max_events
        # finish_reprocessing may execute sooner than the last reprocess_group
        # iteration.
        eventstream.exclude_groups(project_id, [group_id])
        return

    remaining_event_ids = []
    remaining_events_min_datetime = None
    remaining_events_max_datetime = None

    for event in events:
        if max_events is None or max_events > 0:
            with sentry_sdk.start_span(op="reprocess_event"):
                try:
                    reprocess_event(
                        project_id=project_id,
                        event_id=event.event_id,
                        start_time=start_time,
                    )
                except CannotReprocess as e:
                    logger.error(f"reprocessing2.{e}")
                except Exception:
                    sentry_sdk.capture_exception()
                else:
                    if max_events is not None:
                        max_events -= 1

                    continue

            # In case of errors while kicking off reprocessing, mark the event
            # as reprocessed so that the progress bar advances and the
            # finish_reprocessing task is still correctly spawned.
            mark_event_reprocessed(group_id=group_id, project_id=project_id)

        # In case of errors while kicking off reprocessing or if max_events has
        # been exceeded, do the default action.

        if remaining_events_min_datetime is None or remaining_events_min_datetime > event.datetime:
            remaining_events_min_datetime = event.datetime
        if remaining_events_max_datetime is None or remaining_events_max_datetime < event.datetime:
            remaining_events_max_datetime = event.datetime

        remaining_event_ids.append(event.event_id)

    # len(remaining_event_ids) is upper-bounded by GROUP_REPROCESSING_CHUNK_SIZE
    if remaining_event_ids:
        handle_remaining_events.delay(
            project_id=project_id,
            new_group_id=new_group_id,
            event_ids=remaining_event_ids,
            remaining_events=remaining_events,
            from_timestamp=remaining_events_min_datetime,
            to_timestamp=remaining_events_max_datetime,
        )

    reprocess_group.delay(
        project_id=project_id,
        group_id=group_id,
        new_group_id=new_group_id,
        query_state=query_state,
        start_time=start_time,
        max_events=max_events,
        remaining_events=remaining_events,
    )
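
The task above re-enqueues itself via reprocess_group.delay(...), paging through the group's events one GROUP_REPROCESSING_CHUNK_SIZE batch at a time until the batch query comes back empty. A minimal sketch of how a run might be kicked off, assuming the task is importable from sentry.tasks.reprocessing2 (the module path and the helper name are assumptions, not part of the snippet):

# Hypothetical kick-off helper; the import path is an assumption.
from sentry.tasks.reprocessing2 import reprocess_group

def kick_off_reprocessing(project_id, group_id, max_events=None):
    # Enqueue the first iteration. start_time and new_group_id are omitted so
    # the task itself calls start_group_reprocessing and then keeps
    # re-enqueuing itself until no events remain.
    reprocess_group.delay(
        project_id=project_id,
        group_id=group_id,
        max_events=max_events,        # cap on events that are actually reprocessed
        remaining_events="delete",    # or "keep" to move leftovers to the new group
    )
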
Code example #2
File: store.py  Project: reevadere-codes/sentry
def _do_save_event(cache_key=None,
                   data=None,
                   start_time=None,
                   event_id=None,
                   project_id=None,
                   **kwargs):
    """
    Saves an event to the database.
    """

    set_current_event_project(project_id)

    from sentry.event_manager import EventManager, HashDiscarded

    event_type = "none"

    if cache_key and data is None:
        with metrics.timer(
                "tasks.store.do_save_event.get_cache") as metric_tags:
            data = event_processing_store.get(cache_key)
            if data is not None:
                metric_tags["event_type"] = event_type = data.get(
                    "type") or "none"

    with metrics.global_tags(event_type=event_type):
        if data is not None:
            data = CanonicalKeyDict(data)

        if event_id is None and data is not None:
            event_id = data["event_id"]

        # Only when we come from reprocessing do we get a project_id sent into
        # the task.
        if project_id is None:
            project_id = data.pop("project")
            set_current_event_project(project_id)

        # We only need to delete raw events for events that support
        # reprocessing.  If the data cannot be found we want to assume
        # that we need to delete the raw event.
        if not data or reprocessing.event_supports_reprocessing(data):
            with metrics.timer("tasks.store.do_save_event.delete_raw_event"):
                delete_raw_event(project_id, event_id, allow_hint_clear=True)

        # This covers two cases: either data is None because we did not manage
        # to fetch it from the default cache, or an empty dictionary was
        # stored in the default cache.  The former happens if the event
        # expired while on the queue; the latter happens on reprocessing if
        # the raw event was deleted concurrently while we held on to it.
        # That causes the node store to delete the data and we end up
        # fetching an empty dict.  We could in theory not invoke `save_event`
        # in those cases, but it's important that we always clean up the
        # reprocessing reports correctly or they will screw up the UI.  So to
        # future-proof this correctly we just handle this case here.
        if not data:
            metrics.incr("events.failed",
                         tags={
                             "reason": "cache",
                             "stage": "post"
                         },
                         skip_internal=False)
            return

        try:
            with metrics.timer("tasks.store.do_save_event.event_manager.save"):
                manager = EventManager(data)
                # event.project.organization is populated after this statement.
                manager.save(project_id,
                             assume_normalized=True,
                             start_time=start_time,
                             cache_key=cache_key)
                # Put the updated event back into the cache so that post_process
                # has the most recent data.
                data = manager.get_data()
                if isinstance(data, CANONICAL_TYPES):
                    data = dict(data.items())
                with metrics.timer(
                        "tasks.store.do_save_event.write_processing_cache"):
                    event_processing_store.store(data)
        except HashDiscarded:
            # Delete the event payload from cache since it won't show up in post-processing.
            if cache_key:
                with metrics.timer("tasks.store.do_save_event.delete_cache"):
                    event_processing_store.delete_by_key(cache_key)

        finally:
            reprocessing2.mark_event_reprocessed(data)
            if cache_key:
                with metrics.timer(
                        "tasks.store.do_save_event.delete_attachment_cache"):
                    attachment_cache.delete(cache_key)

            if start_time:
                metrics.timing("events.time-to-process",
                               time() - start_time,
                               instance=data["platform"])

            time_synthetic_monitoring_event(data, project_id, start_time)
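
To see how the arguments fit together: the event payload lives in event_processing_store and only the cache key is handed to the task. A rough sketch of a caller, assuming event_processing_store.store() returns the cache key (the helper name and call site are illustrative, not taken from the project):

from time import time

def enqueue_save(data, project_id):
    # Store the normalized payload in the processing cache; _do_save_event
    # re-reads it by cache_key instead of receiving the full payload.
    cache_key = event_processing_store.store(data)
    _do_save_event(
        cache_key=cache_key,
        start_time=time(),        # feeds the events.time-to-process metric
        event_id=data["event_id"],
        project_id=project_id,    # may be None, in which case it is popped from data
    )
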
Code example #3
def handle_remaining_events(
    project_id,
    new_group_id,
    remaining_events,
    # TODO(markus): Should be mandatory arguments.
    event_ids_redis_key=None,
    old_group_id=None,
    # TODO(markus): Deprecated arguments, can remove in next version.
    event_ids=None,
    from_timestamp=None,
    to_timestamp=None,
):
    """
    Delete or merge/move associated per-event data: nodestore, event
    attachments, user reports. Mark the event as "tombstoned" in Snuba.

    This is not full event deletion. Snuba can still only delete entire groups;
    however, we must only run this task for event IDs that we do not intend to
    reuse for reprocessed events. An event ID that has been tombstoned once
    cannot be inserted over again in eventstream.

    See doc comment in sentry.reprocessing2.
    """

    from sentry import buffer
    from sentry.models.group import Group
    from sentry.reprocessing2 import EVENT_MODELS_TO_MIGRATE, pop_batched_events_from_redis

    if event_ids_redis_key is not None:
        event_ids, from_timestamp, to_timestamp = pop_batched_events_from_redis(
            event_ids_redis_key)

    metrics.timing(
        "events.reprocessing.handle_remaining_events.batch_size",
        len(event_ids),
        sample_rate=1.0,
    )

    assert remaining_events in ("delete", "keep")

    if remaining_events == "delete":
        for cls in EVENT_MODELS_TO_MIGRATE:
            cls.objects.filter(project_id=project_id,
                               event_id__in=event_ids).delete()

        # Remove from nodestore
        node_ids = [
            Event.generate_node_id(project_id, event_id)
            for event_id in event_ids
        ]
        nodestore.delete_multi(node_ids)

        # Tell Snuba to delete the event data.
        eventstream.tombstone_events_unsafe(project_id,
                                            event_ids,
                                            from_timestamp=from_timestamp,
                                            to_timestamp=to_timestamp)
    elif remaining_events == "keep":
        for cls in EVENT_MODELS_TO_MIGRATE:
            cls.objects.filter(
                project_id=project_id,
                event_id__in=event_ids).update(group_id=new_group_id)

        eventstream.replace_group_unsafe(
            project_id,
            event_ids,
            new_group_id=new_group_id,
            from_timestamp=from_timestamp,
            to_timestamp=to_timestamp,
        )

        buffer.incr(Group, {"times_seen": len(event_ids)},
                    {"id": new_group_id})
    else:
        raise ValueError(
            f"Invalid value for remaining_events: {remaining_events}")

    if old_group_id is not None:
        from sentry.reprocessing2 import mark_event_reprocessed

        mark_event_reprocessed(group_id=old_group_id,
                               project_id=project_id,
                               num_events=len(event_ids))
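
Code example #1 shows how this task is enqueued with the (still accepted) deprecated arguments; a minimal sketch of that call shape for the "delete" path, reusing the names collected in the batching loop of example #1 (no new API is introduced here):

# Sketch of the "delete" path, mirroring the call in code example #1.
handle_remaining_events.delay(
    project_id=project_id,
    new_group_id=new_group_id,
    remaining_events="delete",                     # or "keep" to re-point rows at new_group_id
    event_ids=remaining_event_ids,                 # deprecated; newer callers pass event_ids_redis_key
    from_timestamp=remaining_events_min_datetime,
    to_timestamp=remaining_events_max_datetime,
)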