Code Example #1
File: reprocessing2.py  Project: biperch/sentry
def reprocess_group(
    project_id,
    group_id,
    new_group_id=None,
    query_state=None,
    start_time=None,
    max_events=None,
    acting_user_id=None,
):
    from sentry.reprocessing2 import start_group_reprocessing

    if start_time is None:
        assert new_group_id is None
        start_time = time.time()
        new_group_id = start_group_reprocessing(project_id,
                                                group_id,
                                                max_events=max_events,
                                                acting_user_id=acting_user_id)

    assert new_group_id is not None

    query_state, events = celery_run_batch_query(
        filter=eventstore.Filter(project_ids=[project_id],
                                 group_ids=[group_id]),
        batch_size=GROUP_REPROCESSING_CHUNK_SIZE,
        state=query_state,
        referrer="reprocessing2.reprocess_group",
    )

    if not events:
        return

    tombstoned_event_ids = []

    for event in events:
        if max_events is None or max_events > 0:
            reprocess_event.delay(
                project_id=project_id,
                event_id=event.event_id,
                start_time=start_time,
            )
            if max_events is not None:
                max_events -= 1
        else:
            tombstoned_event_ids.append(event.event_id)

    # len(tombstoned_event_ids) is upper-bounded by GROUP_REPROCESSING_CHUNK_SIZE
    if tombstoned_event_ids:
        tombstone_events.delay(project_id=project_id,
                               group_id=group_id,
                               event_ids=tombstoned_event_ids)

    reprocess_group.delay(
        project_id=project_id,
        group_id=group_id,
        new_group_id=new_group_id,
        query_state=query_state,
        start_time=start_time,
        max_events=max_events,
    )
Code Example #2
def unmerge(
    project_id,
    source_id,
    destination_id,
    fingerprints,
    actor_id,
    last_event=None,
    batch_size=500,
    source_fields_reset=False,
    eventstream_state=None,
):
    source = Group.objects.get(project_id=project_id, id=source_id)

    caches = get_caches()

    project = caches["Project"](project_id)

    # On the first iteration of this loop, we clear out all of the
    # denormalizations from the source group so that we can have a clean slate
    # for the new, repaired data.
    if last_event is None:
        fingerprints = lock_hashes(project_id, source_id, fingerprints)
        truncate_denormalizations(project, source)

    last_event, events = celery_run_batch_query(
        filter=eventstore.Filter(project_ids=[project_id],
                                 group_ids=[source.id]),
        batch_size=batch_size,
        state=last_event,
        referrer="unmerge",
    )

    # If there are no more events to process, we're done with the migration.
    if not events:
        unlock_hashes(project_id, fingerprints)
        logger.warning("Unmerge complete (eventstream state: %s)",
                       eventstream_state)
        if eventstream_state:
            eventstream.end_unmerge(eventstream_state)

        return destination_id

    source_events = []
    destination_events = []

    for event in events:
        (destination_events if get_fingerprint(event) in fingerprints else
         source_events).append(event)

    if source_events:
        if not source_fields_reset:
            source.update(
                **get_group_creation_attributes(caches, source_events))
            source_fields_reset = True
        else:
            source.update(
                **get_group_backfill_attributes(caches, source, source_events))

    (destination_id, eventstream_state) = migrate_events(
        caches,
        project,
        source_id,
        destination_id,
        fingerprints,
        destination_events,
        actor_id,
        eventstream_state,
    )

    repair_denormalizations(caches, project, events)

    unmerge.delay(
        project_id,
        source_id,
        destination_id,
        fingerprints,
        actor_id,
        last_event=last_event,
        batch_size=batch_size,
        source_fields_reset=source_fields_reset,
        eventstream_state=eventstream_state,
    )
Code Example #3
def reprocess_group(
    project_id,
    group_id,
    remaining_events="delete",
    new_group_id=None,
    query_state=None,
    start_time=None,
    max_events=None,
    acting_user_id=None,
):
    sentry_sdk.set_tag("project", project_id)
    from sentry.reprocessing2 import (
        CannotReprocess,
        logger,
        mark_event_reprocessed,
        reprocess_event,
        start_group_reprocessing,
    )

    if start_time is None:
        assert new_group_id is None
        start_time = time.time()
        new_group_id = start_group_reprocessing(
            project_id,
            group_id,
            max_events=max_events,
            acting_user_id=acting_user_id,
            remaining_events=remaining_events,
        )

    assert new_group_id is not None

    query_state, events = celery_run_batch_query(
        filter=eventstore.Filter(project_ids=[project_id],
                                 group_ids=[group_id]),
        batch_size=GROUP_REPROCESSING_CHUNK_SIZE,
        state=query_state,
        referrer="reprocessing2.reprocess_group",
    )

    if not events:
        # This needs to be delayed until all events have been enqueued and we
        # have stopped iterating over the batch query. Doing it in
        # finish_reprocessing would not work: for small max_events,
        # finish_reprocessing may execute sooner than the last reprocess_group
        # iteration.
        eventstream.exclude_groups(project_id, [group_id])
        return

    remaining_event_ids = []
    remaining_events_min_datetime = None
    remaining_events_max_datetime = None

    for event in events:
        if max_events is None or max_events > 0:
            with sentry_sdk.start_span(op="reprocess_event"):
                try:
                    reprocess_event(
                        project_id=project_id,
                        event_id=event.event_id,
                        start_time=start_time,
                    )
                except CannotReprocess as e:
                    logger.error(f"reprocessing2.{e}")
                except Exception:
                    sentry_sdk.capture_exception()
                else:
                    if max_events is not None:
                        max_events -= 1

                    continue

            # In case of errors while kicking off reprocessing, mark the event
            # as reprocessed so that the progress bar advances and the
            # finish_reprocessing task is still spawned correctly.
            mark_event_reprocessed(group_id=group_id, project_id=project_id)

        # In case of errors while kicking off reprocessing or if max_events has
        # been exceeded, do the default action.

        if remaining_events_min_datetime is None or remaining_events_min_datetime > event.datetime:
            remaining_events_min_datetime = event.datetime
        if remaining_events_max_datetime is None or remaining_events_max_datetime < event.datetime:
            remaining_events_max_datetime = event.datetime

        remaining_event_ids.append(event.event_id)

    # len(remaining_event_ids) is upper-bounded by GROUP_REPROCESSING_CHUNK_SIZE
    if remaining_event_ids:
        handle_remaining_events.delay(
            project_id=project_id,
            new_group_id=new_group_id,
            event_ids=remaining_event_ids,
            remaining_events=remaining_events,
            from_timestamp=remaining_events_min_datetime,
            to_timestamp=remaining_events_max_datetime,
        )

    reprocess_group.delay(
        project_id=project_id,
        group_id=group_id,
        new_group_id=new_group_id,
        query_state=query_state,
        start_time=start_time,
        max_events=max_events,
        remaining_events=remaining_events,
    )
Code Example #4
def reprocess_group(
    project_id,
    group_id,
    remaining_events="delete",
    new_group_id=None,
    query_state=None,
    start_time=None,
    max_events=None,
    acting_user_id=None,
):
    sentry_sdk.set_tag("project", project_id)
    sentry_sdk.set_tag("group_id", group_id)

    from sentry.reprocessing2 import (
        CannotReprocess,
        buffered_handle_remaining_events,
        logger,
        reprocess_event,
        start_group_reprocessing,
    )

    sentry_sdk.set_tag("is_start", "false")

    # Only executed once during reprocessing
    if start_time is None:
        assert new_group_id is None
        start_time = time.time()
        metrics.incr("events.reprocessing.start_group_reprocessing",
                     sample_rate=1.0)
        sentry_sdk.set_tag("is_start", "true")
        new_group_id = start_group_reprocessing(
            project_id,
            group_id,
            max_events=max_events,
            acting_user_id=acting_user_id,
            remaining_events=remaining_events,
        )

    assert new_group_id is not None

    query_state, events = celery_run_batch_query(
        filter=eventstore.Filter(project_ids=[project_id],
                                 group_ids=[group_id]),
        batch_size=settings.SENTRY_REPROCESSING_PAGE_SIZE,
        state=query_state,
        referrer="reprocessing2.reprocess_group",
    )

    if not events:
        # Migrate events that belong to new group generated after reprocessing
        buffered_handle_remaining_events(
            project_id=project_id,
            old_group_id=group_id,
            new_group_id=new_group_id,
            datetime_to_event=[],
            remaining_events=remaining_events,
            force_flush_batch=True,
        )

        return

    remaining_event_ids = []

    for event in events:
        if max_events is None or max_events > 0:
            with sentry_sdk.start_span(op="reprocess_event"):
                try:
                    reprocess_event(
                        project_id=project_id,
                        event_id=event.event_id,
                        start_time=start_time,
                    )
                except CannotReprocess as e:
                    logger.error(f"reprocessing2.{e}")
                except Exception:
                    sentry_sdk.capture_exception()
                else:
                    if max_events is not None:
                        max_events -= 1

                    continue

        # In case of errors while kicking off reprocessing or if max_events has
        # been exceeded, do the default action.

        remaining_event_ids.append((event.datetime, event.event_id))

    # len(remaining_event_ids) is upper-bounded by settings.SENTRY_REPROCESSING_PAGE_SIZE
    if remaining_event_ids:
        buffered_handle_remaining_events(
            project_id=project_id,
            old_group_id=group_id,
            new_group_id=new_group_id,
            datetime_to_event=remaining_event_ids,
            remaining_events=remaining_events,
        )

    reprocess_group.delay(
        project_id=project_id,
        group_id=group_id,
        new_group_id=new_group_id,
        query_state=query_state,
        start_time=start_time,
        max_events=max_events,
        remaining_events=remaining_events,
    )
Code Example #5
def unmerge(*posargs, **kwargs):
    args = UnmergeArgsBase.parse_arguments(*posargs, **kwargs)

    source = Group.objects.get(project_id=args.project_id, id=args.source_id)

    caches = get_caches()

    project = caches["Project"](args.project_id)

    # On the first iteration of this loop, we clear out all of the
    # denormalizations from the source group so that we can have a clean slate
    # for the new, repaired data.
    if isinstance(args, InitialUnmergeArgs):
        locked_primary_hashes = lock_hashes(
            args.project_id, args.source_id,
            args.replacement.primary_hashes_to_lock)
        truncate_denormalizations(project, source)
        last_event = None
    else:
        last_event = args.last_event
        locked_primary_hashes = args.locked_primary_hashes

    last_event, events = celery_run_batch_query(
        filter=eventstore.Filter(project_ids=[args.project_id],
                                 group_ids=[source.id]),
        batch_size=args.batch_size,
        state=last_event,
        referrer="unmerge",
    )

    # If there are no more events to process, we're done with the migration.
    if not events:
        unlock_hashes(args.project_id, locked_primary_hashes)
        for unmerge_key, (group_id,
                          eventstream_state) in args.destinations.items():
            logger.warning("Unmerge complete (eventstream state: %s)",
                           eventstream_state)
            if eventstream_state:
                args.replacement.stop_snuba_replacement(eventstream_state)
        return

    source_events = []
    destination_events = {}

    for event in events:
        unmerge_key = args.replacement.get_unmerge_key(event,
                                                       locked_primary_hashes)
        if unmerge_key is not None:
            destination_events.setdefault(unmerge_key, []).append(event)
        else:
            source_events.append(event)

    source_fields_reset = isinstance(
        args, SuccessiveUnmergeArgs) and args.source_fields_reset

    if source_events:
        if not source_fields_reset:
            source.update(
                **get_group_creation_attributes(caches, source_events))
            source_fields_reset = True
        else:
            source.update(
                **get_group_backfill_attributes(caches, source, source_events))

    destinations = dict(args.destinations)

    # XXX: This is only actually able to create a destination group and migrate
    # the group hashes if there are events that can be migrated. How do we
    # handle this if there aren't any events? We can't create a group (there
    # isn't any data to derive the aggregates from), so we'd have to mark the
    # hash as in limbo somehow...?

    for unmerge_key, _destination_events in destination_events.items():
        destination_id, eventstream_state = destinations.get(unmerge_key) or (
            None, None)
        (destination_id, eventstream_state) = migrate_events(
            caches,
            project,
            args,
            _destination_events,
            locked_primary_hashes,
            destination_id,
            eventstream_state,
        )
        destinations[unmerge_key] = destination_id, eventstream_state

    repair_denormalizations(caches, project, events)

    new_args = SuccessiveUnmergeArgs(
        project_id=args.project_id,
        source_id=args.source_id,
        replacement=args.replacement,
        actor_id=args.actor_id,
        batch_size=args.batch_size,
        last_event=last_event,
        destinations=destinations,
        locked_primary_hashes=locked_primary_hashes,
        source_fields_reset=source_fields_reset,
    )

    unmerge.delay(**new_args.dump_arguments())
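
Example #5 replaces the long positional signature of example #2 with a single arguments object that is parsed from and dumped back into Celery keyword arguments. The sketch below is a simplified, hypothetical stand-in for that round-trip; UnmergeArgs, parse_arguments and dump_arguments here are minimal dataclass versions, not the real sentry.unmerge classes.

from dataclasses import asdict, dataclass


@dataclass(frozen=True)
class UnmergeArgs:
    # Hypothetical, trimmed-down stand-in for the real arguments classes.
    project_id: int
    source_id: int
    batch_size: int = 500

    @classmethod
    def parse_arguments(cls, *posargs, **kwargs):
        # Accept positional or keyword invocations, since a re-enqueued task
        # called with **dump_arguments() arrives as keyword arguments.
        return cls(*posargs, **kwargs)

    def dump_arguments(self):
        # Only JSON-serializable values should pass through the Celery broker.
        return asdict(self)


args = UnmergeArgs.parse_arguments(project_id=1, source_id=42)
# unmerge.delay(**args.dump_arguments()) would schedule the next batch with
# the same, fully keyword-based argument set.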