Code Example #1
def unmerge(
    project_id,
    source_id,
    destination_id,
    fingerprints,
    actor_id,
    last_event=None,
    batch_size=500,
    source_fields_reset=False,
    eventstream_state=None,
):
    # XXX: The queryset chunking logic below is awfully similar to
    # ``RangeQuerySetWrapper``. Ideally that could be refactored to be able to
    # be run without iteration by passing around a state object and we could
    # just use that here instead.

    source = Group.objects.get(project_id=project_id, id=source_id)

    # On the first iteration of this loop, we clear out all of the
    # denormalizations from the source group so that we can have a clean slate
    # for the new, repaired data.
    if last_event is None:
        fingerprints = lock_hashes(project_id, source_id, fingerprints)
        truncate_denormalizations(source)

    caches = get_caches()

    project = caches["Project"](project_id)

    # We process events sorted in descending order by -timestamp, -event_id. We need
    # to include event_id as well as timestamp in the ordering criteria since:
    #
    # - Event timestamps are rounded to the second so multiple events are likely
    # to have the same timestamp.
    #
    # - When sorting by timestamp alone, Snuba may not give us a deterministic
    # order for events with the same timestamp.
    #
    # - We need to ensure that we do not skip any events between batches. If we
    # only sorted by timestamp < last_event.timestamp it would be possible to
    # have missed an event with the same timestamp as the last item in the
    # previous batch.

    conditions = []
    if last_event is not None:
        conditions.extend(
            [
                ["timestamp", "<=", last_event["timestamp"]],
                [
                    ["timestamp", "<", last_event["timestamp"]],
                    ["event_id", "<", last_event["event_id"]],
                ],
            ]
        )

    events = eventstore.get_events(
        filter_keys={"project_id": [project_id], "issue": [source.id]},
        # We need the text-only "search message" from Snuba, not the raw message
        # dict field from nodestore.
        additional_columns=[eventstore.Columns.MESSAGE],
        conditions=conditions,
        limit=batch_size,
        referrer="unmerge",
        orderby=["-timestamp", "-event_id"],
    )

    # If there are no more events to process, we're done with the migration.
    if not events:
        tagstore.update_group_tag_key_values_seen(project_id, [source_id, destination_id])
        unlock_hashes(project_id, fingerprints)
        logger.warning("Unmerge complete (eventstream state: %s)", eventstream_state)
        if eventstream_state:
            eventstream.end_unmerge(eventstream_state)

        return destination_id

    Event.objects.bind_nodes(events, "data")

    source_events = []
    destination_events = []

    for event in events:
        (destination_events if get_fingerprint(event) in fingerprints else source_events).append(
            event
        )

    if source_events:
        if not source_fields_reset:
            source.update(**get_group_creation_attributes(caches, source_events))
            source_fields_reset = True
        else:
            source.update(**get_group_backfill_attributes(caches, source, source_events))

    (destination_id, eventstream_state) = migrate_events(
        caches,
        project,
        source_id,
        destination_id,
        fingerprints,
        destination_events,
        actor_id,
        eventstream_state,
    )

    repair_denormalizations(caches, project, events)

    unmerge.delay(
        project_id,
        source_id,
        destination_id,
        fingerprints,
        actor_id,
        last_event={"timestamp": events[-1].timestamp, "event_id": events[-1].event_id},
        batch_size=batch_size,
        source_fields_reset=source_fields_reset,
        eventstream_state=eventstream_state,
    )
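
The nested conditions above implement keyset (cursor) pagination over the compound key (timestamp, event_id): keep everything at or before the cursor timestamp, except events at the same timestamp whose event_id is not smaller. A minimal, self-contained sketch of the same idea, independent of Snuba and using hypothetical event dicts in place of real rows, might look like this:

# Minimal sketch of keyset pagination over (timestamp, event_id). The event
# dicts and this helper are hypothetical stand-ins for the Snuba query above.
def fetch_batch(events, last_event, batch_size):
    # Sort newest first, breaking timestamp ties with event_id so the order
    # stays deterministic even though timestamps are rounded to the second.
    ordered = sorted(
        events, key=lambda e: (e["timestamp"], e["event_id"]), reverse=True
    )
    if last_event is not None:
        # Keep only events strictly "before" the cursor: an older timestamp,
        # or the same timestamp with a smaller event_id. This is equivalent
        # to the "<=" condition combined with the nested OR built above.
        ordered = [
            e
            for e in ordered
            if e["timestamp"] < last_event["timestamp"]
            or (
                e["timestamp"] == last_event["timestamp"]
                and e["event_id"] < last_event["event_id"]
            )
        ]
    return ordered[:batch_size]

Paginating on timestamp alone would either re-read rows (with "<=") or risk skipping rows that share the last batch's final timestamp (with "<"); the compound cursor avoids both problems.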
Code Example #2
File: unmerge.py Project: NuttasitBoonwat/sentry
def unmerge(project_id,
            source_id,
            destination_id,
            fingerprints,
            actor_id,
            cursor=None,
            batch_size=500,
            source_fields_reset=False):
    # XXX: The queryset chunking logic below is awfully similar to
    # ``RangeQuerySetWrapper``. Ideally that could be refactored to be able to
    # be run without iteration by passing around a state object and we could
    # just use that here instead.

    source = Group.objects.get(
        project_id=project_id,
        id=source_id,
    )

    # On the first iteration of this loop, we clear out all of the
    # denormalizations from the source group so that we can have a clean slate
    # for the new, repaired data.
    if cursor is None:
        fingerprints = lock_hashes(project_id, source_id, fingerprints)
        truncate_denormalizations(source)

    caches = get_caches()

    project = caches['Project'](project_id)

    # We fetch the events in descending order by their primary key to get the
    # best approximation of the most recently received events.
    queryset = Event.objects.filter(
        project_id=project_id,
        group_id=source_id,
    ).order_by('-id')

    if cursor is not None:
        queryset = queryset.filter(id__lt=cursor)

    events = list(queryset[:batch_size])

    # If there are no more events to process, we're done with the migration.
    if not events:
        tagstore.update_group_tag_key_values_seen([source_id, destination_id])
        unlock_hashes(project_id, fingerprints)
        return destination_id

    Event.objects.bind_nodes(events, 'data')

    source_events = []
    destination_events = []

    for event in events:
        (destination_events if get_fingerprint(event) in fingerprints else
         source_events).append(event)

    if source_events:
        if not source_fields_reset:
            source.update(**get_group_creation_attributes(
                caches,
                source_events,
            ))
            source_fields_reset = True
        else:
            source.update(**get_group_backfill_attributes(
                caches,
                source,
                source_events,
            ))

    destination_id = migrate_events(
        caches,
        project,
        source_id,
        destination_id,
        fingerprints,
        destination_events,
        actor_id,
    )

    repair_denormalizations(
        caches,
        project,
        events,
    )

    unmerge.delay(
        project_id,
        source_id,
        destination_id,
        fingerprints,
        actor_id,
        cursor=events[-1].id,
        batch_size=batch_size,
        source_fields_reset=source_fields_reset,
    )
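
The XXX note at the top of each example asks for the primary-key chunking to be reusable without iteration by passing a state object around. A minimal sketch of what such a state object could look like, using a hypothetical dataclass and a Django-style queryset (this is not Sentry's actual API), is:

from dataclasses import dataclass
from typing import Optional

# Hypothetical resumable-chunking state; a sketch of the refactor suggested
# by the XXX comment, not an existing Sentry helper.
@dataclass
class ChunkState:
    cursor: Optional[int] = None
    batch_size: int = 500

def next_chunk(queryset, state):
    # Fetch the next batch ordered by descending primary key, resuming
    # strictly below the cursor left behind by the previous call.
    qs = queryset.order_by("-id")
    if state.cursor is not None:
        qs = qs.filter(id__lt=state.cursor)
    rows = list(qs[: state.batch_size])
    if rows:
        state.cursor = rows[-1].id
    return rows

Each call advances state.cursor in the same way the task above advances its own position by re-enqueuing itself with cursor=events[-1].id.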
Code Example #3
File: unmerge.py Project: Kayle009/sentry
def unmerge(
    project_id,
    source_id,
    destination_id,
    fingerprints,
    actor_id,
    cursor=None,
    batch_size=500,
    source_fields_reset=False,
    eventstream_state=None,
):
    # XXX: The queryset chunking logic below is awfully similar to
    # ``RangeQuerySetWrapper``. Ideally that could be refactored to be able to
    # be run without iteration by passing around a state object and we could
    # just use that here instead.

    source = Group.objects.get(
        project_id=project_id,
        id=source_id,
    )

    # On the first iteration of this loop, we clear out all of the
    # denormalizations from the source group so that we can have a clean slate
    # for the new, repaired data.
    if cursor is None:
        fingerprints = lock_hashes(project_id, source_id, fingerprints)
        truncate_denormalizations(source)

    caches = get_caches()

    project = caches['Project'](project_id)

    # We fetch the events in descending order by their primary key to get the
    # best approximation of the most recently received events.
    queryset = Event.objects.filter(
        project_id=project_id,
        group_id=source_id,
    ).order_by('-id')

    if cursor is not None:
        queryset = queryset.filter(id__lt=cursor)

    events = list(queryset[:batch_size])

    # If there are no more events to process, we're done with the migration.
    if not events:
        tagstore.update_group_tag_key_values_seen(project_id, [source_id, destination_id])
        unlock_hashes(project_id, fingerprints)

        logger.warning('Unmerge complete (eventstream state: %s)', eventstream_state)
        if eventstream_state:
            eventstream.end_unmerge(eventstream_state)

        return destination_id

    Event.objects.bind_nodes(events, 'data')

    source_events = []
    destination_events = []

    for event in events:
        (destination_events
         if get_fingerprint(event) in fingerprints else source_events).append(event)

    if source_events:
        if not source_fields_reset:
            source.update(**get_group_creation_attributes(
                caches,
                source_events,
            ))
            source_fields_reset = True
        else:
            source.update(**get_group_backfill_attributes(
                caches,
                source,
                source_events,
            ))

    (destination_id, eventstream_state) = migrate_events(
        caches,
        project,
        source_id,
        destination_id,
        fingerprints,
        destination_events,
        actor_id,
        eventstream_state,
    )

    repair_denormalizations(
        caches,
        project,
        events,
    )

    unmerge.delay(
        project_id,
        source_id,
        destination_id,
        fingerprints,
        actor_id,
        cursor=events[-1].id,
        batch_size=batch_size,
        source_fields_reset=source_fields_reset,
        eventstream_state=eventstream_state,
    )
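
Every version shares the same control flow: process one batch, then have the task re-enqueue itself with an updated cursor until a batch comes back empty. A synchronous sketch of that loop, with hypothetical fetch_batch, process_batch, and finalize callables standing in for the task's body, might be:

# Hypothetical sketch of the self-rescheduling batch loop. In the real task
# each iteration runs as a separate Celery invocation via unmerge.delay(...),
# so the "loop" is spread across task executions rather than a while loop.
def run_unmerge(fetch_batch, process_batch, finalize, batch_size=500):
    cursor = None
    while True:
        events = fetch_batch(cursor, batch_size)
        if not events:
            # Nothing left: unlock hashes, flush tag stats, and end the
            # eventstream unmerge, mirroring the early-return branch above.
            finalize()
            return
        process_batch(events)
        # Advance the cursor past the last event in this batch.
        cursor = events[-1].id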