def unmerge(
    project_id,
    source_id,
    destination_id,
    fingerprints,
    actor_id,
    last_event=None,
    batch_size=500,
    source_fields_reset=False,
    eventstream_state=None,
):
    # XXX: The queryset chunking logic below is awfully similar to
    # ``RangeQuerySetWrapper``. Ideally that could be refactored to be able to
    # be run without iteration by passing around a state object and we could
    # just use that here instead.
    source = Group.objects.get(project_id=project_id, id=source_id)

    # On the first iteration of this loop, we clear out all of the
    # denormalizations from the source group so that we can have a clean slate
    # for the new, repaired data.
    if last_event is None:
        fingerprints = lock_hashes(project_id, source_id, fingerprints)
        truncate_denormalizations(source)

    caches = get_caches()

    project = caches["Project"](project_id)

    # We process events sorted in descending order by -timestamp, -event_id. We
    # need to include event_id as well as timestamp in the ordering criteria
    # since:
    #
    # - Event timestamps are rounded to the second so multiple events are
    #   likely to have the same timestamp.
    #
    # - When sorting by timestamp alone, Snuba may not give us a deterministic
    #   order for events with the same timestamp.
    #
    # - We need to ensure that we do not skip any events between batches. If we
    #   only sorted by timestamp < last_event.timestamp it would be possible to
    #   have missed an event with the same timestamp as the last item in the
    #   previous batch.
    conditions = []
    if last_event is not None:
        conditions.extend(
            [
                ["timestamp", "<=", last_event["timestamp"]],
                [
                    ["timestamp", "<", last_event["timestamp"]],
                    ["event_id", "<", last_event["event_id"]],
                ],
            ]
        )

    events = eventstore.get_events(
        filter_keys={"project_id": [project_id], "issue": [source.id]},
        # We need the text-only "search message" from Snuba, not the raw
        # message dict field from nodestore.
        additional_columns=[eventstore.Columns.MESSAGE],
        conditions=conditions,
        limit=batch_size,
        referrer="unmerge",
        orderby=["-timestamp", "-event_id"],
    )

    # If there are no more events to process, we're done with the migration.
    if not events:
        tagstore.update_group_tag_key_values_seen(project_id, [source_id, destination_id])
        unlock_hashes(project_id, fingerprints)
        logger.warning("Unmerge complete (eventstream state: %s)", eventstream_state)
        if eventstream_state:
            eventstream.end_unmerge(eventstream_state)
        return destination_id

    Event.objects.bind_nodes(events, "data")

    source_events = []
    destination_events = []

    for event in events:
        (destination_events if get_fingerprint(event) in fingerprints else source_events).append(
            event
        )

    if source_events:
        if not source_fields_reset:
            source.update(**get_group_creation_attributes(caches, source_events))
            source_fields_reset = True
        else:
            source.update(**get_group_backfill_attributes(caches, source, source_events))

    (destination_id, eventstream_state) = migrate_events(
        caches,
        project,
        source_id,
        destination_id,
        fingerprints,
        destination_events,
        actor_id,
        eventstream_state,
    )

    repair_denormalizations(caches, project, events)

    unmerge.delay(
        project_id,
        source_id,
        destination_id,
        fingerprints,
        actor_id,
        last_event={"timestamp": events[-1].timestamp, "event_id": events[-1].event_id},
        batch_size=batch_size,
        source_fields_reset=source_fields_reset,
        eventstream_state=eventstream_state,
    )

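# --- Illustrative sketch, not part of the original module ---
# The nested ``conditions`` built above implement keyset pagination over the
# (timestamp, event_id) sort key: the outer list is combined with AND and the
# inner list with OR, i.e. ``timestamp <= last AND (timestamp < last OR
# event_id < last_event_id)``. The stand-alone predicate below mirrors that
# logic on plain dicts so the "no skipped or repeated events between batches"
# property described in the comment can be sanity-checked; the name
# ``belongs_in_next_batch`` is hypothetical.
def belongs_in_next_batch(event, last_event):
    """True if ``event`` sorts strictly after ``last_event`` in the descending
    (-timestamp, -event_id) order used by the query above."""
    return event["timestamp"] <= last_event["timestamp"] and (
        event["timestamp"] < last_event["timestamp"]
        or event["event_id"] < last_event["event_id"]
    )

# Events sharing the second-granularity timestamp of the previous batch's last
# item are split purely by event_id, so the boundary neither drops nor repeats them.
last = {"timestamp": 100, "event_id": "bbb"}
assert belongs_in_next_batch({"timestamp": 99, "event_id": "zzz"}, last)
assert belongs_in_next_batch({"timestamp": 100, "event_id": "aaa"}, last)
assert not belongs_in_next_batch({"timestamp": 100, "event_id": "ccc"}, last)
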
def unmerge(project_id, source_id, destination_id, fingerprints, actor_id,
            cursor=None, batch_size=500, source_fields_reset=False):
    # XXX: The queryset chunking logic below is awfully similar to
    # ``RangeQuerySetWrapper``. Ideally that could be refactored to be able to
    # be run without iteration by passing around a state object and we could
    # just use that here instead.
    source = Group.objects.get(
        project_id=project_id,
        id=source_id,
    )

    # On the first iteration of this loop, we clear out all of the
    # denormalizations from the source group so that we can have a clean slate
    # for the new, repaired data.
    if cursor is None:
        fingerprints = lock_hashes(project_id, source_id, fingerprints)
        truncate_denormalizations(source)

    caches = get_caches()

    project = caches['Project'](project_id)

    # We fetch the events in descending order by their primary key to get the
    # best approximation of the most recently received events.
    queryset = Event.objects.filter(
        project_id=project_id,
        group_id=source_id,
    ).order_by('-id')

    if cursor is not None:
        queryset = queryset.filter(id__lt=cursor)

    events = list(queryset[:batch_size])

    # If there are no more events to process, we're done with the migration.
    if not events:
        tagstore.update_group_tag_key_values_seen([source_id, destination_id])
        unlock_hashes(project_id, fingerprints)
        return destination_id

    Event.objects.bind_nodes(events, 'data')

    source_events = []
    destination_events = []

    for event in events:
        (destination_events if get_fingerprint(event) in fingerprints else source_events).append(event)

    if source_events:
        if not source_fields_reset:
            source.update(**get_group_creation_attributes(
                caches,
                source_events,
            ))
            source_fields_reset = True
        else:
            source.update(**get_group_backfill_attributes(
                caches,
                source,
                source_events,
            ))

    destination_id = migrate_events(
        caches,
        project,
        source_id,
        destination_id,
        fingerprints,
        destination_events,
        actor_id,
    )

    repair_denormalizations(
        caches,
        project,
        events,
    )

    unmerge.delay(
        project_id,
        source_id,
        destination_id,
        fingerprints,
        actor_id,
        cursor=events[-1].id,
        batch_size=batch_size,
        source_fields_reset=source_fields_reset,
    )

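# --- Illustrative sketch, not part of the original module ---
# The ``id__lt=cursor`` filter together with the trailing
# ``unmerge.delay(..., cursor=events[-1].id)`` call is descending keyset
# pagination over the primary key. The stand-alone loop below replays that
# walk over an in-memory list of ids so the termination condition ("no more
# events" -> stop) is easy to see; ``fetch_batch`` and ``walk`` are
# hypothetical stand-ins for the queryset and the recursive task.
def fetch_batch(ids, cursor, batch_size):
    # Equivalent of ``queryset.filter(id__lt=cursor).order_by('-id')[:batch_size]``.
    candidates = sorted((i for i in ids if cursor is None or i < cursor), reverse=True)
    return candidates[:batch_size]

def walk(ids, batch_size=2):
    cursor, batches = None, []
    while True:
        batch = fetch_batch(ids, cursor, batch_size)
        if not batch:       # mirrors the ``if not events: ... return`` branch
            return batches
        batches.append(batch)
        cursor = batch[-1]  # mirrors ``cursor=events[-1].id``

assert walk([1, 2, 3, 4, 5]) == [[5, 4], [3, 2], [1]]
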
def unmerge(
    project_id,
    source_id,
    destination_id,
    fingerprints,
    actor_id,
    cursor=None,
    batch_size=500,
    source_fields_reset=False,
    eventstream_state=None,
):
    # XXX: The queryset chunking logic below is awfully similar to
    # ``RangeQuerySetWrapper``. Ideally that could be refactored to be able to
    # be run without iteration by passing around a state object and we could
    # just use that here instead.
    source = Group.objects.get(
        project_id=project_id,
        id=source_id,
    )

    # On the first iteration of this loop, we clear out all of the
    # denormalizations from the source group so that we can have a clean slate
    # for the new, repaired data.
    if cursor is None:
        fingerprints = lock_hashes(project_id, source_id, fingerprints)
        truncate_denormalizations(source)

    caches = get_caches()

    project = caches['Project'](project_id)

    # We fetch the events in descending order by their primary key to get the
    # best approximation of the most recently received events.
    queryset = Event.objects.filter(
        project_id=project_id,
        group_id=source_id,
    ).order_by('-id')

    if cursor is not None:
        queryset = queryset.filter(id__lt=cursor)

    events = list(queryset[:batch_size])

    # If there are no more events to process, we're done with the migration.
    if not events:
        tagstore.update_group_tag_key_values_seen(project_id, [source_id, destination_id])
        unlock_hashes(project_id, fingerprints)
        logger.warning('Unmerge complete (eventstream state: %s)', eventstream_state)
        if eventstream_state:
            eventstream.end_unmerge(eventstream_state)
        return destination_id

    Event.objects.bind_nodes(events, 'data')

    source_events = []
    destination_events = []

    for event in events:
        (destination_events if get_fingerprint(event) in fingerprints else source_events).append(event)

    if source_events:
        if not source_fields_reset:
            source.update(**get_group_creation_attributes(
                caches,
                source_events,
            ))
            source_fields_reset = True
        else:
            source.update(**get_group_backfill_attributes(
                caches,
                source,
                source_events,
            ))

    (destination_id, eventstream_state) = migrate_events(
        caches,
        project,
        source_id,
        destination_id,
        fingerprints,
        destination_events,
        actor_id,
        eventstream_state,
    )

    repair_denormalizations(
        caches,
        project,
        events,
    )

    unmerge.delay(
        project_id,
        source_id,
        destination_id,
        fingerprints,
        actor_id,
        cursor=events[-1].id,
        batch_size=batch_size,
        source_fields_reset=source_fields_reset,
        eventstream_state=eventstream_state,
    )

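# --- Illustrative sketch, not part of the original module ---
# Each batch is split with the ternary-append idiom above: events whose
# fingerprint is in the unmerged ``fingerprints`` collection move to the
# destination group, while everything else stays with the source group and
# drives the creation-vs-backfill branch for the source attributes. The helper
# below shows the same partition on plain dicts; ``partition_by_fingerprint``
# and the ``fingerprint`` key are hypothetical.
def partition_by_fingerprint(events, fingerprints):
    source_events, destination_events = [], []
    for event in events:
        bucket = destination_events if event["fingerprint"] in fingerprints else source_events
        bucket.append(event)
    return source_events, destination_events

batch = [
    {"id": 1, "fingerprint": "a"},
    {"id": 2, "fingerprint": "b"},
    {"id": 3, "fingerprint": "a"},
]
source, destination = partition_by_fingerprint(batch, fingerprints={"a"})
assert [e["id"] for e in source] == [2]
assert [e["id"] for e in destination] == [1, 3]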