Example #1
def tombstone_events(project_id, group_id, event_ids):
    """
    Delete associated per-event data: nodestore, event attachments, user
    reports. Mark the event as "tombstoned" in Snuba.

    This is not full event deletion. Snuba can still only delete entire groups;
    however, we must only run this task for event IDs that we don't intend to
    reuse for reprocessed events. An event ID that has been tombstoned once
    cannot be inserted over in eventstream.

    See doc comment in sentry.reprocessing2.
    """

    from sentry.reprocessing2 import delete_unprocessed_events

    models.EventAttachment.objects.filter(project_id=project_id,
                                          event_id__in=event_ids).delete()
    models.UserReport.objects.filter(project_id=project_id,
                                     event_id__in=event_ids).delete()

    # Remove from nodestore
    node_ids = [
        Event.generate_node_id(project_id, event_id) for event_id in event_ids
    ]
    nodestore.delete_multi(node_ids)

    delete_unprocessed_events(project_id, event_ids)

    # Tell Snuba to delete the event data.
    eventstream.tombstone_events(project_id, event_ids)
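A minimal invocation sketch (not part of the original example; the project, group, and event IDs are invented) showing how such a task might be fired once it is known that these event IDs will never be reused:

# Illustrative only: all IDs below are made up.
project_id = 42
group_id = 1337
event_ids = ["a" * 32, "b" * 32]  # 32-character hex event IDs

# Tombstone the per-event data for events that will not be reprocessed.
tombstone_events(project_id, group_id, event_ids)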
Example #2
def handle_remaining_events(project_id, new_group_id, event_ids,
                            remaining_events, from_timestamp, to_timestamp):
    """
    Delete or merge/move associated per-event data: nodestore, event
    attachments, user reports. Mark the event as "tombstoned" in Snuba.

    This is not full event deletion. Snuba can still only delete entire groups;
    however, we must only run this task for event IDs that we don't intend to
    reuse for reprocessed events. An event ID that has been tombstoned once
    cannot be inserted over in eventstream.

    See doc comment in sentry.reprocessing2.
    """

    from sentry import buffer
    from sentry.models.group import Group
    from sentry.reprocessing2 import EVENT_MODELS_TO_MIGRATE

    assert remaining_events in ("delete", "keep")

    if remaining_events == "delete":
        for cls in EVENT_MODELS_TO_MIGRATE:
            cls.objects.filter(project_id=project_id,
                               event_id__in=event_ids).delete()

        # Remove from nodestore
        node_ids = [
            Event.generate_node_id(project_id, event_id)
            for event_id in event_ids
        ]
        nodestore.delete_multi(node_ids)

        # Tell Snuba to delete the event data.
        eventstream.tombstone_events_unsafe(project_id,
                                            event_ids,
                                            from_timestamp=from_timestamp,
                                            to_timestamp=to_timestamp)
    elif remaining_events == "keep":
        for cls in EVENT_MODELS_TO_MIGRATE:
            cls.objects.filter(
                project_id=project_id,
                event_id__in=event_ids).update(group_id=new_group_id)

        eventstream.replace_group_unsafe(
            project_id,
            event_ids,
            new_group_id=new_group_id,
            from_timestamp=from_timestamp,
            to_timestamp=to_timestamp,
        )

        buffer.incr(Group, {"times_seen": len(event_ids)},
                    {"id": new_group_id})
    else:
        raise ValueError(
            f"Invalid value for remaining_events: {remaining_events}")
Example #3
    def chunk(self):
        conditions = []
        if self.last_event is not None:
            conditions.extend(
                [
                    ["timestamp", "<=", self.last_event.timestamp],
                    [
                        ["timestamp", "<", self.last_event.timestamp],
                        ["event_id", "<", self.last_event.event_id],
                    ],
                ]
            )

        events = eventstore.get_unfetched_events(
            filter=eventstore.Filter(
                conditions=conditions, project_ids=[self.project_id], group_ids=[self.group_id]
            ),
            limit=self.DEFAULT_CHUNK_SIZE,
            referrer="deletions.group",
            orderby=["-timestamp", "-event_id"],
        )

        if not events:
            return False

        self.last_event = events[-1]

        # Remove from nodestore
        node_ids = [Event.generate_node_id(self.project_id, event.event_id) for event in events]
        nodestore.delete_multi(node_ids)

        from sentry.reprocessing2 import delete_unprocessed_events

        delete_unprocessed_events(events)

        # Remove EventAttachment and UserReport *again* as those may not have a
        # group ID, therefore there may be dangling ones after "regular" model
        # deletion.
        event_ids = [event.event_id for event in events]
        models.EventAttachment.objects.filter(
            event_id__in=event_ids, project_id=self.project_id
        ).delete()
        models.UserReport.objects.filter(
            event_id__in=event_ids, project_id=self.project_id
        ).delete()

        return True
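The boolean return value drives the surrounding deletion framework: as long as chunk() returns True there is another batch to delete. A minimal driver sketch, assuming a task class named EventDataDeletionTask with this chunk() method (the class name and constructor are assumptions, not shown above):

# Hypothetical driver loop; the real framework re-schedules an asynchronous
# task between chunks rather than looping synchronously.
task = EventDataDeletionTask(project_id=42, group_id=1337)  # assumed constructor
while task.chunk():
    pass  # each call deletes up to DEFAULT_CHUNK_SIZE events until none remain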
Example #4
def delete_events(relation,
                  transaction_id=None,
                  limit=10000,
                  chunk_limit=100,
                  logger=None):
    from sentry.models import Event, EventTag

    while limit > 0:
        result_set = list(Event.objects.filter(**relation)[:chunk_limit])
        if not bool(result_set):
            return False

        # delete objects from nodestore first
        node_ids = set(r.data.id for r in result_set if r.data.id)
        if node_ids:
            nodestore.delete_multi(node_ids)

        event_ids = [r.id for r in result_set]

        # bulk delete by id
        EventTag.objects.filter(event_id__in=event_ids).delete()
        if logger is not None:
            # The only reason this is a different log statement is that logging every
            # single event that gets deleted in the relation will destroy disks.
            logger.info('object.delete.bulk_executed',
                        extra=dict(
                            relation.items() + [
                                ('transaction_id', transaction_id),
                                ('model', 'EventTag'),
                            ], ))

        # bulk delete by id
        Event.objects.filter(id__in=event_ids).delete()
        if logger is not None:
            # The only reason this is a different log statement is that logging every
            # single event that gets deleted in the relation will destroy disks.
            logger.info('object.delete.bulk_executed',
                        extra=dict(
                            relation.items() + [
                                ('transaction_id', transaction_id),
                                ('model', 'Event'),
                            ], ))

        limit -= chunk_limit

    return True
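Here relation is a plain dict of Django ORM filter kwargs that is expanded into Event.objects.filter(**relation). A hedged usage sketch (the group ID and transaction ID are invented):

import logging

# Delete up to 10000 events of one group, 100 per chunk (the defaults).
delete_events(
    relation={"group_id": 1337},      # filter kwargs for Event.objects.filter()
    transaction_id="deadbeef",        # made-up ID, only used for log correlation
    logger=logging.getLogger("sentry.deletions"),
)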
Example #5
def handle_remaining_events(project_id, new_group_id, event_ids,
                            remaining_events, from_timestamp, to_timestamp):
    """
    Delete or merge/move associated per-event data: nodestore, event
    attachments, user reports. Mark the event as "tombstoned" in Snuba.

    This is not full event deletion. Snuba can still only delete entire groups;
    however, we must only run this task for event IDs that we don't intend to
    reuse for reprocessed events. An event ID that has been tombstoned once
    cannot be inserted over in eventstream.

    See doc comment in sentry.reprocessing2.
    """

    assert remaining_events in ("delete", "keep")

    if remaining_events == "delete":
        models.EventAttachment.objects.filter(project_id=project_id,
                                              event_id__in=event_ids).delete()
        models.UserReport.objects.filter(project_id=project_id,
                                         event_id__in=event_ids).delete()

        # Remove from nodestore
        node_ids = [
            Event.generate_node_id(project_id, event_id)
            for event_id in event_ids
        ]
        nodestore.delete_multi(node_ids)

        # Tell Snuba to delete the event data.
        eventstream.tombstone_events_unsafe(project_id,
                                            event_ids,
                                            from_timestamp=from_timestamp,
                                            to_timestamp=to_timestamp)
    elif remaining_events == "keep":
        eventstream.replace_group_unsafe(
            project_id,
            event_ids,
            new_group_id=new_group_id,
            from_timestamp=from_timestamp,
            to_timestamp=to_timestamp,
        )
    else:
        raise ValueError(
            f"Invalid value for remaining_events: {remaining_events}")
Example #6
def delete_events(relation, transaction_id=None, limit=10000, chunk_limit=100, logger=None):
    from sentry.models import Event, EventTag

    while limit > 0:
        result_set = list(Event.objects.filter(**relation)[:chunk_limit])
        if not bool(result_set):
            return False

        # delete objects from nodestore first
        node_ids = set(r.data.id for r in result_set if r.data.id)
        if node_ids:
            nodestore.delete_multi(node_ids)

        event_ids = [r.id for r in result_set]

        # bulk delete by id
        EventTag.objects.filter(event_id__in=event_ids).delete()
        if logger is not None:
            # The only reason this is a different log statement is that logging every
            # single event that gets deleted in the relation will destroy disks.
            logger.info('object.delete.bulk_executed', extra=dict(
                relation.items() + [
                    ('transaction_id', transaction_id),
                    ('model', 'EventTag'),
                ],
            ))

        # bulk delete by id
        Event.objects.filter(id__in=event_ids).delete()
        if logger is not None:
            # The only reason this is a different log statement is that logging every
            # single event that gets deleted in the relation will destroy disks.
            logger.info('object.delete.bulk_executed', extra=dict(
                relation.items() + [
                    ('transaction_id', transaction_id),
                    ('model', 'Event'),
                ],
            ))

        limit -= chunk_limit

    return True
Example #7
    def chunk(self):
        conditions = []
        if self.last_event is not None:
            conditions.extend([
                ["timestamp", "<=", self.last_event.timestamp],
                [
                    ["timestamp", "<", self.last_event.timestamp],
                    ["event_id", "<", self.last_event.event_id],
                ],
            ])

        events = eventstore.get_unfetched_events(
            filter=eventstore.Filter(conditions=conditions,
                                     project_ids=[self.project_id],
                                     group_ids=[self.group_id]),
            limit=self.DEFAULT_CHUNK_SIZE,
            referrer="deletions.group",
            orderby=["-timestamp", "-event_id"],
        )

        if not events:
            return False

        self.last_event = events[-1]

        # Remove from nodestore
        node_ids = [
            Event.generate_node_id(self.project_id, event.event_id)
            for event in events
        ]
        nodestore.delete_multi(node_ids)

        delete_unprocessed_events(events)

        # Remove EventAttachment and UserReport
        event_ids = [event.event_id for event in events]
        EventAttachment.objects.filter(event_id__in=event_ids,
                                       project_id=self.project_id).delete()
        UserReport.objects.filter(event_id__in=event_ids,
                                  project_id=self.project_id).delete()

        return True
Example #8
    def chunk(self):
        nodestore.delete_multi(self.nodes)
        return False
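All of the examples funnel into the same call: node IDs derived from (project_id, event_id) pairs are deleted from nodestore in bulk. A minimal sketch of that pattern with invented IDs:

# Illustrative only: derive node IDs for two made-up events and delete them in one call.
event_ids = ["c" * 32, "d" * 32]
node_ids = [Event.generate_node_id(42, event_id) for event_id in event_ids]
nodestore.delete_multi(node_ids)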
Example #9
def delete_unprocessed_events(project_id, event_ids):
    node_ids = [
        _generate_unprocessed_event_node_id(project_id, event_id)
        for event_id in event_ids
    ]
    nodestore.delete_multi(node_ids)
Example #10
def handle_remaining_events(
    project_id,
    new_group_id,
    remaining_events,
    # TODO(markus): Should be mandatory arguments.
    event_ids_redis_key=None,
    old_group_id=None,
    # TODO(markus): Deprecated arguments, can remove in next version.
    event_ids=None,
    from_timestamp=None,
    to_timestamp=None,
):
    """
    Delete or merge/move associated per-event data: nodestore, event
    attachments, user reports. Mark the event as "tombstoned" in Snuba.

    This is not full event deletion. Snuba can still only delete entire groups;
    however, we must only run this task for event IDs that we don't intend to
    reuse for reprocessed events. An event ID that has been tombstoned once
    cannot be inserted over in eventstream.

    See doc comment in sentry.reprocessing2.
    """

    from sentry import buffer
    from sentry.models.group import Group
    from sentry.reprocessing2 import EVENT_MODELS_TO_MIGRATE, pop_batched_events_from_redis

    if event_ids_redis_key is not None:
        event_ids, from_timestamp, to_timestamp = pop_batched_events_from_redis(
            event_ids_redis_key)

    metrics.timing(
        "events.reprocessing.handle_remaining_events.batch_size",
        len(event_ids),
        sample_rate=1.0,
    )

    assert remaining_events in ("delete", "keep")

    if remaining_events == "delete":
        for cls in EVENT_MODELS_TO_MIGRATE:
            cls.objects.filter(project_id=project_id,
                               event_id__in=event_ids).delete()

        # Remove from nodestore
        node_ids = [
            Event.generate_node_id(project_id, event_id)
            for event_id in event_ids
        ]
        nodestore.delete_multi(node_ids)

        # Tell Snuba to delete the event data.
        eventstream.tombstone_events_unsafe(project_id,
                                            event_ids,
                                            from_timestamp=from_timestamp,
                                            to_timestamp=to_timestamp)
    elif remaining_events == "keep":
        for cls in EVENT_MODELS_TO_MIGRATE:
            cls.objects.filter(
                project_id=project_id,
                event_id__in=event_ids).update(group_id=new_group_id)

        eventstream.replace_group_unsafe(
            project_id,
            event_ids,
            new_group_id=new_group_id,
            from_timestamp=from_timestamp,
            to_timestamp=to_timestamp,
        )

        buffer.incr(Group, {"times_seen": len(event_ids)},
                    {"id": new_group_id})
    else:
        raise ValueError(
            f"Invalid value for remaining_events: {remaining_events}")

    if old_group_id is not None:
        from sentry.reprocessing2 import mark_event_reprocessed

        mark_event_reprocessed(group_id=old_group_id,
                               project_id=project_id,
                               num_events=len(event_ids))