Example 1
    def test_simple(self):
        event_id = "a" * 32
        event_id_2 = "b" * 32
        project = self.create_project()

        node_id = Event.generate_node_id(project.id, event_id)
        node_id_2 = Event.generate_node_id(project.id, event_id_2)

        event = self.store_event(
            data={
                "event_id": event_id,
                "timestamp": iso_format(before_now(minutes=1)),
                "fingerprint": ["group1"],
            },
            project_id=project.id,
        )

        self.store_event(
            data={
                "event_id": event_id_2,
                "timestamp": iso_format(before_now(minutes=1)),
                "fingerprint": ["group1"],
            },
            project_id=project.id,
        )

        group = event.group
        group.update(status=GroupStatus.PENDING_DELETION)

        GroupAssignee.objects.create(group=group,
                                     project=project,
                                     user=self.user)
        GroupHash.objects.create(project=project,
                                 group=group,
                                 hash=uuid4().hex)
        GroupMeta.objects.create(group=group, key="foo", value="bar")
        GroupRedirect.objects.create(group_id=group.id, previous_group_id=1)

        assert nodestore.get(node_id)
        assert nodestore.get(node_id_2)

        with self.tasks():
            delete_groups(object_ids=[group.id])

        assert not GroupRedirect.objects.filter(group_id=group.id).exists()
        assert not GroupHash.objects.filter(group_id=group.id).exists()
        assert not Group.objects.filter(id=group.id).exists()
        assert not nodestore.get(node_id)
        assert not nodestore.get(node_id_2)
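
Every example on this page follows the same basic pattern: derive a deterministic node ID from a (project_id, event_id) pair with Event.generate_node_id, then use that ID as the key for nodestore reads and deletes. Below is a minimal sketch of that pattern using only calls that appear in these examples; the helper names are illustrative, not part of Sentry.

from sentry import nodestore
from sentry.eventstore.models import Event


def fetch_event_payloads(project_id, event_ids):
    # generate_node_id is deterministic, so the same (project_id, event_id)
    # pair always maps to the same nodestore key.
    node_ids = {
        event_id: Event.generate_node_id(project_id, event_id)
        for event_id in event_ids
    }

    # Batch-read the payloads; entries for missing nodes may be absent or
    # None, so look them up with .get().
    node_data = nodestore.get_multi(list(node_ids.values()))
    return {
        event_id: node_data.get(node_id)
        for event_id, node_id in node_ids.items()
    }


def delete_event_payloads(project_id, event_ids):
    # Deletion uses the same derived keys.
    nodestore.delete_multi(
        [Event.generate_node_id(project_id, event_id) for event_id in event_ids]
    )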
Example 2
def tombstone_events(project_id, group_id, event_ids):
    """
    Delete associated per-event data: nodestore, event attachments, user
    reports. Mark the event as "tombstoned" in Snuba.

    This is not full event deletion. Snuba can still only delete entire groups;
    however, we must only run this task for event IDs that we don't intend to
    reuse for reprocessed events. Once an event ID has been tombstoned, it
    cannot be inserted over in eventstream.

    See doc comment in sentry.reprocessing2.
    """

    from sentry.reprocessing2 import delete_unprocessed_events

    models.EventAttachment.objects.filter(project_id=project_id,
                                          event_id__in=event_ids).delete()
    models.UserReport.objects.filter(project_id=project_id,
                                     event_id__in=event_ids).delete()

    # Remove from nodestore
    node_ids = [
        Event.generate_node_id(project_id, event_id) for event_id in event_ids
    ]
    nodestore.delete_multi(node_ids)

    delete_unprocessed_events(project_id, event_ids)

    # Tell Snuba to delete the event data.
    eventstream.tombstone_events(project_id, event_ids)
Example 3
def _process_snuba_results(query_res, group: Group, user):
    event_ids = {
        row["latest_event_id"]: Event.generate_node_id(group.project_id,
                                                       row["latest_event_id"])
        for row in query_res
    }

    node_data = nodestore.get_multi(list(event_ids.values()))

    response = []

    for row in query_res:
        response_item = {
            "hash": row["new_materialized_hash"],
            "eventCount": row["event_count"],
        }
        event_id = row["latest_event_id"]
        event_data = node_data.get(event_ids[event_id], None)

        if event_data is not None:
            event = Event(group.project_id,
                          event_id,
                          group_id=group.id,
                          data=event_data)
            response_item["latestEvent"] = serialize(event, user,
                                                     EventSerializer())

        response.append(response_item)

    return response
Example 4
def handle_remaining_events(project_id, new_group_id, event_ids,
                            remaining_events, from_timestamp, to_timestamp):
    """
    Delete or merge/move associated per-event data: nodestore, event
    attachments, user reports. Mark the event as "tombstoned" in Snuba.

    This is not full event deletion. Snuba can still only delete entire groups;
    however, we must only run this task for event IDs that we don't intend to
    reuse for reprocessed events. Once an event ID has been tombstoned, it
    cannot be inserted over in eventstream.

    See doc comment in sentry.reprocessing2.
    """

    from sentry import buffer
    from sentry.models.group import Group
    from sentry.reprocessing2 import EVENT_MODELS_TO_MIGRATE

    assert remaining_events in ("delete", "keep")

    if remaining_events == "delete":
        for cls in EVENT_MODELS_TO_MIGRATE:
            cls.objects.filter(project_id=project_id,
                               event_id__in=event_ids).delete()

        # Remove from nodestore
        node_ids = [
            Event.generate_node_id(project_id, event_id)
            for event_id in event_ids
        ]
        nodestore.delete_multi(node_ids)

        # Tell Snuba to delete the event data.
        eventstream.tombstone_events_unsafe(project_id,
                                            event_ids,
                                            from_timestamp=from_timestamp,
                                            to_timestamp=to_timestamp)
    elif remaining_events == "keep":
        for cls in EVENT_MODELS_TO_MIGRATE:
            cls.objects.filter(
                project_id=project_id,
                event_id__in=event_ids).update(group_id=new_group_id)

        eventstream.replace_group_unsafe(
            project_id,
            event_ids,
            new_group_id=new_group_id,
            from_timestamp=from_timestamp,
            to_timestamp=to_timestamp,
        )

        buffer.incr(Group, {"times_seen": len(event_ids)},
                    {"id": new_group_id})
    else:
        raise ValueError(
            f"Invalid value for remaining_events: {remaining_events}")
Example 5
def fetch_and_store(line):
    project_id, event_id = line.strip().split("\t")
    node_id = Event.generate_node_id(project_id, event_id)
    node = nodestore.get(node_id)  # pylint: disable=no-member

    if node is None:
        print("WARNING: Got None from nodestore for project / event",
              project_id,
              event_id,
              file=sys.stderr)
    else:
        store(project_id, event_id, node, global_output_dir)
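
fetch_and_store expects one tab-separated "project_id<TAB>event_id" record per line; store and global_output_dir are defined elsewhere in the surrounding script. A minimal sketch of a driver loop feeding it from stdin (assumed, not part of the original script):

import sys

# Each input line is "project_id\tevent_id", e.g. from a TSV export.
for line in sys.stdin:
    if line.strip():
        fetch_and_store(line)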
Example 6
    def test_simple(self):
        configure_sdk()
        Hub.current.bind_client(Hub.main.client)

        with self.tasks():
            event_id = raven.captureMessage("internal client test")

        event = nodestore.get(Event.generate_node_id(settings.SENTRY_PROJECT, event_id))

        assert event["project"] == settings.SENTRY_PROJECT
        assert event["event_id"] == event_id
        assert event["logentry"]["formatted"] == "internal client test"
Example 7
    def chunk(self):
        conditions = []
        if self.last_event is not None:
            conditions.extend(
                [
                    ["timestamp", "<=", self.last_event.timestamp],
                    [
                        ["timestamp", "<", self.last_event.timestamp],
                        ["event_id", "<", self.last_event.event_id],
                    ],
                ]
            )

        events = eventstore.get_unfetched_events(
            filter=eventstore.Filter(
                conditions=conditions, project_ids=[self.project_id], group_ids=[self.group_id]
            ),
            limit=self.DEFAULT_CHUNK_SIZE,
            referrer="deletions.group",
            orderby=["-timestamp", "-event_id"],
        )

        if not events:
            return False

        self.last_event = events[-1]

        # Remove from nodestore
        node_ids = [Event.generate_node_id(self.project_id, event.event_id) for event in events]
        nodestore.delete_multi(node_ids)

        from sentry.reprocessing2 import delete_unprocessed_events

        delete_unprocessed_events(events)

        # Remove EventAttachment and UserReport *again*, as those may not have
        # a group ID and there may therefore be dangling ones after "regular"
        # model deletion.
        event_ids = [event.event_id for event in events]
        models.EventAttachment.objects.filter(
            event_id__in=event_ids, project_id=self.project_id
        ).delete()
        models.UserReport.objects.filter(
            event_id__in=event_ids, project_id=self.project_id
        ).delete()

        return True
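
chunk() pages through the group's events with a keyset cursor on (timestamp, event_id), remembers the last event it saw, and returns False once nothing is left to fetch. A minimal sketch of driving it to completion, assuming task is an instance of the deletion class above:

# Delete the group's events in DEFAULT_CHUNK_SIZE batches until chunk()
# reports that no events remain.
while task.chunk():
    pass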
Example 8
    def test_encoding(self):
        configure_sdk()
        Hub.current.bind_client(Hub.main.client)

        class NotJSONSerializable:
            pass

        with self.tasks():
            event_id = raven.captureMessage(
                "check the req", extra={"request": NotJSONSerializable()}
            )

        event = nodestore.get(Event.generate_node_id(settings.SENTRY_PROJECT, event_id))

        assert event["project"] == settings.SENTRY_PROJECT
        assert event["logentry"]["formatted"] == "check the req"
        assert "NotJSONSerializable" in event["extra"]["request"]
Example 9
    def test_recursion_breaker(self):
        configure_sdk()
        Hub.current.bind_client(Hub.main.client)

        # If this test terminates at all then we avoided recursion.
        with self.tasks():
            with mock.patch(
                "sentry.event_manager.EventManager.save", side_effect=ValueError("oh no!")
            ) as save:
                event_id = raven.captureMessage("internal client test")

        event = nodestore.get(Event.generate_node_id(settings.SENTRY_PROJECT, event_id))
        assert event is None

        assert_mock_called_once_with_partial(
            save, settings.SENTRY_PROJECT, cache_key=u"e:{}:1".format(event_id)
        )
Example 10
    def test_dupe_message_id(self, eventstream_insert):
        # Saves the latest event to nodestore and eventstream
        project_id = 1
        event_id = "a" * 32
        node_id = Event.generate_node_id(project_id, event_id)

        manager = EventManager(make_event(event_id=event_id, message="first"))
        manager.normalize()
        manager.save(project_id)
        assert nodestore.get(node_id)["logentry"]["formatted"] == "first"

        manager = EventManager(make_event(event_id=event_id, message="second"))
        manager.normalize()
        manager.save(project_id)
        assert nodestore.get(node_id)["logentry"]["formatted"] == "second"

        assert eventstream_insert.call_count == 2
Example 11
    def get(self, request: Request, organization) -> Response:
        """
        Generate a list of data scrubbing selectors from existing event data.

        This list is used to auto-complete settings in "Data Scrubbing" /
        "Security and Privacy" settings.
        """

        event_id = request.GET.get("eventId", None)

        # For organization settings we access all projects the user has access
        # to. For the project level, `get_projects` will give us back a single
        # project.
        #
        # Filtering by the projects that self.get_projects returns deals with
        # permission concerns.
        #
        # The org-wide search for the event ID is quite slow, but we cannot fix
        # that without product redesign.
        projects = self.get_projects(request, organization)
        project_ids = [project.id for project in projects]

        suggestions = {}

        if event_id:
            # go to nodestore directly instead of eventstore.get_events, which
            # would not return transaction events
            node_ids = [
                Event.generate_node_id(p, event_id) for p in project_ids
            ]
            all_data = nodestore.get_multi(node_ids)

            for data in filter(None, all_data.values()):
                for selector in pii_selector_suggestions_from_event(data):
                    examples_ = suggestions.setdefault(selector["path"], [])
                    if selector["value"]:
                        examples_.append(selector["value"])

        return Response({
            "suggestions": [{
                "type": "value",
                "value": value,
                "examples": examples
            } for value, examples in suggestions.items()]
        })
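
When eventId is supplied, the endpoint computes one node ID per accessible project, batch-fetches the payloads from nodestore, and aggregates scrubbing-selector suggestions from whichever payloads exist. An illustrative response body; the selector paths and example values below are hypothetical.

# Illustrative response shape only; actual paths come from
# pii_selector_suggestions_from_event.
example_response = {
    "suggestions": [
        {"type": "value", "value": "$string", "examples": ["4571 2345 6789 0111"]},
        {"type": "value", "value": "extra.request", "examples": ["raw request body"]},
    ]
}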
Example 12
def handle_remaining_events(project_id, new_group_id, event_ids,
                            remaining_events, from_timestamp, to_timestamp):
    """
    Delete or merge/move associated per-event data: nodestore, event
    attachments, user reports. Mark the event as "tombstoned" in Snuba.

    This is not full event deletion. Snuba can still only delete entire groups;
    however, we must only run this task for event IDs that we don't intend to
    reuse for reprocessed events. Once an event ID has been tombstoned, it
    cannot be inserted over in eventstream.

    See doc comment in sentry.reprocessing2.
    """

    assert remaining_events in ("delete", "keep")

    if remaining_events == "delete":
        models.EventAttachment.objects.filter(project_id=project_id,
                                              event_id__in=event_ids).delete()
        models.UserReport.objects.filter(project_id=project_id,
                                         event_id__in=event_ids).delete()

        # Remove from nodestore
        node_ids = [
            Event.generate_node_id(project_id, event_id)
            for event_id in event_ids
        ]
        nodestore.delete_multi(node_ids)

        # Tell Snuba to delete the event data.
        eventstream.tombstone_events_unsafe(project_id,
                                            event_ids,
                                            from_timestamp=from_timestamp,
                                            to_timestamp=to_timestamp)
    elif remaining_events == "keep":
        eventstream.replace_group_unsafe(
            project_id,
            event_ids,
            new_group_id=new_group_id,
            from_timestamp=from_timestamp,
            to_timestamp=to_timestamp,
        )
    else:
        raise ValueError(
            f"Invalid value for remaining_events: {remaining_events}")
Example 13
def _process_snuba_results(query_res, group: Group, id: int, user):
    event_ids = {
        row["latest_event_id"]: Event.generate_node_id(group.project_id,
                                                       row["latest_event_id"])
        for row in query_res
    }

    node_data = nodestore.get_multi(list(event_ids.values()))

    response = []

    for row in query_res:
        response_item = {
            "hash": row["new_materialized_hash"],
            "eventCount": row["event_count"],
        }
        event_id = row["latest_event_id"]
        event_data = node_data.get(event_ids[event_id], None)

        if event_data is not None:
            event = Event(group.project_id,
                          event_id,
                          group_id=group.id,
                          data=event_data)
            response_item["latestEvent"] = serialize(event, user,
                                                     EventSerializer())

            tree_label = get_path(
                event_data, "hierarchical_tree_labels", id) or get_path(
                    event_data, "hierarchical_tree_labels", -1)

            # Rough approximation of what happens with Group title
            event_type = get_event_type(event.data)
            metadata = dict(event.get_event_metadata())
            metadata["current_tree_label"] = tree_label
            # Force rendering of grouping tree labels irrespective of platform
            metadata["display_title_with_tree_label"] = True
            title = event_type.get_title(metadata)
            response_item["title"] = title or event.title
            response_item["metadata"] = metadata

        response.append(response_item)

    return response
Example 14
    def chunk(self):
        conditions = []
        if self.last_event is not None:
            conditions.extend([
                ["timestamp", "<=", self.last_event.timestamp],
                [
                    ["timestamp", "<", self.last_event.timestamp],
                    ["event_id", "<", self.last_event.event_id],
                ],
            ])

        events = eventstore.get_unfetched_events(
            filter=eventstore.Filter(conditions=conditions,
                                     project_ids=[self.project_id],
                                     group_ids=[self.group_id]),
            limit=self.DEFAULT_CHUNK_SIZE,
            referrer="deletions.group",
            orderby=["-timestamp", "-event_id"],
        )

        if not events:
            return False

        self.last_event = events[-1]

        # Remove from nodestore
        node_ids = [
            Event.generate_node_id(self.project_id, event.event_id)
            for event in events
        ]
        nodestore.delete_multi(node_ids)

        delete_unprocessed_events(events)

        # Remove EventAttachment and UserReport
        event_ids = [event.event_id for event in events]
        EventAttachment.objects.filter(event_id__in=event_ids,
                                       project_id=self.project_id).delete()
        UserReport.objects.filter(event_id__in=event_ids,
                                  project_id=self.project_id).delete()

        return True
Example 15
def pull_event_data(project_id, event_id) -> ReprocessableEvent:
    from sentry.lang.native.processing import get_required_attachment_types

    with sentry_sdk.start_span(op="reprocess_events.eventstore.get"):
        event = eventstore.get_event_by_id(project_id, event_id)

    if event is None:
        raise CannotReprocess("event.not_found")

    with sentry_sdk.start_span(op="reprocess_events.nodestore.get"):
        node_id = Event.generate_node_id(project_id, event_id)
        data = nodestore.get(node_id, subkey="unprocessed")
        if data is None:
            node_id = _generate_unprocessed_event_node_id(
                project_id=project_id, event_id=event_id)
            data = nodestore.get(node_id)

    # Check data after checking presence of event to avoid too many instances.
    if data is None:
        raise CannotReprocess("unprocessed_event.not_found")

    required_attachment_types = get_required_attachment_types(data)
    attachments = list(
        models.EventAttachment.objects.filter(
            project_id=project_id,
            event_id=event_id,
            type__in=list(required_attachment_types)))
    missing_attachment_types = required_attachment_types - {
        ea.type
        for ea in attachments
    }

    if missing_attachment_types:
        raise CannotReprocess("attachment.not_found")

    return ReprocessableEvent(event=event, data=data, attachments=attachments)
Example 16
def reprocess_event(project_id, event_id, start_time):

    from sentry.tasks.store import preprocess_event_from_reprocessing
    from sentry.ingest.ingest_consumer import CACHE_TIMEOUT

    with sentry_sdk.start_span(op="reprocess_events.nodestore.get"):
        node_id = Event.generate_node_id(project_id, event_id)
        data = nodestore.get(node_id, subkey="unprocessed")
        if data is None:
            node_id = _generate_unprocessed_event_node_id(project_id=project_id, event_id=event_id)
            data = nodestore.get(node_id)

    with sentry_sdk.start_span(op="reprocess_events.eventstore.get"):
        event = eventstore.get_event_by_id(project_id, event_id)

    if event is None:
        logger.error(
            "reprocessing2.event.not_found", extra={"project_id": project_id, "event_id": event_id}
        )
        return

    if data is None:
        logger.error(
            "reprocessing2.reprocessing_nodestore.not_found",
            extra={"project_id": project_id, "event_id": event_id},
        )
        # We have no real data for reprocessing. We assume this event goes
        # straight to save_event, and hope that the event data can be
        # reingested like that. It's better than data loss.
        #
        # XXX: Ideally we would run a "save-lite" for this that only updates
        # the group ID in-place. Like a snuba merge message.
        data = dict(event.data)

    # Step 1: Fix up the event payload for reprocessing and put it in event
    # cache/event_processing_store
    set_path(data, "contexts", "reprocessing", "original_issue_id", value=event.group_id)
    cache_key = event_processing_store.store(data)

    # Step 2: Copy attachments into attachment cache
    queryset = models.EventAttachment.objects.filter(project_id=project_id, event_id=event_id)
    files = {f.id: f for f in models.File.objects.filter(id__in=[ea.file_id for ea in queryset])}

    attachment_objects = []

    for attachment_id, attachment in enumerate(queryset):
        with sentry_sdk.start_span(op="reprocess_event._copy_attachment_into_cache") as span:
            span.set_data("attachment_id", attachment.id)
            attachment_objects.append(
                _copy_attachment_into_cache(
                    attachment_id=attachment_id,
                    attachment=attachment,
                    file=files[attachment.file_id],
                    cache_key=cache_key,
                    cache_timeout=CACHE_TIMEOUT,
                )
            )

    if attachment_objects:
        with sentry_sdk.start_span(op="reprocess_event.set_attachment_meta"):
            attachment_cache.set(cache_key, attachments=attachment_objects, timeout=CACHE_TIMEOUT)

    preprocess_event_from_reprocessing(
        cache_key=cache_key, start_time=start_time, event_id=event_id
    )
Example 17
    def setUp(self):
        super(DeleteGroupTest, self).setUp()
        self.event_id = "a" * 32
        self.event_id2 = "b" * 32
        self.event_id3 = "c" * 32

        self.project = self.create_project()

        self.event = self.store_event(
            data={
                "event_id": self.event_id,
                "tags": {
                    "foo": "bar"
                },
                "timestamp": iso_format(before_now(minutes=1)),
                "fingerprint": ["group1"],
            },
            project_id=self.project.id,
        )

        self.store_event(
            data={
                "event_id": self.event_id2,
                "timestamp": iso_format(before_now(minutes=1)),
                "fingerprint": ["group1"],
            },
            project_id=self.project.id,
        )

        self.store_event(
            data={
                "event_id": self.event_id3,
                "timestamp": iso_format(before_now(minutes=1)),
                "fingerprint": ["group2"],
            },
            project_id=self.project.id,
        )
        group = self.event.group

        UserReport.objects.create(group_id=group.id,
                                  project_id=self.event.project_id,
                                  name="With group id")
        UserReport.objects.create(event_id=self.event.event_id,
                                  project_id=self.event.project_id,
                                  name="With event id")
        EventAttachment.objects.create(
            event_id=self.event.event_id,
            project_id=self.event.project_id,
            file=File.objects.create(name="hello.png", type="image/png"),
            name="hello.png",
        )
        GroupAssignee.objects.create(group=group,
                                     project=self.project,
                                     user=self.user)
        GroupHash.objects.create(project=self.project,
                                 group=group,
                                 hash=uuid4().hex)
        GroupMeta.objects.create(group=group, key="foo", value="bar")
        GroupRedirect.objects.create(group_id=group.id, previous_group_id=1)

        self.node_id = Event.generate_node_id(self.project.id, self.event_id)
        self.node_id2 = Event.generate_node_id(self.project.id, self.event_id2)
        self.node_id3 = Event.generate_node_id(self.project.id, self.event_id3)
Example 18
def reprocess_event(project_id, event_id, start_time):

    from sentry.ingest.ingest_consumer import CACHE_TIMEOUT
    from sentry.lang.native.processing import get_required_attachment_types
    from sentry.tasks.store import preprocess_event_from_reprocessing

    with sentry_sdk.start_span(op="reprocess_events.nodestore.get"):
        node_id = Event.generate_node_id(project_id, event_id)
        data = nodestore.get(node_id, subkey="unprocessed")
        if data is None:
            node_id = _generate_unprocessed_event_node_id(project_id=project_id, event_id=event_id)
            data = nodestore.get(node_id)

    if data is None:
        raise CannotReprocess("reprocessing_nodestore.not_found")

    with sentry_sdk.start_span(op="reprocess_events.eventstore.get"):
        event = eventstore.get_event_by_id(project_id, event_id)

    if event is None:
        raise CannotReprocess("event.not_found")

    required_attachment_types = get_required_attachment_types(data)
    attachments = list(
        models.EventAttachment.objects.filter(
            project_id=project_id, event_id=event_id, type__in=list(required_attachment_types)
        )
    )
    missing_attachment_types = required_attachment_types - {ea.type for ea in attachments}

    if missing_attachment_types:
        raise CannotReprocess(
            f"attachment.not_found.{'_and_'.join(sorted(missing_attachment_types))}"
        )

    # Step 1: Fix up the event payload for reprocessing and put it in event
    # cache/event_processing_store
    set_path(data, "contexts", "reprocessing", "original_issue_id", value=event.group_id)
    set_path(
        data, "contexts", "reprocessing", "original_primary_hash", value=event.get_primary_hash()
    )
    cache_key = event_processing_store.store(data)

    # Step 2: Copy attachments into attachment cache. Note that we can only
    # consider minidumps because filestore just stays as-is after reprocessing
    # (we simply update group_id on the EventAttachment models in post_process)
    attachment_objects = []

    files = {f.id: f for f in models.File.objects.filter(id__in=[ea.file_id for ea in attachments])}

    for attachment_id, attachment in enumerate(attachments):
        with sentry_sdk.start_span(op="reprocess_event._copy_attachment_into_cache") as span:
            span.set_data("attachment_id", attachment.id)
            attachment_objects.append(
                _copy_attachment_into_cache(
                    attachment_id=attachment_id,
                    attachment=attachment,
                    file=files[attachment.file_id],
                    cache_key=cache_key,
                    cache_timeout=CACHE_TIMEOUT,
                )
            )

    if attachment_objects:
        with sentry_sdk.start_span(op="reprocess_event.set_attachment_meta"):
            attachment_cache.set(cache_key, attachments=attachment_objects, timeout=CACHE_TIMEOUT)

    preprocess_event_from_reprocessing(
        cache_key=cache_key,
        start_time=start_time,
        event_id=event_id,
        data=data,
    )
Example 19
def handle_remaining_events(
    project_id,
    new_group_id,
    remaining_events,
    # TODO(markus): Should be mandatory arguments.
    event_ids_redis_key=None,
    old_group_id=None,
    # TODO(markus): Deprecated arguments, can remove in next version.
    event_ids=None,
    from_timestamp=None,
    to_timestamp=None,
):
    """
    Delete or merge/move associated per-event data: nodestore, event
    attachments, user reports. Mark the event as "tombstoned" in Snuba.

    This is not full event deletion. Snuba can still only delete entire groups;
    however, we must only run this task for event IDs that we don't intend to
    reuse for reprocessed events. Once an event ID has been tombstoned, it
    cannot be inserted over in eventstream.

    See doc comment in sentry.reprocessing2.
    """

    from sentry import buffer
    from sentry.models.group import Group
    from sentry.reprocessing2 import EVENT_MODELS_TO_MIGRATE, pop_batched_events_from_redis

    if event_ids_redis_key is not None:
        event_ids, from_timestamp, to_timestamp = pop_batched_events_from_redis(
            event_ids_redis_key)

    metrics.timing(
        "events.reprocessing.handle_remaining_events.batch_size",
        len(event_ids),
        sample_rate=1.0,
    )

    assert remaining_events in ("delete", "keep")

    if remaining_events == "delete":
        for cls in EVENT_MODELS_TO_MIGRATE:
            cls.objects.filter(project_id=project_id,
                               event_id__in=event_ids).delete()

        # Remove from nodestore
        node_ids = [
            Event.generate_node_id(project_id, event_id)
            for event_id in event_ids
        ]
        nodestore.delete_multi(node_ids)

        # Tell Snuba to delete the event data.
        eventstream.tombstone_events_unsafe(project_id,
                                            event_ids,
                                            from_timestamp=from_timestamp,
                                            to_timestamp=to_timestamp)
    elif remaining_events == "keep":
        for cls in EVENT_MODELS_TO_MIGRATE:
            cls.objects.filter(
                project_id=project_id,
                event_id__in=event_ids).update(group_id=new_group_id)

        eventstream.replace_group_unsafe(
            project_id,
            event_ids,
            new_group_id=new_group_id,
            from_timestamp=from_timestamp,
            to_timestamp=to_timestamp,
        )

        buffer.incr(Group, {"times_seen": len(event_ids)},
                    {"id": new_group_id})
    else:
        raise ValueError(
            f"Invalid value for remaining_events: {remaining_events}")

    if old_group_id is not None:
        from sentry.reprocessing2 import mark_event_reprocessed

        mark_event_reprocessed(group_id=old_group_id,
                               project_id=project_id,
                               num_events=len(event_ids))
Example 20
def capture_nodestore_stats(cache_key, project_id, event_id):
    set_current_project(project_id)

    from sentry.eventstore.compressor import deduplicate
    from sentry.eventstore.models import Event

    node_id = Event.generate_node_id(project_id, event_id)
    data = nodestore.get(node_id)

    if not data:
        metrics.incr("eventstore.compressor.error", tags={"reason": "no_data"})
        return

    old_event_size = _json_size(data)

    unprocessed_data = event_processing_store.get(
        _get_unprocessed_key(cache_key))
    event_processing_store.delete_by_key(_get_unprocessed_key(cache_key))

    tags = {
        "with_reprocessing": bool(unprocessed_data),
        "platform": data.get("platform") or "none",
        "is_minidump": is_minidump_event(data),
    }

    if unprocessed_data:
        metrics.incr("nodestore_stats.with_reprocessing")

        concatenated_size = _json_size(data, unprocessed_data)
        metrics.timing("events.size.concatenated",
                       concatenated_size,
                       tags=tags)
        metrics.timing("events.size.concatenated.ratio",
                       concatenated_size / old_event_size,
                       tags=tags)

        _data = dict(data)
        _data["__nodestore_reprocessing"] = unprocessed_data
        simple_concatenated_size = _json_size(_data)
        metrics.timing("events.size.simple_concatenated",
                       simple_concatenated_size,
                       tags=tags)
        metrics.timing(
            "events.size.simple_concatenated.ratio",
            simple_concatenated_size / old_event_size,
            tags=tags,
        )
    else:
        metrics.incr("nodestore_stats.without_reprocessing")

    new_data, extra_keys = deduplicate(dict(data))
    total_size = event_size = _json_size(new_data)

    for key, value in six.iteritems(extra_keys):
        if nodestore.get(key) is not None:
            metrics.incr("eventstore.compressor.hits", tags=tags)
            # do not continue, nodestore.set() should bump TTL
        else:
            metrics.incr("eventstore.compressor.misses", tags=tags)
            total_size += _json_size(value)

        # key is md5sum of content
        # do not store actual value to keep prod impact to a minimum
        nodestore.set(key, {})

    metrics.timing("events.size.deduplicated", event_size, tags=tags)
    metrics.timing("events.size.deduplicated.total_written",
                   total_size,
                   tags=tags)

    metrics.timing("events.size.deduplicated.ratio",
                   event_size / old_event_size,
                   tags=tags)
    metrics.timing("events.size.deduplicated.total_written.ratio",
                   total_size / old_event_size,
                   tags=tags)

    if total_size > old_event_size:
        nodestore_stats_logger.info(
            "events.size.deduplicated.details",
            extra={
                "project_id": project_id,
                "event_id": event_id,
                "total_size": total_size,
                "old_event_size": old_event_size,
            },
        )