Example #1
    def test_simple(self):
        event_id = "a" * 32
        project = self.create_project()
        node_id = Event.generate_node_id(project.id, event_id)
        group = self.create_group(project=project)
        event = self.create_event(group=group, event_id=event_id)
        EventAttachment.objects.create(
            event_id=event.event_id,
            project_id=event.project_id,
            file=File.objects.create(name="hello.png", type="image/png"),
            name="hello.png",
        )
        UserReport.objects.create(
            event_id=event.event_id, project_id=event.project_id, name="Jane Bloggs"
        )
        assert nodestore.get(node_id) is not None
        deletion = ScheduledDeletion.schedule(event, days=0)
        deletion.update(in_progress=True)

        with self.tasks():
            run_deletion(deletion.id)

        assert not Event.objects.filter(id=event.id).exists()
        assert not EventAttachment.objects.filter(
            event_id=event.event_id, project_id=project.id
        ).exists()
        assert not UserReport.objects.filter(
            event_id=event.event_id, project_id=project.id
        ).exists()

        assert nodestore.get(node_id) is None
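
All of these examples exercise the same small nodestore contract: set, get, and delete keyed by a node_id string, with get returning None for missing nodes, which is why the tests can assert nodestore.get(node_id) is None after a deletion. A minimal in-memory sketch of that contract (a toy stand-in, not one of Sentry's real backends):

class InMemoryNodeStorage:
    """Toy stand-in for the nodestore backends these examples run against."""

    def __init__(self):
        self._nodes = {}

    def set(self, node_id, data):
        # Writes are keyed by node_id; a second set for the same id overwrites.
        self._nodes[node_id] = data

    def get(self, node_id):
        # Missing nodes come back as None rather than raising.
        return self._nodes.get(node_id)

    def delete(self, node_id):
        # Deleting an absent node is a no-op.
        self._nodes.pop(node_id, None)

nodestore = InMemoryNodeStorage()
nodestore.set("a" * 32, {"foo": "bar"})
assert nodestore.get("a" * 32) == {"foo": "bar"}
nodestore.delete("a" * 32)
assert nodestore.get("a" * 32) is None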
Example #2
    def test_event_node_id(self):
        # Create an event without specifying node_id. A node_id should be generated
        e1 = Event(project_id=1, event_id="abc", data={"foo": "bar"})
        assert e1.data.id is not None, "We should have generated a node_id for this event"
        e1_node_id = e1.data.id
        e1.data.save()
        e1_body = nodestore.get(e1_node_id)
        assert e1_body == {
            "foo": "bar"
        }, "The event body should be in nodestore"

        e1 = Event(project_id=1, event_id="abc")

        assert e1.data.data == {
            "foo": "bar"
        }, "The event body should be loaded from nodestore"
        assert e1.data.id == e1_node_id, "The event's node_id should be the same after load"

        # Event with no data should not be saved to nodestore
        e2 = Event(project_id=1, event_id="mno", data=None)
        e2_node_id = e2.data.id
        assert e2.data.data == {}  # NodeData returns {} by default
        eventstore.bind_nodes([e2], "data")
        assert e2.data.data == {}
        e2_body = nodestore.get(e2_node_id)
        assert e2_body is None
Example #3
    def setUp(self):
        super(SnubaEventTest, self).setUp()

        self.event_id = "f" * 32
        self.now = datetime.utcnow().replace(microsecond=0) - timedelta(
            seconds=10)
        self.proj1 = self.create_project()
        self.proj1env1 = self.create_environment(project=self.proj1,
                                                 name="test")
        self.proj1group1 = self.create_group(self.proj1,
                                             first_seen=self.now,
                                             last_seen=self.now +
                                             timedelta(seconds=14400))

        # Raw event data
        self.data = {
            "event_id": self.event_id,
            "primary_hash": "1" * 32,
            "project_id": self.proj1.id,
            "message": "message 1",
            "platform": "python",
            "timestamp": calendar.timegm(self.now.timetuple()),
            "received": calendar.timegm(self.now.timetuple()),
            "tags": {
                "foo": "bar",
                "baz": "quux",
                "environment": "prod",
                "sentry:user": u"id:user1",
                "sentry:release": "release1",
            },
            "user": {
                "id": u"user1",
                "email": u"*****@*****.**"
            },
        }

        # Create a regular django Event from the data, which will save the
        # data in nodestore too. Once Postgres events are deprecated, we can
        # turn this off and just put the payload in nodestore.
        make_django_event = True
        if make_django_event:
            self.create_event(
                event_id=self.data["event_id"],
                datetime=self.now,
                project=self.proj1,
                group=self.proj1group1,
                data=self.data,
            )
            nodestore_data = nodestore.get(
                SnubaEvent.generate_node_id(self.proj1.id, self.event_id))
            assert self.data["event_id"] == nodestore_data["event_id"]
        else:
            node_id = SnubaEvent.generate_node_id(self.proj1.id, self.event_id)
            nodestore.set(node_id, self.data)
            assert nodestore.get(node_id) == self.data
Example #4
    def setUp(self):
        super(SnubaEventTest, self).setUp()

        self.event_id = 'f' * 32
        self.now = datetime.utcnow().replace(microsecond=0) - timedelta(
            seconds=10)
        self.proj1 = self.create_project()
        self.proj1env1 = self.create_environment(project=self.proj1,
                                                 name='test')
        self.proj1group1 = self.create_group(self.proj1,
                                             first_seen=self.now,
                                             last_seen=self.now +
                                             timedelta(seconds=14400))

        # Raw event data
        data = {
            'event_id': self.event_id,
            'primary_hash': '1' * 32,
            'project_id': self.proj1.id,
            'message': 'message 1',
            'platform': 'python',
            'timestamp': calendar.timegm(self.now.timetuple()),
            'received': calendar.timegm(self.now.timetuple()),
            'tags': {
                'foo': 'bar',
                'baz': 'quux',
                'environment': 'prod',
                'sentry:user': u'id:user1',
                'sentry:release': 'release1',
            },
            'user': {
                'id': u'user1',
                'email': u'*****@*****.**',
            },
        }

        # Create a regular django Event from the data, which will save the
        # data in nodestore too. Once Postgres events are deprecated, we can
        # turn this off and just put the payload in nodestore.
        make_django_event = True
        if make_django_event:
            self.create_event(
                event_id=data['event_id'],
                datetime=self.now,
                project=self.proj1,
                group=self.proj1group1,
                data=data,
            )
            nodestore_data = nodestore.get(
                SnubaEvent.generate_node_id(self.proj1.id, self.event_id))
            assert data['event_id'] == nodestore_data['event_id']
        else:
            node_id = SnubaEvent.generate_node_id(self.proj1.id, self.event_id)
            nodestore.set(node_id, data)
            assert nodestore.get(node_id) == data
Example #5
    def test_event_node_id(self):
        # Create an event without specifying node_id. A node_id should be generated
        e1 = Event(project_id=1, event_id='abc', data={'foo': 'bar'})
        e1.save()
        e1_node_id = e1.data.id
        assert e1.data.id is not None, "We should have generated a node_id for this event"
        e1_body = nodestore.get(e1_node_id)
        assert e1_body == {'foo': 'bar'}, "The event body should be in nodestore"

        e1 = Event.objects.get(project_id=1, event_id='abc')
        assert e1.data.data == {'foo': 'bar'}, "The event body should be loaded from nodestore"
        assert e1.data.id == e1_node_id, "The event's node_id should be the same after load"

        # Create another event that references the same nodestore object as the first event.
        e2 = Event(project_id=1, event_id='def', data={'node_id': e1_node_id})
        assert e2.data.id == e1_node_id, "The event should use the provided node_id"
        e2_body = nodestore.get(e1_node_id)
        assert e2_body == {'foo': 'bar'}, "The event body should be in nodestore already"
        e2.save()
        e2_body = nodestore.get(e1_node_id)
        assert e2_body == {'foo': 'bar'}, "The event body should not be overwritten by save"

        e2 = Event.objects.get(project_id=1, event_id='def')
        assert e2.data.data == {'foo': 'bar'}, "The event body should be loaded from nodestore"
        assert e2.data.id == e1_node_id, "The event's node_id should be the same after load"

        # Create an event with a new event body that specifies the node_id to use.
        e3 = Event(project_id=1, event_id='ghi', data={'baz': 'quux', 'node_id': '1:ghi'})
        assert e3.data.id == '1:ghi', "Event should have the specified node_id"
        assert e3.data.data == {'baz': 'quux'}, "Event body should be the one provided (sans node_id)"
        e3.save()
        e3_body = nodestore.get('1:ghi')
        assert e3_body == {'baz': 'quux'}, "Event body should be saved to nodestore"

        e3 = Event.objects.get(project_id=1, event_id='ghi')
        assert e3.data.data == {'baz': 'quux'}, "Event body should be loaded from nodestore"
        assert e3.data.id == '1:ghi', "Loaded event should have the correct node_id"

        # Try to load it again, this time using the pickled/compressed string we would expect to find
        # in the column
        e3_pickled_id = compress(pickle.dumps({'node_id': '1:ghi'}))
        e3 = Event(project_id=1, event_id='jkl', data=e3_pickled_id)
        assert e3.data.data == {'baz': 'quux'}, "Event body should be loaded from nodestore"

        # Event with no data should not be saved (or loaded) from nodestore
        e4 = Event(project_id=1, event_id='mno', data=None)
        e4.save()
        assert nodestore.get('1:mno') is None, "We should not have saved anything to nodestore"
        e4 = Event.objects.get(project_id=1, event_id='mno')
        assert e4.data.id is None
        assert e4.data.data == {}  # NodeData returns {} by default
        Event.objects.bind_nodes([e4], 'data')
        assert e4.data.id is None
        assert e4.data.data == {}
Example #6
    def test_simple(self):
        event_id = "a" * 32
        event_id_2 = "b" * 32
        project = self.create_project()

        node_id = Event.generate_node_id(project.id, event_id)
        node_id_2 = Event.generate_node_id(project.id, event_id_2)

        event = self.store_event(
            data={
                "event_id": event_id,
                "timestamp": iso_format(before_now(minutes=1)),
                "fingerprint": ["group1"],
            },
            project_id=project.id,
        )

        self.store_event(
            data={
                "event_id": event_id_2,
                "timestamp": iso_format(before_now(minutes=1)),
                "fingerprint": ["group1"],
            },
            project_id=project.id,
        )

        group = event.group
        group.update(status=GroupStatus.PENDING_DELETION)

        GroupAssignee.objects.create(group=group,
                                     project=project,
                                     user=self.user)
        GroupHash.objects.create(project=project,
                                 group=group,
                                 hash=uuid4().hex)
        GroupMeta.objects.create(group=group, key="foo", value="bar")
        GroupRedirect.objects.create(group_id=group.id, previous_group_id=1)

        assert nodestore.get(node_id)
        assert nodestore.get(node_id_2)

        with self.tasks():
            delete_groups(object_ids=[group.id])

        assert not Event.objects.filter(id=event.id).exists()
        assert not GroupRedirect.objects.filter(group_id=group.id).exists()
        assert not GroupHash.objects.filter(group_id=group.id).exists()
        assert not Group.objects.filter(id=group.id).exists()
        assert not nodestore.get(node_id)
        assert not nodestore.get(node_id_2)
Example #7
    def _get_event_from_storage(self, project_id, event_id):
        nodestore_sample_rate = options.get("store.nodestore-sample-rate")
        use_nodestore = random.random() < nodestore_sample_rate

        if use_nodestore:
            start = time.time()

            node_data = nodestore.get(
                Event.generate_node_id(project_id, event_id))

            metrics.timing(
                "events.store.nodestore.duration",
                int((time.time() - start) * 1000),
                tags={"duplicate_found": bool(node_data)},
            )

            if node_data:
                return Event(node_data)
        else:
            try:
                event = Event.objects.get(project_id=project_id,
                                          event_id=event_id)
                return event
            except Event.DoesNotExist:
                pass
        return None
Example #8
    def test_event_with_no_body(self):
        # remove the event from nodestore to simulate an event with no body.
        node_id = SnubaEvent.generate_node_id(self.proj1.id, self.event_id)
        nodestore.delete(node_id)
        assert nodestore.get(node_id) is None

        # Check that we can still serialize it
        event = SnubaEvent.get_event(self.proj1.id, self.event_id)
        serialized = serialize(event)
        assert event.data == {}

        # Check that the regular serializer still gives us back tags
        assert serialized['tags'] == [{
            '_meta': None,
            'key': 'baz',
            'value': 'quux'
        }, {
            '_meta': None,
            'key': 'foo',
            'value': 'bar'
        }, {
            '_meta': None,
            'key': 'release',
            'value': 'release1'
        }, {
            '_meta': None,
            'key': 'user',
            'query': 'user.id:user1',
            'value': 'id:user1'
        }]
Example #9
    def test_dupe_message_id(self, eventstream_insert):
        # Saves the latest event to nodestore and eventstream
        project_id = 1
        event_id = "a" * 32
        node_id = Event.generate_node_id(project_id, event_id)

        manager = EventManager(make_event(event_id=event_id, message="first"))
        manager.normalize()
        manager.save(project_id)
        assert nodestore.get(node_id)["logentry"]["formatted"] == "first"

        manager = EventManager(make_event(event_id=event_id, message="second"))
        manager.normalize()
        manager.save(project_id)
        assert nodestore.get(node_id)["logentry"]["formatted"] == "second"

        assert eventstream_insert.call_count == 2
Example #10
    def test_simple(self):
        event_id = "a" * 32
        project = self.create_project()
        node_id = Event.generate_node_id(project.id, event_id)
        group = self.create_group(project=project)
        event = self.create_event(group=group, event_id=event_id)
        EventAttachment.objects.create(
            event_id=event.event_id,
            project_id=event.project_id,
            file=File.objects.create(name="hello.png", type="image/png"),
            name="hello.png",
        )
        UserReport.objects.create(event_id=event.event_id,
                                  project_id=event.project_id,
                                  name="Jane Doe")
        key = "key"
        value = "value"
        tk = tagstore.create_tag_key(project_id=project.id,
                                     environment_id=self.environment.id,
                                     key=key)
        tv = tagstore.create_tag_value(project_id=project.id,
                                       environment_id=self.environment.id,
                                       key=key,
                                       value=value)
        tagstore.create_event_tags(
            event_id=event.id,
            group_id=group.id,
            project_id=project.id,
            environment_id=self.environment.id,
            tags=[(tk.key, tv.value)],
        )
        assert nodestore.get(node_id) is not None
        deletion = ScheduledDeletion.schedule(event, days=0)
        deletion.update(in_progress=True)

        with self.tasks():
            run_deletion(deletion.id)

        assert not Event.objects.filter(id=event.id).exists()
        assert not EventAttachment.objects.filter(
            event_id=event.event_id, project_id=project.id).exists()
        assert not UserReport.objects.filter(event_id=event.event_id,
                                             project_id=project.id).exists()
        assert not EventTag.objects.filter(event_id=event.id).exists()

        assert nodestore.get(node_id) is None
Example #11
    def bind_node_data(self):
        # Do not rebind if node_data is already loaded
        if self.data._node_data:
            return

        node_id = Event.generate_node_id(self.project_id, self.event_id)
        node_data = nodestore.get(node_id) or {}
        ref = self.data.get_ref(self)
        self.data.bind_data(node_data, ref=ref)
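
Event.generate_node_id(project_id, event_id), used throughout these examples, derives a stable nodestore key from the two ids. A sketch of the idea, assuming (as in Sentry's implementation around this time) an md5 digest over "<project_id>:<event_id>"; the exact scheme may differ between versions:

import hashlib

def generate_node_id(project_id, event_id):
    # The same (project_id, event_id) pair always maps to the same key, so
    # nodestore.get() can locate an event body without any lookup table.
    return hashlib.md5(u"{}:{}".format(project_id, event_id).encode("utf-8")).hexdigest()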
Example #12
def capture_nodestore_stats(project_id, event_id):
    set_current_project(project_id)

    from sentry import nodestore
    from sentry.eventstore.compressor import deduplicate
    from sentry.eventstore.models import Event

    event = Event(project_id=project_id, event_id=event_id)
    old_event_size = _json_size(dict(event.data))

    if not event.data:
        metrics.incr("eventstore.compressor.error", tags={"reason": "no_data"})
        return

    platform = event.platform

    for key, value in six.iteritems(event.interfaces):
        len_value = _json_size(value.to_json())
        metrics.timing(
            "events.size.interface", len_value, tags={"interface": key, "platform": platform}
        )

    new_data, extra_keys = deduplicate(dict(event.data))

    total_size = event_size = _json_size(new_data)

    for key, value in six.iteritems(extra_keys):
        if nodestore.get(key) is not None:
            metrics.incr("eventstore.compressor.hits")
            # do not continue, nodestore.set() should bump TTL
        else:
            metrics.incr("eventstore.compressor.misses")
            total_size += _json_size(value)

        # key is md5sum of content
        # do not store actual value to keep prod impact to a minimum
        nodestore.set(key, {})

    metrics.timing("events.size.deduplicated", event_size)
    metrics.timing("events.size.deduplicated.total_written", total_size)

    metrics.timing("events.size.deduplicated.ratio", event_size / old_event_size)
    metrics.timing("events.size.deduplicated.total_written.ratio", total_size / old_event_size)

    if total_size > old_event_size:
        nodestore_stats_logger.info(
            "events.size.deduplicated.details",
            extra={
                "project_id": project_id,
                "event_id": event_id,
                "total_size": total_size,
                "old_event_size": old_event_size,
            },
        )
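
The deduplicate() step above splits an event payload into a residual body plus externalized chunks keyed by a content hash, so identical fragments across events can share nodestore entries (and nodestore.set() on a hit merely bumps the TTL). A rough sketch of that split; the field list here is illustrative and the real sentry.eventstore.compressor is more involved:

import hashlib
import json

def deduplicate_sketch(data, externalize=("exception", "debug_meta")):
    # Pull selected large sub-objects out of the event body and key each one
    # by the md5 of its serialized content; identical fragments then map to
    # the same nodestore key regardless of which event they came from.
    new_data = dict(data)
    extra_keys = {}
    for key in externalize:
        if key in new_data:
            value = new_data.pop(key)
            blob = json.dumps(value, sort_keys=True).encode("utf-8")
            extra_keys[hashlib.md5(blob).hexdigest()] = value
    return new_data, extra_keys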
Example #13
    def test_simple(self):
        configure_sdk()
        Hub.current.bind_client(Hub.main.client)

        with self.tasks():
            event_id = raven.captureMessage("internal client test")

        event = nodestore.get(Event.generate_node_id(settings.SENTRY_PROJECT, event_id))

        assert event["project"] == settings.SENTRY_PROJECT
        assert event["event_id"] == event_id
        assert event["logentry"]["formatted"] == "internal client test"
Example #14
def fetch_and_store(line):
    project_id, event_id = line.strip().split("\t")
    node_id = Event.generate_node_id(project_id, event_id)
    node = nodestore.get(node_id)  # pylint: disable=no-member

    if node is None:
        print("WARNING: Got None from nodestore for project / event",
              project_id,
              event_id,
              file=sys.stderr)
    else:
        store(project_id, event_id, node, global_output_dir)
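
A hypothetical driver for the helper above, assuming a tab-separated input file of "<project_id>\t<event_id>" lines; store() and global_output_dir belong to the surrounding script, which is not shown here:

with open("event_ids.tsv") as f:  # file name is illustrative
    for line in f:
        fetch_and_store(line)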
Example #15
    def data(self):
        if self._node_data is not None:
            return self._node_data

        elif self.id:
            if settings.DEBUG:
                raise NodeUnpopulated('You should populate node data before accessing it.')
            else:
                warnings.warn('You should populate node data before accessing it.')
            self.bind_data(nodestore.get(self.id) or {})
            return self._node_data

        return {}
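
The warning path above exists because touching .data lazily costs one nodestore.get() per object. The usual remedy is to bind node data for a whole batch up front, sketched here assuming the bind_nodes() manager method used elsewhere on this page (process() is a placeholder):

events = list(Event.objects.filter(group_id=group.id)[:100])
Event.objects.bind_nodes(events, "data")  # one batched nodestore fetch
for event in events:
    process(event.data)  # no per-event nodestore.get() fired here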
Example #16
def pull_event_data(project_id, event_id) -> ReprocessableEvent:
    from sentry.lang.native.processing import get_required_attachment_types

    with sentry_sdk.start_span(op="reprocess_events.eventstore.get"):
        event = eventstore.get_event_by_id(project_id, event_id)

    if event is None:
        raise CannotReprocess("event.not_found")

    with sentry_sdk.start_span(op="reprocess_events.nodestore.get"):
        node_id = Event.generate_node_id(project_id, event_id)
        data = nodestore.get(node_id, subkey="unprocessed")
        if data is None:
            node_id = _generate_unprocessed_event_node_id(
                project_id=project_id, event_id=event_id)
            data = nodestore.get(node_id)

    # Check data after checking presence of event to avoid too many instances.
    if data is None:
        raise CannotReprocess("unprocessed_event.not_found")

    required_attachment_types = get_required_attachment_types(data)
    attachments = list(
        models.EventAttachment.objects.filter(
            project_id=project_id,
            event_id=event_id,
            type__in=list(required_attachment_types)))
    missing_attachment_types = required_attachment_types - {
        ea.type
        for ea in attachments
    }

    if missing_attachment_types:
        raise CannotReprocess("attachment.not_found")

    return ReprocessableEvent(event=event, data=data, attachments=attachments)
Example #17
    def data(self):
        """
        Get the current data object, fetching from nodestore if necessary.
        """

        if self._node_data is not None:
            return self._node_data

        elif self.id:
            self.bind_data(nodestore.get(self.id) or {})
            return self._node_data

        rv = {}
        if self.wrapper is not None:
            rv = self.wrapper(rv)
        return rv
Example #18
    def data(self):
        """
        Get the current data object, fetching from nodestore if necessary.
        """

        if self._node_data is not None:
            return self._node_data

        elif self.id:
            warnings.warn('You should populate node data before accessing it.')
            self.bind_data(nodestore.get(self.id) or {})
            return self._node_data

        rv = {}
        if self.field is not None and self.field.wrapper is not None:
            rv = self.field.wrapper(rv)
        return rv
Example #19
    def test_encoding(self):
        configure_sdk()
        Hub.current.bind_client(Hub.main.client)

        class NotJSONSerializable:
            pass

        with self.tasks():
            event_id = raven.captureMessage(
                "check the req", extra={"request": NotJSONSerializable()}
            )

        event = nodestore.get(Event.generate_node_id(settings.SENTRY_PROJECT, event_id))

        assert event["project"] == settings.SENTRY_PROJECT
        assert event["logentry"]["formatted"] == "check the req"
        assert "NotJSONSerializable" in event["extra"]["request"]
Example #20
    def test_recursion_breaker(self):
        configure_sdk()
        Hub.current.bind_client(Hub.main.client)

        # If this test terminates at all then we avoided recursion.
        with self.tasks():
            with mock.patch(
                "sentry.event_manager.EventManager.save", side_effect=ValueError("oh no!")
            ) as save:
                event_id = raven.captureMessage("internal client test")

        event = nodestore.get(Event.generate_node_id(settings.SENTRY_PROJECT, event_id))
        assert event is None

        assert_mock_called_once_with_partial(
            save, settings.SENTRY_PROJECT, cache_key=u"e:{}:1".format(event_id)
        )
Example #21
    def test_event_with_no_body(self):
        # remove the event from nodestore to simulate an event with no body.
        node_id = SnubaEvent.generate_node_id(self.proj1.id, self.event_id)
        nodestore.delete(node_id)
        assert nodestore.get(node_id) is None

        # Check that we can still serialize it
        event = eventstore.get_event_by_id(
            self.proj1.id, self.event_id, additional_columns=eventstore.full_columns
        )
        serialized = serialize(event)
        assert event.data == {}

        # Check that the regular serializer still gives us back tags
        assert serialized["tags"] == [
            {"_meta": None, "key": "baz", "value": "quux"},
            {"_meta": None, "key": "environment", "value": "prod"},
            {"_meta": None, "key": "foo", "value": "bar"},
            {"_meta": None, "key": "level", "value": "error"},
            {"_meta": None, "key": "release", "value": "release1"},
            {"_meta": None, "key": "user", "query": 'user.id:"user1"', "value": "id:user1"},
        ]
Example #22
    def test_simple(self):
        EventDataDeletionTask.DEFAULT_CHUNK_SIZE = 1  # test chunking logic
        group = self.event.group
        assert nodestore.get(self.node_id)
        assert nodestore.get(self.node_id2)
        assert nodestore.get(self.node_id3)

        with self.tasks():
            delete_groups(object_ids=[group.id])

        assert not UserReport.objects.filter(group_id=group.id).exists()
        assert not UserReport.objects.filter(
            event_id=self.event.event_id).exists()
        assert not EventAttachment.objects.filter(
            event_id=self.event.event_id).exists()

        assert not GroupRedirect.objects.filter(group_id=group.id).exists()
        assert not GroupHash.objects.filter(group_id=group.id).exists()
        assert not Group.objects.filter(id=group.id).exists()
        assert not nodestore.get(self.node_id)
        assert not nodestore.get(self.node_id2)
        assert nodestore.get(
            self.node_id3), "Does not remove from second group"
Example #23
    def bind_node_data(self):
        node_id = Event.generate_node_id(self.project_id, self.event_id)
        node_data = nodestore.get(node_id) or {}
        ref = self.data.get_ref(self)
        self.data.bind_data(node_data, ref=ref)
Example #24
def reprocess_event(project_id, event_id, start_time):
    node_id = _generate_unprocessed_event_node_id(project_id=project_id,
                                                  event_id=event_id)

    with sentry_sdk.start_span(op="reprocess_events.nodestore.get"):
        data = nodestore.get(node_id)
    if data is None:
        return

    from sentry.event_manager import set_tag
    from sentry.tasks.store import preprocess_event_from_reprocessing
    from sentry.ingest.ingest_consumer import CACHE_TIMEOUT

    # Take unprocessed data from old event and save it as unprocessed data
    # under a new event ID. The second step happens in pre-process. We could
    # save the "original event ID" instead and get away with writing less to
    # nodestore, but doing it this way makes the logic slightly simpler.

    # Step 1: Fix up the event payload for reprocessing and put it in event
    # cache/event_processing_store
    orig_event_id = data["event_id"]
    set_tag(data, "original_event_id", orig_event_id)

    event = eventstore.get_event_by_id(project_id, orig_event_id)
    if event is None:
        return

    set_tag(data, "original_group_id", event.group_id)

    # XXX: reuse event IDs
    event_id = data["event_id"] = uuid.uuid4().hex

    cache_key = event_processing_store.store(data)

    # Step 2: Copy attachments into attachment cache
    queryset = models.EventAttachment.objects.filter(
        project_id=project_id, event_id=orig_event_id).select_related("file")

    attachment_objects = []

    for attachment_id, attachment in enumerate(queryset):
        with sentry_sdk.start_span(
                op="reprocess_event._copy_attachment_into_cache") as span:
            span.set_data("attachment_id", attachment.id)
            attachment_objects.append(
                _copy_attachment_into_cache(
                    attachment_id=attachment_id,
                    attachment=attachment,
                    cache_key=cache_key,
                    cache_timeout=CACHE_TIMEOUT,
                ))

    if attachment_objects:
        with sentry_sdk.start_span(op="reprocess_event.set_attachment_meta"):
            attachment_cache.set(cache_key,
                                 attachments=attachment_objects,
                                 timeout=CACHE_TIMEOUT)

    preprocess_event_from_reprocessing(cache_key=cache_key,
                                       start_time=start_time,
                                       event_id=event_id)
Example #25
    def test_simple(self):
        EventDataDeletionTask.DEFAULT_CHUNK_SIZE = 1  # test chunking logic
        event_id = "a" * 32
        event_id2 = "b" * 32
        event_id3 = "c" * 32
        project = self.create_project()
        node_id = Event.generate_node_id(project.id, event_id)
        node_id2 = Event.generate_node_id(project.id, event_id2)
        node_id3 = Event.generate_node_id(project.id, event_id3)

        event = self.store_event(
            data={
                "event_id": event_id,
                "tags": {
                    "foo": "bar"
                },
                "timestamp": iso_format(before_now(minutes=1)),
                "fingerprint": ["group1"],
            },
            project_id=project.id,
        )

        self.store_event(
            data={
                "event_id": event_id2,
                "timestamp": iso_format(before_now(minutes=1)),
                "fingerprint": ["group1"],
            },
            project_id=project.id,
        )

        self.store_event(
            data={
                "event_id": event_id3,
                "timestamp": iso_format(before_now(minutes=1)),
                "fingerprint": ["group2"],
            },
            project_id=project.id,
        )

        group = event.group
        group.update(status=GroupStatus.PENDING_DELETION)

        project = self.create_project()

        UserReport.objects.create(group_id=group.id,
                                  project_id=event.project_id,
                                  name="With group id")
        UserReport.objects.create(event_id=event.event_id,
                                  project_id=event.project_id,
                                  name="With event id")
        EventAttachment.objects.create(
            event_id=event.event_id,
            project_id=event.project_id,
            file=File.objects.create(name="hello.png", type="image/png"),
            name="hello.png",
        )

        GroupAssignee.objects.create(group=group,
                                     project=project,
                                     user=self.user)
        GroupHash.objects.create(project=project,
                                 group=group,
                                 hash=uuid4().hex)
        GroupMeta.objects.create(group=group, key="foo", value="bar")
        GroupRedirect.objects.create(group_id=group.id, previous_group_id=1)

        deletion = ScheduledDeletion.schedule(group, days=0)
        deletion.update(in_progress=True)

        assert nodestore.get(node_id)
        assert nodestore.get(node_id2)
        assert nodestore.get(node_id3)

        with self.tasks():
            run_deletion(deletion.id)

        assert not Event.objects.filter(id=event.id).exists()
        assert not UserReport.objects.filter(group_id=group.id).exists()
        assert not UserReport.objects.filter(event_id=event.event_id).exists()
        assert not EventAttachment.objects.filter(
            event_id=event.event_id).exists()

        assert not GroupRedirect.objects.filter(group_id=group.id).exists()
        assert not GroupHash.objects.filter(group_id=group.id).exists()
        assert not Group.objects.filter(id=group.id).exists()
        assert not nodestore.get(node_id)
        assert not nodestore.get(node_id2)
        assert nodestore.get(node_id3), "Does not remove from second group"
Example #26
def reprocess_event(project_id, event_id, start_time):

    from sentry.ingest.ingest_consumer import CACHE_TIMEOUT
    from sentry.lang.native.processing import get_required_attachment_types
    from sentry.tasks.store import preprocess_event_from_reprocessing

    with sentry_sdk.start_span(op="reprocess_events.nodestore.get"):
        node_id = Event.generate_node_id(project_id, event_id)
        data = nodestore.get(node_id, subkey="unprocessed")
        if data is None:
            node_id = _generate_unprocessed_event_node_id(project_id=project_id, event_id=event_id)
            data = nodestore.get(node_id)

    if data is None:
        raise CannotReprocess("reprocessing_nodestore.not_found")

    with sentry_sdk.start_span(op="reprocess_events.eventstore.get"):
        event = eventstore.get_event_by_id(project_id, event_id)

    if event is None:
        raise CannotReprocess("event.not_found")

    required_attachment_types = get_required_attachment_types(data)
    attachments = list(
        models.EventAttachment.objects.filter(
            project_id=project_id, event_id=event_id, type__in=list(required_attachment_types)
        )
    )
    missing_attachment_types = required_attachment_types - {ea.type for ea in attachments}

    if missing_attachment_types:
        raise CannotReprocess(
            f"attachment.not_found.{'_and_'.join(sorted(missing_attachment_types))}"
        )

    # Step 1: Fix up the event payload for reprocessing and put it in event
    # cache/event_processing_store
    set_path(data, "contexts", "reprocessing", "original_issue_id", value=event.group_id)
    set_path(
        data, "contexts", "reprocessing", "original_primary_hash", value=event.get_primary_hash()
    )
    cache_key = event_processing_store.store(data)

    # Step 2: Copy attachments into attachment cache. Note that we can only
    # consider minidumps because filestore just stays as-is after reprocessing
    # (we simply update group_id on the EventAttachment models in post_process)
    attachment_objects = []

    files = {f.id: f for f in models.File.objects.filter(id__in=[ea.file_id for ea in attachments])}

    for attachment_id, attachment in enumerate(attachments):
        with sentry_sdk.start_span(op="reprocess_event._copy_attachment_into_cache") as span:
            span.set_data("attachment_id", attachment.id)
            attachment_objects.append(
                _copy_attachment_into_cache(
                    attachment_id=attachment_id,
                    attachment=attachment,
                    file=files[attachment.file_id],
                    cache_key=cache_key,
                    cache_timeout=CACHE_TIMEOUT,
                )
            )

    if attachment_objects:
        with sentry_sdk.start_span(op="reprocess_event.set_attachment_meta"):
            attachment_cache.set(cache_key, attachments=attachment_objects, timeout=CACHE_TIMEOUT)

    preprocess_event_from_reprocessing(
        cache_key=cache_key,
        start_time=start_time,
        event_id=event_id,
        data=data,
    )
Example #27
def capture_nodestore_stats(cache_key, project_id, event_id):
    set_current_project(project_id)

    from sentry.eventstore.compressor import deduplicate
    from sentry.eventstore.models import Event

    node_id = Event.generate_node_id(project_id, event_id)
    data = nodestore.get(node_id)

    if not data:
        metrics.incr("eventstore.compressor.error", tags={"reason": "no_data"})
        return

    old_event_size = _json_size(data)

    unprocessed_data = event_processing_store.get(
        _get_unprocessed_key(cache_key))
    event_processing_store.delete_by_key(_get_unprocessed_key(cache_key))

    tags = {
        "with_reprocessing": bool(unprocessed_data),
        "platform": data.get("platform") or "none",
        "is_minidump": is_minidump_event(data),
    }

    if unprocessed_data:
        metrics.incr("nodestore_stats.with_reprocessing")

        concatenated_size = _json_size(data, unprocessed_data)
        metrics.timing("events.size.concatenated",
                       concatenated_size,
                       tags=tags)
        metrics.timing("events.size.concatenated.ratio",
                       concatenated_size / old_event_size,
                       tags=tags)

        _data = dict(data)
        _data["__nodestore_reprocessing"] = unprocessed_data
        simple_concatenated_size = _json_size(_data)
        metrics.timing("events.size.simple_concatenated",
                       simple_concatenated_size,
                       tags=tags)
        metrics.timing(
            "events.size.simple_concatenated.ratio",
            simple_concatenated_size / old_event_size,
            tags=tags,
        )
    else:
        metrics.incr("nodestore_stats.without_reprocessing")

    new_data, extra_keys = deduplicate(dict(data))
    total_size = event_size = _json_size(new_data)

    for key, value in six.iteritems(extra_keys):
        if nodestore.get(key) is not None:
            metrics.incr("eventstore.compressor.hits", tags=tags)
            # do not continue, nodestore.set() should bump TTL
        else:
            metrics.incr("eventstore.compressor.misses", tags=tags)
            total_size += _json_size(value)

        # key is md5sum of content
        # do not store actual value to keep prod impact to a minimum
        nodestore.set(key, {})

    metrics.timing("events.size.deduplicated", event_size, tags=tags)
    metrics.timing("events.size.deduplicated.total_written",
                   total_size,
                   tags=tags)

    metrics.timing("events.size.deduplicated.ratio",
                   event_size / old_event_size,
                   tags=tags)
    metrics.timing("events.size.deduplicated.total_written.ratio",
                   total_size / old_event_size,
                   tags=tags)

    if total_size > old_event_size:
        nodestore_stats_logger.info(
            "events.size.deduplicated.details",
            extra={
                "project_id": project_id,
                "event_id": event_id,
                "total_size": total_size,
                "old_event_size": old_event_size,
            },
        )
Example #28
def reprocess_event(project_id, event_id, start_time):

    from sentry.event_manager import set_tag
    from sentry.tasks.store import preprocess_event_from_reprocessing
    from sentry.ingest.ingest_consumer import CACHE_TIMEOUT

    # Take unprocessed data from old event and save it as unprocessed data
    # under a new event ID. The second step happens in pre-process. We could
    # save the "original event ID" instead and get away with writing less to
    # nodestore, but doing it this way makes the logic slightly simpler.
    node_id = _generate_unprocessed_event_node_id(project_id=project_id,
                                                  event_id=event_id)

    with sentry_sdk.start_span(op="reprocess_events.nodestore.get"):
        data = nodestore.get(node_id)

    with sentry_sdk.start_span(op="reprocess_events.eventstore.get"):
        event = eventstore.get_event_by_id(project_id, event_id)

    if event is None:
        logger.error("reprocessing2.event.not_found",
                     extra={
                         "project_id": project_id,
                         "event_id": event_id
                     })
        return

    if data is None:
        logger.error(
            "reprocessing2.reprocessing_nodestore.not_found",
            extra={
                "project_id": project_id,
                "event_id": event_id
            },
        )
        # We have no real data for reprocessing. We assume this event goes
        # straight to save_event, and hope that the event data can be
        # reingested like that. It's better than data loss.
        #
        # XXX: Ideally we would run a "save-lite" for this that only updates
        # the group ID in-place. Like a snuba merge message.
        data = dict(event.data)

    # Step 1: Fix up the event payload for reprocessing and put it in event
    # cache/event_processing_store
    set_tag(data, "original_group_id", event.group_id)
    cache_key = event_processing_store.store(data)

    # Step 2: Copy attachments into attachment cache
    queryset = models.EventAttachment.objects.filter(
        project_id=project_id, event_id=event_id).select_related("file")

    attachment_objects = []

    for attachment_id, attachment in enumerate(queryset):
        with sentry_sdk.start_span(
                op="reprocess_event._copy_attachment_into_cache") as span:
            span.set_data("attachment_id", attachment.id)
            attachment_objects.append(
                _copy_attachment_into_cache(
                    attachment_id=attachment_id,
                    attachment=attachment,
                    cache_key=cache_key,
                    cache_timeout=CACHE_TIMEOUT,
                ))

    if attachment_objects:
        with sentry_sdk.start_span(op="reprocess_event.set_attachment_meta"):
            attachment_cache.set(cache_key,
                                 attachments=attachment_objects,
                                 timeout=CACHE_TIMEOUT)

    preprocess_event_from_reprocessing(cache_key=cache_key,
                                       start_time=start_time,
                                       event_id=event_id)
Example #29
def reprocess_event(project_id, event_id, start_time):

    from sentry.tasks.store import preprocess_event_from_reprocessing
    from sentry.ingest.ingest_consumer import CACHE_TIMEOUT

    with sentry_sdk.start_span(op="reprocess_events.nodestore.get"):
        node_id = Event.generate_node_id(project_id, event_id)
        data = nodestore.get(node_id, subkey="unprocessed")
        if data is None:
            node_id = _generate_unprocessed_event_node_id(project_id=project_id, event_id=event_id)
            data = nodestore.get(node_id)

    with sentry_sdk.start_span(op="reprocess_events.eventstore.get"):
        event = eventstore.get_event_by_id(project_id, event_id)

    if event is None:
        logger.error(
            "reprocessing2.event.not_found", extra={"project_id": project_id, "event_id": event_id}
        )
        return

    if data is None:
        logger.error(
            "reprocessing2.reprocessing_nodestore.not_found",
            extra={"project_id": project_id, "event_id": event_id},
        )
        # We have no real data for reprocessing. We assume this event goes
        # straight to save_event, and hope that the event data can be
        # reingested like that. It's better than data loss.
        #
        # XXX: Ideally we would run a "save-lite" for this that only updates
        # the group ID in-place. Like a snuba merge message.
        data = dict(event.data)

    # Step 1: Fix up the event payload for reprocessing and put it in event
    # cache/event_processing_store
    set_path(data, "contexts", "reprocessing", "original_issue_id", value=event.group_id)
    cache_key = event_processing_store.store(data)

    # Step 2: Copy attachments into attachment cache
    queryset = models.EventAttachment.objects.filter(project_id=project_id, event_id=event_id)
    files = {f.id: f for f in models.File.objects.filter(id__in=[ea.file_id for ea in queryset])}

    attachment_objects = []

    for attachment_id, attachment in enumerate(queryset):
        with sentry_sdk.start_span(op="reprocess_event._copy_attachment_into_cache") as span:
            span.set_data("attachment_id", attachment.id)
            attachment_objects.append(
                _copy_attachment_into_cache(
                    attachment_id=attachment_id,
                    attachment=attachment,
                    file=files[attachment.file_id],
                    cache_key=cache_key,
                    cache_timeout=CACHE_TIMEOUT,
                )
            )

    if attachment_objects:
        with sentry_sdk.start_span(op="reprocess_event.set_attachment_meta"):
            attachment_cache.set(cache_key, attachments=attachment_objects, timeout=CACHE_TIMEOUT)

    preprocess_event_from_reprocessing(
        cache_key=cache_key, start_time=start_time, event_id=event_id
    )
Example #30
    def test_event_node_id(self):
        # Create an event without specifying node_id. A node_id should be generated
        e1 = Event(project_id=1, event_id="abc", data={"foo": "bar"})
        e1.save()
        e1_node_id = e1.data.id
        assert e1.data.id is not None, "We should have generated a node_id for this event"
        e1.data.save()
        e1_body = nodestore.get(e1_node_id)
        assert e1_body == {
            "foo": "bar"
        }, "The event body should be in nodestore"

        e1 = Event.objects.get(project_id=1, event_id="abc")
        assert e1.data.data == {
            "foo": "bar"
        }, "The event body should be loaded from nodestore"
        assert e1.data.id == e1_node_id, "The event's node_id should be the same after load"

        # Create another event that references the same nodestore object as the first event.
        e2 = Event(project_id=1, event_id="def", data={"node_id": e1_node_id})
        assert e2.data.id == e1_node_id, "The event should use the provided node_id"
        e2_body = nodestore.get(e1_node_id)
        assert e2_body == {
            "foo": "bar"
        }, "The event body should be in nodestore already"
        e2.save()
        e2_body = nodestore.get(e1_node_id)
        assert e2_body == {
            "foo": "bar"
        }, "The event body should not be overwritten by save"

        e2 = Event.objects.get(project_id=1, event_id="def")
        assert e2.data.data == {
            "foo": "bar"
        }, "The event body should be loaded from nodestore"
        assert e2.data.id == e1_node_id, "The event's node_id should be the same after load"

        # Create an event with a new event body that specifies the node_id to use.
        e3 = Event(project_id=1,
                   event_id="ghi",
                   data={
                       "baz": "quux",
                       "node_id": "1:ghi"
                   })
        assert e3.data.id == "1:ghi", "Event should have the specified node_id"
        assert e3.data.data == {
            "baz": "quux"
        }, "Event body should be the one provided (sans node_id)"
        e3.save()
        e3_body = nodestore.get("1:ghi")
        e3.data.save()
        e3_body = nodestore.get("1:ghi")
        assert e3_body == {
            "baz": "quux"
        }, "Event body should be saved to nodestore"

        e3 = Event.objects.get(project_id=1, event_id="ghi")
        assert e3.data.data == {
            "baz": "quux"
        }, "Event body should be loaded from nodestore"
        assert e3.data.id == "1:ghi", "Loaded event should have the correct node_id"

        # Try to load it again, this time using the pickled/compressed string we would expect to find
        # in the column
        e3_pickled_id = compress(pickle.dumps({"node_id": "1:ghi"}))
        e3 = Event(project_id=1, event_id="jkl", data=e3_pickled_id)
        assert e3.data.data == {
            "baz": "quux"
        }, "Event body should be loaded from nodestore"

        # Event with no data should not be saved (or loaded) from nodestore
        e4 = Event(project_id=1, event_id="mno", data=None)
        e4.save()
        e4.data.save()
        assert nodestore.get(
            "1:mno") is None, "We should not have saved anything to nodestore"
        e4 = Event.objects.get(project_id=1, event_id="mno")
        assert e4.data.id is None
        assert e4.data.data == {}  # NodeData returns {} by default
        e4.bind_node_data()
        assert e4.data.id is None
        assert e4.data.data == {}
Example #31
    def test_event_node_id(self):
        # Create an event without specifying node_id. A node_id should be generated
        e1 = Event(project_id=1, event_id='abc', data={'foo': 'bar'})
        e1.save()
        e1_node_id = e1.data.id
        assert e1.data.id is not None, "We should have generated a node_id for this event"
        e1_body = nodestore.get(e1_node_id)
        assert e1_body == {
            'foo': 'bar'
        }, "The event body should be in nodestore"

        e1 = Event.objects.get(project_id=1, event_id='abc')
        assert e1.data.data == {
            'foo': 'bar'
        }, "The event body should be loaded from nodestore"
        assert e1.data.id == e1_node_id, "The event's node_id should be the same after load"

        # Create another event that references the same nodestore object as the first event.
        e2 = Event(project_id=1, event_id='def', data={'node_id': e1_node_id})
        assert e2.data.id == e1_node_id, "The event should use the provided node_id"
        e2_body = nodestore.get(e1_node_id)
        assert e2_body == {
            'foo': 'bar'
        }, "The event body should be in nodestore already"
        e2.save()
        e2_body = nodestore.get(e1_node_id)
        assert e2_body == {
            'foo': 'bar'
        }, "The event body should not be overwritten by save"

        e2 = Event.objects.get(project_id=1, event_id='def')
        assert e2.data.data == {
            'foo': 'bar'
        }, "The event body should be loaded from nodestore"
        assert e2.data.id == e1_node_id, "The event's node_id should be the same after load"

        # Create an event with a new event body that specifies the node_id to use.
        e3 = Event(project_id=1,
                   event_id='ghi',
                   data={
                       'baz': 'quux',
                       'node_id': '1:ghi'
                   })
        assert e3.data.id == '1:ghi', "Event should have the specified node_id"
        assert e3.data.data == {
            'baz': 'quux'
        }, "Event body should be the one provided (sans node_id)"
        e3.save()
        e3_body = nodestore.get('1:ghi')
        assert e3_body == {
            'baz': 'quux'
        }, "Event body should be saved to nodestore"

        e3 = Event.objects.get(project_id=1, event_id='ghi')
        assert e3.data.data == {
            'baz': 'quux'
        }, "Event body should be loaded from nodestore"
        assert e3.data.id == '1:ghi', "Loaded event should have the correct node_id"

        # Try to load it again, this time using the pickled/compressed string we would expect to find
        # in the column
        e3_pickled_id = compress(pickle.dumps({'node_id': '1:ghi'}))
        e3 = Event(project_id=1, event_id='jkl', data=e3_pickled_id)
        assert e3.data.data == {
            'baz': 'quux'
        }, "Event body should be loaded from nodestore"

        # Event with no data should not be saved (or loaded) from nodestore
        e4 = Event(project_id=1, event_id='mno', data=None)
        e4.save()
        assert nodestore.get(
            '1:mno') is None, "We should not have saved anything to nodestore"
        e4 = Event.objects.get(project_id=1, event_id='mno')
        assert e4.data.id is None
        assert e4.data.data == {}  # NodeData returns {} by default
        Event.objects.bind_nodes([e4], 'data')
        assert e4.data.id is None
        assert e4.data.data == {}