Example #1
def save_unprocessed_event(project, event_id):
    """
    Move event from event_processing_store into nodestore. Only call if event
    has outcome=accepted.
    """
    if not features.has("projects:reprocessing-v2", project, actor=None):
        return

    with sentry_sdk.start_span(
        op="sentry.reprocessing2.save_unprocessed_event.get_unprocessed_event"
    ):
        data = event_processing_store.get(
            cache_key_for_event({"project": project.id, "event_id": event_id}),
            unprocessed=True,
        )
        if data is None:
            return

    with sentry_sdk.start_span(
        op="sentry.reprocessing2.save_unprocessed_event.set_nodestore"
    ):
        node_id = _generate_unprocessed_event_node_id(
            project_id=project.id, event_id=event_id
        )
        nodestore.set(node_id, data)
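All of these examples reduce to the same nodestore contract: nodestore.set(node_id, data) persists a JSON-serializable dict under a string key, and nodestore.get(node_id) returns it, or None if absent. A minimal sketch of that round trip, assuming a configured backend and a made-up node id (real callers derive the id from project_id/event_id, as above):

from sentry import nodestore

# Hypothetical node id; real code uses helpers such as
# _generate_unprocessed_event_node_id or Event.generate_node_id.
node_id = "feedface" * 4
payload = {"event_id": "f" * 32, "message": "hello"}

nodestore.set(node_id, payload)           # persist the dict under the key
assert nodestore.get(node_id) == payload  # read back; None if the key is absent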
Example #2
    def setUp(self):
        super(SnubaEventTest, self).setUp()

        self.event_id = "f" * 32
        self.now = datetime.utcnow().replace(microsecond=0) - timedelta(
            seconds=10)
        self.proj1 = self.create_project()
        self.proj1env1 = self.create_environment(project=self.proj1, name="test")
        self.proj1group1 = self.create_group(
            self.proj1,
            first_seen=self.now,
            last_seen=self.now + timedelta(seconds=14400),
        )

        # Raw event data
        self.data = {
            "event_id": self.event_id,
            "primary_hash": "1" * 32,
            "project_id": self.proj1.id,
            "message": "message 1",
            "platform": "python",
            "timestamp": calendar.timegm(self.now.timetuple()),
            "received": calendar.timegm(self.now.timetuple()),
            "tags": {
                "foo": "bar",
                "baz": "quux",
                "environment": "prod",
                "sentry:user": u"id:user1",
                "sentry:release": "release1",
            },
            "user": {
                "id": u"user1",
                "email": u"*****@*****.**"
            },
        }

        # Create a regular django Event from the data, which will save the
        # data in nodestore too. Once Postgres events are deprecated, we can
        # turn this off and just put the payload in nodestore.
        make_django_event = True
        if make_django_event:
            self.create_event(
                event_id=self.data["event_id"],
                datetime=self.now,
                project=self.proj1,
                group=self.proj1group1,
                data=self.data,
            )
            nodestore_data = nodestore.get(
                SnubaEvent.generate_node_id(self.proj1.id, self.event_id))
            assert self.data["event_id"] == nodestore_data["event_id"]
        else:
            node_id = SnubaEvent.generate_node_id(self.proj1.id, self.event_id)
            nodestore.set(node_id, self.data)
            assert nodestore.get(node_id) == self.data
Example #3
    def setUp(self):
        super(SnubaEventTest, self).setUp()

        self.event_id = 'f' * 32
        self.now = datetime.utcnow().replace(microsecond=0) - timedelta(
            seconds=10)
        self.proj1 = self.create_project()
        self.proj1env1 = self.create_environment(project=self.proj1, name='test')
        self.proj1group1 = self.create_group(
            self.proj1,
            first_seen=self.now,
            last_seen=self.now + timedelta(seconds=14400),
        )

        # Raw event data
        data = {
            'event_id': self.event_id,
            'primary_hash': '1' * 32,
            'project_id': self.proj1.id,
            'message': 'message 1',
            'platform': 'python',
            'timestamp': calendar.timegm(self.now.timetuple()),
            'received': calendar.timegm(self.now.timetuple()),
            'tags': {
                'foo': 'bar',
                'baz': 'quux',
                'environment': 'prod',
                'sentry:user': u'id:user1',
                'sentry:release': 'release1',
            },
            'user': {
                'id': u'user1',
                'email': u'*****@*****.**',
            },
        }

        # Create a regular django Event from the data, which will save the
        # data in nodestore too. Once Postgres events are deprecated, we can
        # turn this off and just put the payload in nodestore.
        make_django_event = True
        if make_django_event:
            self.create_event(
                event_id=data['event_id'],
                datetime=self.now,
                project=self.proj1,
                group=self.proj1group1,
                data=data,
            )
            nodestore_data = nodestore.get(
                SnubaEvent.generate_node_id(self.proj1.id, self.event_id))
            assert data['event_id'] == nodestore_data['event_id']
        else:
            node_id = SnubaEvent.generate_node_id(self.proj1.id, self.event_id)
            nodestore.set(node_id, data)
            assert nodestore.get(node_id) == data
Example #4
def capture_nodestore_stats(project_id, event_id):
    set_current_project(project_id)

    from sentry import nodestore
    from sentry.eventstore.compressor import deduplicate
    from sentry.eventstore.models import Event

    event = Event(project_id=project_id, event_id=event_id)
    old_event_size = _json_size(dict(event.data))

    if not event.data:
        metrics.incr("eventstore.compressor.error", tags={"reason": "no_data"})
        return

    platform = event.platform

    for key, value in six.iteritems(event.interfaces):
        len_value = _json_size(value.to_json())
        metrics.timing(
            "events.size.interface", len_value, tags={"interface": key, "platform": platform}
        )

    new_data, extra_keys = deduplicate(dict(event.data))

    total_size = event_size = _json_size(new_data)

    for key, value in six.iteritems(extra_keys):
        if nodestore.get(key) is not None:
            metrics.incr("eventstore.compressor.hits")
            # do not continue, nodestore.set() should bump TTL
        else:
            metrics.incr("eventstore.compressor.misses")
            total_size += _json_size(value)

        # key is md5sum of content
        # do not store actual value to keep prod impact to a minimum
        nodestore.set(key, {})

    metrics.timing("events.size.deduplicated", event_size)
    metrics.timing("events.size.deduplicated.total_written", total_size)

    metrics.timing("events.size.deduplicated.ratio", event_size / old_event_size)
    metrics.timing("events.size.deduplicated.total_written.ratio", total_size / old_event_size)

    if total_size > old_event_size:
        nodestore_stats_logger.info(
            "events.size.deduplicated.details",
            extra={
                "project_id": project_id,
                "event_id": event_id,
                "total_size": total_size,
                "old_event_size": old_event_size,
            },
        )
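Examples #4 and #10 both lean on a _json_size helper that this page does not show. A plausible reconstruction, assuming the metric is simply the JSON-encoded byte length; Example #10 passes it two arguments, so it should accept several:

import json

def _json_size(*objs):
    # Assumption: byte length of the JSON-serialized payload(s),
    # summed when more than one object is passed.
    return sum(len(json.dumps(obj).encode("utf-8")) for obj in objs)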
Example #5
    def get_prep_value(self, value):
        if not value and self.null:
            # save ourselves some storage
            return None

        # TODO(dcramer): we should probably do this more intelligently
        # and manually
        if not value.id:
            value.id = nodestore.create(value.data)
        else:
            nodestore.set(value.id, value.data)

        return compress(pickle.dumps({'node_id': value.id}))
Example #6
    def get_prep_value(self, value):
        if not value and self.null:
            # save ourselves some storage
            return None

        # TODO(dcramer): we should probably do this more intelligently
        # and manually
        if not value.id:
            value.id = nodestore.create(value.data)
        else:
            nodestore.set(value.id, value.data)

        return compress(pickle.dumps({'node_id': value.id}))
Example #7
File: node.py Project: webZW/sentry
    def save(self):
        """
        Write current data back to nodestore.
        """

        # We never loaded any data for reading or writing, so there
        # is nothing to save.
        if self._node_data is None:
            return

        # We can't put our wrappers into the nodestore, so we need to
        # ensure that the data is converted into a plain old dict
        to_write = self._node_data
        if isinstance(to_write, CANONICAL_TYPES):
            to_write = dict(to_write.items())

        nodestore.set(self.id, to_write)
Example #8
File: node.py Project: getsentry/sentry
    def save(self):
        """
        Write current data back to nodestore.
        """

        # We never loaded any data for reading or writing, so there
        # is nothing to save.
        if self._node_data is None:
            return

        # We can't put our wrappers into the nodestore, so we need to
        # ensure that the data is converted into a plain old dict
        to_write = self._node_data
        if isinstance(to_write, CANONICAL_TYPES):
            to_write = dict(to_write.items())

        nodestore.set(self.id, to_write)
Example #9
    def get_prep_value(self, value):
        if not value and self.null:
            # save ourselves some storage
            return None

        # We can't put our wrappers into the nodestore, so we need to
        # ensure that the data is converted into a plain old dict
        data = value.data
        if isinstance(data, CANONICAL_TYPES):
            data = dict(data.items())

        # TODO(dcramer): we should probably do this more intelligently
        # and manually
        if not value.id:
            value.id = nodestore.create(data)
        else:
            nodestore.set(value.id, data)

        return compress(pickle.dumps({'node_id': value.id}))
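The read path of this field is roughly the inverse of get_prep_value: decompress the stored blob, unpickle the {'node_id': ...} pointer, then fetch the payload from nodestore. A hedged sketch, assuming a decompress helper that mirrors the compress used above; the real field most likely hands back a lazy wrapper rather than fetching eagerly:

import pickle

from sentry import nodestore

def load_node_payload(stored_blob):
    # Inverse of get_prep_value: blob -> {'node_id': ...} pointer -> payload.
    # decompress() is assumed to undo the compress() used in get_prep_value.
    if stored_blob is None:
        return None
    node_id = pickle.loads(decompress(stored_blob))['node_id']
    return nodestore.get(node_id)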
Example #10
def capture_nodestore_stats(cache_key, project_id, event_id):
    set_current_project(project_id)

    from sentry.eventstore.compressor import deduplicate
    from sentry.eventstore.models import Event

    node_id = Event.generate_node_id(project_id, event_id)
    data = nodestore.get(node_id)

    if not data:
        metrics.incr("eventstore.compressor.error", tags={"reason": "no_data"})
        return

    old_event_size = _json_size(data)

    unprocessed_data = event_processing_store.get(
        _get_unprocessed_key(cache_key))
    event_processing_store.delete_by_key(_get_unprocessed_key(cache_key))

    tags = {
        "with_reprocessing": bool(unprocessed_data),
        "platform": data.get("platform") or "none",
        "is_minidump": is_minidump_event(data),
    }

    if unprocessed_data:
        metrics.incr("nodestore_stats.with_reprocessing")

        concatenated_size = _json_size(data, unprocessed_data)
        metrics.timing("events.size.concatenated",
                       concatenated_size,
                       tags=tags)
        metrics.timing("events.size.concatenated.ratio",
                       concatenated_size / old_event_size,
                       tags=tags)

        _data = dict(data)
        _data["__nodestore_reprocessing"] = unprocessed_data
        simple_concatenated_size = _json_size(_data)
        metrics.timing("events.size.simple_concatenated",
                       simple_concatenated_size,
                       tags=tags)
        metrics.timing(
            "events.size.simple_concatenated.ratio",
            simple_concatenated_size / old_event_size,
            tags=tags,
        )
    else:
        metrics.incr("nodestore_stats.without_reprocessing")

    new_data, extra_keys = deduplicate(dict(data))
    total_size = event_size = _json_size(new_data)

    for key, value in six.iteritems(extra_keys):
        if nodestore.get(key) is not None:
            metrics.incr("eventstore.compressor.hits", tags=tags)
            # do not continue, nodestore.set() should bump TTL
        else:
            metrics.incr("eventstore.compressor.misses", tags=tags)
            total_size += _json_size(value)

        # key is md5sum of content
        # do not store actual value to keep prod impact to a minimum
        nodestore.set(key, {})

    metrics.timing("events.size.deduplicated", event_size, tags=tags)
    metrics.timing("events.size.deduplicated.total_written",
                   total_size,
                   tags=tags)

    metrics.timing("events.size.deduplicated.ratio",
                   event_size / old_event_size,
                   tags=tags)
    metrics.timing("events.size.deduplicated.total_written.ratio",
                   total_size / old_event_size,
                   tags=tags)

    if total_size > old_event_size:
        nodestore_stats_logger.info(
            "events.size.deduplicated.details",
            extra={
                "project_id": project_id,
                "event_id": event_id,
                "total_size": total_size,
                "old_event_size": old_event_size,
            },
        )
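The "key is md5sum of content" comments in Examples #4 and #10 describe a content-addressed scheme: duplicated sub-payloads are swapped for a hash key, and only keys not already present in nodestore cost new storage. A minimal, self-contained sketch of the idea; this is not Sentry's actual deduplicate, and the key names are illustrative:

import hashlib
import json

def deduplicate_sketch(data, large_keys=("stacktrace", "breadcrumbs")):
    # Split large values out as md5-keyed blobs; return (new_data, extra_keys).
    new_data, extra_keys = dict(data), {}
    for key in large_keys:
        if key not in new_data:
            continue
        value = new_data[key]
        digest = hashlib.md5(
            json.dumps(value, sort_keys=True).encode("utf-8")
        ).hexdigest()
        extra_keys[digest] = value                 # candidate blob for nodestore
        new_data[key] = {"__content_key": digest}  # pointer left in the event
    return new_data, extra_keys

The caller then walks extra_keys exactly as Examples #4 and #10 do: a nodestore hit means the content already exists and only its TTL needs bumping, while a miss means the blob's size counts toward the total written.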