def save_unprocessed_event(project, event_id):
    """
    Move event from event_processing_store into nodestore. Only call if
    event has outcome=accepted.
    """
    if not features.has("projects:reprocessing-v2", project, actor=None):
        return

    with sentry_sdk.start_span(
        op="sentry.reprocessing2.save_unprocessed_event.get_unprocessed_event"
    ):
        data = event_processing_store.get(
            cache_key_for_event({"project": project.id, "event_id": event_id}),
            unprocessed=True,
        )
        if data is None:
            return

    with sentry_sdk.start_span(op="sentry.reprocessing2.save_unprocessed_event.set_nodestore"):
        node_id = _generate_unprocessed_event_node_id(project_id=project.id, event_id=event_id)
        nodestore.set(node_id, data)
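# The snippets in this collection all follow the same node id convention: a
# payload is written with nodestore.set() under an id derived from the
# project and event, and read back with nodestore.get(). A minimal
# round-trip sketch, assuming a configured Sentry/Django environment; the
# project_id and event_id below are placeholder values, not real data.
from sentry import nodestore
from sentry.eventstore.models import Event

project_id = 1
event_id = "f" * 32

node_id = Event.generate_node_id(project_id, event_id)
nodestore.set(node_id, {"event_id": event_id, "message": "example"})
assert nodestore.get(node_id)["event_id"] == event_id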
def setUp(self): super(SnubaEventTest, self).setUp() self.event_id = "f" * 32 self.now = datetime.utcnow().replace(microsecond=0) - timedelta( seconds=10) self.proj1 = self.create_project() self.proj1env1 = self.create_environment(project=self.proj1, name="test") self.proj1group1 = self.create_group(self.proj1, first_seen=self.now, last_seen=self.now + timedelta(seconds=14400)) # Raw event data self.data = { "event_id": self.event_id, "primary_hash": "1" * 32, "project_id": self.proj1.id, "message": "message 1", "platform": "python", "timestamp": calendar.timegm(self.now.timetuple()), "received": calendar.timegm(self.now.timetuple()), "tags": { "foo": "bar", "baz": "quux", "environment": "prod", "sentry:user": u"id:user1", "sentry:release": "release1", }, "user": { "id": u"user1", "email": u"*****@*****.**" }, } # Create a regular django Event from the data, which will save the. # data in nodestore too. Once Postgres events are deprecated, we can # turn this off and just put the payload in nodestore. make_django_event = True if make_django_event: self.create_event( event_id=self.data["event_id"], datetime=self.now, project=self.proj1, group=self.proj1group1, data=self.data, ) nodestore_data = nodestore.get( SnubaEvent.generate_node_id(self.proj1.id, self.event_id)) assert self.data["event_id"] == nodestore_data["event_id"] else: node_id = SnubaEvent.generate_node_id(self.proj1.id, self.event_id) nodestore.set(node_id, self.data) assert nodestore.get(node_id) == self.data
def setUp(self):
    super(SnubaEventTest, self).setUp()

    self.event_id = 'f' * 32
    self.now = datetime.utcnow().replace(microsecond=0) - timedelta(seconds=10)
    self.proj1 = self.create_project()
    self.proj1env1 = self.create_environment(project=self.proj1, name='test')
    self.proj1group1 = self.create_group(
        self.proj1, first_seen=self.now, last_seen=self.now + timedelta(seconds=14400)
    )

    # Raw event data
    data = {
        'event_id': self.event_id,
        'primary_hash': '1' * 32,
        'project_id': self.proj1.id,
        'message': 'message 1',
        'platform': 'python',
        'timestamp': calendar.timegm(self.now.timetuple()),
        'received': calendar.timegm(self.now.timetuple()),
        'tags': {
            'foo': 'bar',
            'baz': 'quux',
            'environment': 'prod',
            'sentry:user': u'id:user1',
            'sentry:release': 'release1',
        },
        'user': {
            'id': u'user1',
            'email': u'*****@*****.**',
        },
    }

    # Create a regular Django Event from the data, which will save the
    # data in nodestore too. Once Postgres events are deprecated, we can
    # turn this off and just put the payload in nodestore.
    make_django_event = True
    if make_django_event:
        self.create_event(
            event_id=data['event_id'],
            datetime=self.now,
            project=self.proj1,
            group=self.proj1group1,
            data=data,
        )
        nodestore_data = nodestore.get(
            SnubaEvent.generate_node_id(self.proj1.id, self.event_id)
        )
        assert data['event_id'] == nodestore_data['event_id']
    else:
        node_id = SnubaEvent.generate_node_id(self.proj1.id, self.event_id)
        nodestore.set(node_id, data)
        assert nodestore.get(node_id) == data
def capture_nodestore_stats(project_id, event_id):
    set_current_project(project_id)

    from sentry import nodestore
    from sentry.eventstore.compressor import deduplicate
    from sentry.eventstore.models import Event

    event = Event(project_id=project_id, event_id=event_id)
    old_event_size = _json_size(dict(event.data))

    if not event.data:
        metrics.incr("eventstore.compressor.error", tags={"reason": "no_data"})
        return

    platform = event.platform

    for key, value in six.iteritems(event.interfaces):
        len_value = _json_size(value.to_json())
        metrics.timing(
            "events.size.interface", len_value, tags={"interface": key, "platform": platform}
        )

    new_data, extra_keys = deduplicate(dict(event.data))
    total_size = event_size = _json_size(new_data)

    for key, value in six.iteritems(extra_keys):
        if nodestore.get(key) is not None:
            metrics.incr("eventstore.compressor.hits")
            # do not continue, nodestore.set() should bump TTL
        else:
            metrics.incr("eventstore.compressor.misses")
            total_size += _json_size(value)

        # key is md5sum of content
        # do not store actual value to keep prod impact to a minimum
        nodestore.set(key, {})

    metrics.timing("events.size.deduplicated", event_size)
    metrics.timing("events.size.deduplicated.total_written", total_size)
    metrics.timing("events.size.deduplicated.ratio", event_size / old_event_size)
    metrics.timing("events.size.deduplicated.total_written.ratio", total_size / old_event_size)

    if total_size > old_event_size:
        nodestore_stats_logger.info(
            "events.size.deduplicated.details",
            extra={
                "project_id": project_id,
                "event_id": event_id,
                "total_size": total_size,
                "old_event_size": old_event_size,
            },
        )
def get_prep_value(self, value):
    if not value and self.null:
        # save ourselves some storage
        return None

    # TODO(dcramer): we should probably do this more intelligently
    # and manually
    if not value.id:
        value.id = nodestore.create(value.data)
    else:
        nodestore.set(value.id, value.data)

    return compress(pickle.dumps({'node_id': value.id}))
def save(self):
    """
    Write current data back to nodestore.
    """
    # We never loaded any data for reading or writing, so there
    # is nothing to save.
    if self._node_data is None:
        return

    # We can't put our wrappers into the nodestore, so we need to
    # ensure that the data is converted into a plain old dict
    to_write = self._node_data
    if isinstance(to_write, CANONICAL_TYPES):
        to_write = dict(to_write.items())

    nodestore.set(self.id, to_write)
def get_prep_value(self, value):
    if not value and self.null:
        # save ourselves some storage
        return None

    # We can't put our wrappers into the nodestore, so we need to
    # ensure that the data is converted into a plain old dict
    data = value.data
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    # TODO(dcramer): we should probably do this more intelligently
    # and manually
    if not value.id:
        value.id = nodestore.create(data)
    else:
        nodestore.set(value.id, data)

    return compress(pickle.dumps({'node_id': value.id}))
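# Hypothetical read-side counterpart to the get_prep_value() variants above
# (a sketch, not the actual NodeField implementation): the database column
# only holds a compressed, pickled {"node_id": ...} reference, while the
# payload itself lives in nodestore. The `decompress` callable is assumed
# to be the inverse of the `compress` helper used above and is passed in
# explicitly rather than imported.
import pickle

from sentry import nodestore


def load_node_payload(column_value, decompress):
    # Recover the node reference written by get_prep_value().
    ref = pickle.loads(decompress(column_value))
    # Fetch the actual payload from nodestore by its node id.
    return nodestore.get(ref["node_id"])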
def capture_nodestore_stats(cache_key, project_id, event_id):
    set_current_project(project_id)

    from sentry.eventstore.compressor import deduplicate
    from sentry.eventstore.models import Event

    node_id = Event.generate_node_id(project_id, event_id)
    data = nodestore.get(node_id)

    if not data:
        metrics.incr("eventstore.compressor.error", tags={"reason": "no_data"})
        return

    old_event_size = _json_size(data)

    unprocessed_data = event_processing_store.get(_get_unprocessed_key(cache_key))
    event_processing_store.delete_by_key(_get_unprocessed_key(cache_key))

    tags = {
        "with_reprocessing": bool(unprocessed_data),
        "platform": data.get("platform") or "none",
        "is_minidump": is_minidump_event(data),
    }

    if unprocessed_data:
        metrics.incr("nodestore_stats.with_reprocessing")

        concatenated_size = _json_size(data, unprocessed_data)
        metrics.timing("events.size.concatenated", concatenated_size, tags=tags)
        metrics.timing(
            "events.size.concatenated.ratio", concatenated_size / old_event_size, tags=tags
        )

        _data = dict(data)
        _data["__nodestore_reprocessing"] = unprocessed_data
        simple_concatenated_size = _json_size(_data)
        metrics.timing("events.size.simple_concatenated", simple_concatenated_size, tags=tags)
        metrics.timing(
            "events.size.simple_concatenated.ratio",
            simple_concatenated_size / old_event_size,
            tags=tags,
        )
    else:
        metrics.incr("nodestore_stats.without_reprocessing")

    new_data, extra_keys = deduplicate(dict(data))
    total_size = event_size = _json_size(new_data)

    for key, value in six.iteritems(extra_keys):
        if nodestore.get(key) is not None:
            metrics.incr("eventstore.compressor.hits", tags=tags)
            # do not continue, nodestore.set() should bump TTL
        else:
            metrics.incr("eventstore.compressor.misses", tags=tags)
            total_size += _json_size(value)

        # key is md5sum of content
        # do not store actual value to keep prod impact to a minimum
        nodestore.set(key, {})

    metrics.timing("events.size.deduplicated", event_size, tags=tags)
    metrics.timing("events.size.deduplicated.total_written", total_size, tags=tags)
    metrics.timing("events.size.deduplicated.ratio", event_size / old_event_size, tags=tags)
    metrics.timing(
        "events.size.deduplicated.total_written.ratio", total_size / old_event_size, tags=tags
    )

    if total_size > old_event_size:
        nodestore_stats_logger.info(
            "events.size.deduplicated.details",
            extra={
                "project_id": project_id,
                "event_id": event_id,
                "total_size": total_size,
                "old_event_size": old_event_size,
            },
        )