def _process_snuba_results(query_res, group: Group, user):
    """Turn Snuba grouping rows into serialized API response items.

    For each row, looks up the latest event's payload in nodestore and, when
    the payload is present, attaches the serialized event under
    ``latestEvent``. Rows whose payload is missing are still returned with
    hash and event count only.
    """
    # Map each latest event id to its nodestore key so we can batch-fetch.
    node_ids_by_event = {
        row["latest_event_id"]: Event.generate_node_id(group.project_id, row["latest_event_id"])
        for row in query_res
    }
    payloads = nodestore.get_multi(list(node_ids_by_event.values()))

    results = []
    for row in query_res:
        item = {
            "hash": row["new_materialized_hash"],
            "eventCount": row["event_count"],
        }
        latest_id = row["latest_event_id"]
        data = payloads.get(node_ids_by_event[latest_id])
        if data is not None:
            latest = Event(group.project_id, latest_id, group_id=group.id, data=data)
            item["latestEvent"] = serialize(latest, user, EventSerializer())
        results.append(item)
    return results
def test_bind_nodes(self):
    """
    Test that bind_nodes populates _node_data
    """
    min_ago = iso_format(before_now(minutes=1))
    self.store_event(
        data={"event_id": "a" * 32, "timestamp": min_ago, "user": {"id": u"user1"}},
        project_id=self.project.id,
    )
    self.store_event(
        data={"event_id": "b" * 32, "timestamp": min_ago, "user": {"id": u"user2"}},
        project_id=self.project.id,
    )
    event = Event(project_id=self.project.id, event_id="a" * 32)
    event2 = Event(project_id=self.project.id, event_id="b" * 32)
    # Freshly constructed events have no node data until bind_nodes runs.
    assert event.data._node_data is None
    self.eventstorage.bind_nodes([event, event2], "data")
    assert event.data._node_data is not None
    assert event.data["user"]["id"] == u"user1"
    # Bind nodes is noop if node data was already fetched
    with mock.patch.object(nodestore, "get_multi") as mock_get_multi:
        self.eventstorage.bind_nodes([event, event2])
        assert mock_get_multi.call_count == 0
def _process(self, filename):
    """Load one serialized event from disk and compute its grouping data.

    Returns ``(flat_hashes, hierarchical_hashes, item)`` where ``item``
    carries per-event metadata (json_url, crash_report, dump_variants)
    used downstream.
    """
    with open(filename, 'r') as handle:
        payload = json.load(handle)

    event = Event(self._project_id, payload['event_id'], group_id=None, data=payload)
    flat_hashes, hierarchical_hashes = event.get_hashes(force_config=self._config)

    # Prevent events ending up in the project node
    if not hierarchical_hashes:
        hierarchical_hashes = ["NO_HASH"]

    item = extract_event_data(event)
    item['json_url'] = Path(filename).relative_to(self._event_dir)
    # Seems abundant to do this for every event, but it's faster
    # than synchronising between processes when to generate
    item['crash_report'] = get_crash_report(event)
    item['dump_variants'] = dump_variants(self._config, event)

    return flat_hashes, hierarchical_hashes, item
def test_bind_nodes(self):
    """
    Test that bind_nodes populates _node_data
    """
    min_ago = iso_format(before_now(minutes=1))
    # Store two events with distinct users so we can tell the payloads apart.
    for event_id, user_id in (("a" * 32, u"user1"), ("b" * 32, u"user2")):
        self.store_event(
            data={"event_id": event_id, "timestamp": min_ago, "user": {"id": user_id}},
            project_id=self.project.id,
        )

    first = Event(project_id=self.project.id, event_id="a" * 32)
    second = Event(project_id=self.project.id, event_id="b" * 32)

    # No node data before binding.
    assert first.data._node_data is None

    self.eventstorage.bind_nodes([first, second], "data")

    assert first.data._node_data is not None
    assert first.data["user"]["id"] == u"user1"
def test_event_node_id(self):
    """Node ids are generated for event data and round-trip through nodestore."""
    # Create an event without specifying node_id. A node_id should be generated
    e1 = Event(project_id=1, event_id="abc", data={"foo": "bar"})
    assert e1.data.id is not None, "We should have generated a node_id for this event"
    e1_node_id = e1.data.id
    e1.data.save()
    e1_body = nodestore.get(e1_node_id)
    assert e1_body == {"foo": "bar"}, "The event body should be in nodestore"

    # Reconstructing the same event should load the saved body and reuse the id.
    e1 = Event(project_id=1, event_id="abc")
    assert e1.data.data == {"foo": "bar"}, "The event body should be loaded from nodestore"
    assert e1.data.id == e1_node_id, "The event's node_id should be the same after load"

    # Event with no data should not be saved to nodestore
    e2 = Event(project_id=1, event_id="mno", data=None)
    e2_node_id = e2.data.id
    assert e2.data.data == {}  # NodeData returns {} by default
    eventstore.bind_nodes([e2], "data")
    assert e2.data.data == {}
    e2_body = nodestore.get(e2_node_id)
    assert e2_body is None
def test_simple(self):
    """Deleting a group removes its DB rows, hashes, redirects, and node payloads."""
    event_id = "a" * 32
    event_id_2 = "b" * 32
    project = self.create_project()
    node_id = Event.generate_node_id(project.id, event_id)
    node_id_2 = Event.generate_node_id(project.id, event_id_2)

    # Two events fingerprinted into the same group.
    event = self.store_event(
        data={
            "event_id": event_id,
            "timestamp": iso_format(before_now(minutes=1)),
            "fingerprint": ["group1"],
        },
        project_id=project.id,
    )
    self.store_event(
        data={
            "event_id": event_id_2,
            "timestamp": iso_format(before_now(minutes=1)),
            "fingerprint": ["group1"],
        },
        project_id=project.id,
    )

    group = event.group
    group.update(status=GroupStatus.PENDING_DELETION)

    # Related models that must be cleaned up alongside the group.
    GroupAssignee.objects.create(group=group, project=project, user=self.user)
    GroupHash.objects.create(project=project, group=group, hash=uuid4().hex)
    GroupMeta.objects.create(group=group, key="foo", value="bar")
    GroupRedirect.objects.create(group_id=group.id, previous_group_id=1)

    # Node payloads exist before deletion.
    assert nodestore.get(node_id)
    assert nodestore.get(node_id_2)

    with self.tasks():
        delete_groups(object_ids=[group.id])

    assert not GroupRedirect.objects.filter(group_id=group.id).exists()
    assert not GroupHash.objects.filter(group_id=group.id).exists()
    assert not Group.objects.filter(id=group.id).exists()
    assert not nodestore.get(node_id)
    assert not nodestore.get(node_id_2)
def get_task_kwargs_for_insert(operation, event_data, task_state=None):
    """Build kwargs for a post-processing "insert" task from eventstream data.

    Returns None when ``task_state`` says consumption should be skipped.
    """
    if task_state and task_state.get("skip_consume", False):
        return None  # nothing to do

    # Restore the datetime from its serialized wire format and pin it to UTC.
    parsed = datetime.strptime(event_data["datetime"], "%Y-%m-%dT%H:%M:%S.%fZ")
    event_data["datetime"] = parsed.replace(tzinfo=pytz.utc)

    # This data is already normalized as we're currently in the
    # ingestion pipeline and the event was in store
    # normalization just a few seconds ago. Running it through
    # Rust (re)normalization here again would be too slow.
    event_data["data"] = EventDict(event_data["data"], skip_renormalization=True)

    event = Event(
        event_id=event_data["event_id"],
        group_id=event_data["group_id"],
        project_id=event_data["project_id"],
    )
    event.data.bind_data(event_data["data"])

    kwargs = {"event": event, "primary_hash": event_data["primary_hash"]}
    for flag in ("is_new", "is_regression", "is_new_group_environment"):
        kwargs[flag] = task_state[flag]

    return kwargs
def tombstone_events(project_id, group_id, event_ids):
    """
    Delete associated per-event data: nodestore, event attachments, user
    reports. Mark the event as "tombstoned" in Snuba.

    This is not full event deletion. Snuba can still only delete entire
    groups, however we must only run this task for event IDs that we don't
    intend to reuse for reprocessed events. An event ID that is once
    tombstoned cannot be inserted over in eventstream.

    See doccomment in sentry.reprocessing2.
    """
    from sentry.reprocessing2 import delete_unprocessed_events

    # Drop per-event rows for both models in one pass.
    for model in (models.EventAttachment, models.UserReport):
        model.objects.filter(project_id=project_id, event_id__in=event_ids).delete()

    # Remove the event payloads from nodestore.
    nodestore.delete_multi(
        [Event.generate_node_id(project_id, event_id) for event_id in event_ids]
    )

    delete_unprocessed_events(project_id, event_ids)

    # Tell Snuba to delete the event data.
    eventstream.tombstone_events(project_id, event_ids)
def test_serialize_event(self):
    """Serialization works from a stored event and from one loaded via node data."""
    stored = self.store_event(
        data={
            "event_id": "a" * 32,
            "message": "Hello World!",
            "tags": {"logger": "foobar", "site": "foo", "server_name": "bar"},
        },
        project_id=self.project.id,
    )
    group_id = stored.group_id

    def check(payload):
        # Shared expectations for both serialization paths.
        assert payload["eventID"] == "a" * 32
        assert payload["projectID"] == six.text_type(self.project.id)
        assert payload["groupID"] == six.text_type(group_id)
        assert payload["message"] == "Hello World!"

    check(serialize(stored))

    # Can serialize an event by loading node data
    reloaded = Event(project_id=self.project.id, event_id="a" * 32, group_id=group_id)
    check(serialize(reloaded))
def test_get_event_by_id_cached(self):
    """A pre-Snuba event yields None, and the negative result is not cached."""
    # Simulate getting an event that exists in eventstore but has not yet been written to snuba.
    with mock.patch("sentry.eventstore.snuba.backend.Event") as mock_event:
        dummy_event = Event(
            project_id=self.project2.id,
            event_id="f" * 32,
            data={"something": "hi", "timestamp": self.min_ago},
        )
        mock_event.return_value = dummy_event
        event = self.eventstore.get_event_by_id(self.project2.id, "f" * 32)
        # Result of query should be None
        assert event is None

    # Now we store the event properly, so it will exist in Snuba.
    self.store_event(
        data={"event_id": "f" * 32, "timestamp": self.min_ago},
        project_id=self.project2.id,
    )

    # Make sure that the negative cache isn't causing the event to not show up
    event = self.eventstore.get_event_by_id(self.project2.id, "f" * 32)
    assert event.event_id == "f" * 32
    assert event.project_id == self.project2.id
    assert event.group_id == event.group.id
def get_unmerge_key(
    self, event: Event, locked_primary_hashes: Collection[str]
) -> Optional[str]:
    """Return the unmerge key for ``event``, or None if it is not eligible.

    An event is eligible only when its primary hash is one of the selected
    fingerprints AND that hash is currently locked.
    """
    primary_hash = event.get_primary_hash()
    if primary_hash not in self.fingerprints:
        return None
    if primary_hash not in locked_primary_hashes:
        return None
    return _DEFAULT_UNMERGE_KEY
def _process_snuba_results(query_res, group: Group, id: int, user):
    """Serialize Snuba grouping rows, attaching latest-event data and a title.

    ``id`` selects an entry from the event's ``hierarchical_tree_labels``
    (falling back to the last label). NOTE(review): the ``id`` parameter
    shadows the builtin -- consider renaming together with its callers.
    """
    # Map latest event ids to nodestore keys so payloads can be batch-fetched.
    event_ids = {
        row["latest_event_id"]: Event.generate_node_id(group.project_id, row["latest_event_id"])
        for row in query_res
    }
    node_data = nodestore.get_multi(list(event_ids.values()))

    response = []
    for row in query_res:
        response_item = {
            "hash": row["new_materialized_hash"],
            "eventCount": row["event_count"],
        }
        event_id = row["latest_event_id"]
        event_data = node_data.get(event_ids[event_id], None)
        if event_data is not None:
            event = Event(group.project_id, event_id, group_id=group.id, data=event_data)
            response_item["latestEvent"] = serialize(event, user, EventSerializer())

            tree_label = get_path(event_data, "hierarchical_tree_labels", id) or get_path(
                event_data, "hierarchical_tree_labels", -1
            )

            # Rough approximation of what happens with Group title
            event_type = get_event_type(event.data)
            metadata = dict(event.get_event_metadata())
            metadata["current_tree_label"] = tree_label
            # Force rendering of grouping tree labels irrespective of platform
            metadata["display_title_with_tree_label"] = True
            title = event_type.get_title(metadata)
            response_item["title"] = title or event.title
            response_item["metadata"] = metadata

        response.append(response_item)

    return response
def handle_remaining_events(project_id, new_group_id, event_ids,
                            remaining_events, from_timestamp, to_timestamp):
    """
    Delete or merge/move associated per-event data: nodestore, event
    attachments, user reports. Mark the event as "tombstoned" in Snuba.

    This is not full event deletion. Snuba can still only delete entire
    groups, however we must only run this task for event IDs that we don't
    intend to reuse for reprocessed events. An event ID that is once
    tombstoned cannot be inserted over in eventstream.

    See doc comment in sentry.reprocessing2.
    """
    from sentry import buffer
    from sentry.models.group import Group
    from sentry.reprocessing2 import EVENT_MODELS_TO_MIGRATE

    assert remaining_events in ("delete", "keep")

    if remaining_events == "delete":
        # Drop per-event rows for every model that would otherwise migrate.
        for cls in EVENT_MODELS_TO_MIGRATE:
            cls.objects.filter(project_id=project_id, event_id__in=event_ids).delete()

        # Remove from nodestore
        node_ids = [
            Event.generate_node_id(project_id, event_id) for event_id in event_ids
        ]
        nodestore.delete_multi(node_ids)

        # Tell Snuba to delete the event data.
        eventstream.tombstone_events_unsafe(
            project_id, event_ids,
            from_timestamp=from_timestamp, to_timestamp=to_timestamp)
    elif remaining_events == "keep":
        # Re-point per-event rows at the destination group.
        for cls in EVENT_MODELS_TO_MIGRATE:
            cls.objects.filter(
                project_id=project_id, event_id__in=event_ids).update(group_id=new_group_id)

        eventstream.replace_group_unsafe(
            project_id,
            event_ids,
            new_group_id=new_group_id,
            from_timestamp=from_timestamp,
            to_timestamp=to_timestamp,
        )

        # Bump times_seen on the destination group via the write buffer.
        buffer.incr(Group, {"times_seen": len(event_ids)}, {"id": new_group_id})
    else:
        raise ValueError(
            f"Invalid value for remaining_events: {remaining_events}")
def test_bind_node_data(self):
    """Binding node data on an already-bound event does not hit nodestore."""
    stored = self.store_event(
        data={
            "event_id": "a" * 32,
            "message": "test",
            "timestamp": iso_format(before_now(seconds=1)),
            "type": "error",
        },
        project_id=self.project.id,
    )
    group_id = stored.group.id

    # A freshly constructed event can bind its node data from the store.
    fresh = Event(self.project.id, "a" * 32, group_id=group_id)
    fresh.bind_node_data()

    # Re-binding the stored event must not fetch from nodestore again.
    with mock.patch.object(nodestore, "get") as mock_get:
        stored.bind_node_data()
        stored.bind_node_data()
        assert mock_get.call_count == 0
def capture_nodestore_stats(project_id, event_id):
    """Measure how well eventstore.compressor would deduplicate one event.

    Emits size and ratio metrics for the raw vs. deduplicated payload, and
    logs details when deduplication would actually grow the stored size.
    Writes empty placeholder nodes for deduplicated chunks (keys only).
    """
    set_current_project(project_id)

    from sentry import nodestore
    from sentry.eventstore.compressor import deduplicate
    from sentry.eventstore.models import Event

    event = Event(project_id=project_id, event_id=event_id)
    old_event_size = _json_size(dict(event.data))

    if not event.data:
        metrics.incr("eventstore.compressor.error", tags={"reason": "no_data"})
        return

    platform = event.platform
    # Per-interface size breakdown.
    for key, value in six.iteritems(event.interfaces):
        len_value = _json_size(value.to_json())
        metrics.timing(
            "events.size.interface", len_value, tags={"interface": key, "platform": platform}
        )

    new_data, extra_keys = deduplicate(dict(event.data))
    total_size = event_size = _json_size(new_data)

    for key, value in six.iteritems(extra_keys):
        if nodestore.get(key) is not None:
            metrics.incr("eventstore.compressor.hits")
            # do not continue, nodestore.set() should bump TTL
        else:
            metrics.incr("eventstore.compressor.misses")
            total_size += _json_size(value)

        # key is md5sum of content
        # do not store actual value to keep prod impact to a minimum
        nodestore.set(key, {})

    metrics.timing("events.size.deduplicated", event_size)
    metrics.timing("events.size.deduplicated.total_written", total_size)
    metrics.timing("events.size.deduplicated.ratio", event_size / old_event_size)
    metrics.timing("events.size.deduplicated.total_written.ratio", total_size / old_event_size)

    # Deduplication made the payload larger -- log details for investigation.
    if total_size > old_event_size:
        nodestore_stats_logger.info(
            "events.size.deduplicated.details",
            extra={
                "project_id": project_id,
                "event_id": event_id,
                "total_size": total_size,
                "old_event_size": old_event_size,
            },
        )
def fetch_and_store(line):
    """Parse a tab-separated "project<TAB>event" line, fetch its node, store it.

    Missing nodes are reported on stderr instead of being stored.
    """
    project_id, event_id = line.strip().split("\t")
    node = nodestore.get(Event.generate_node_id(project_id, event_id))  # pylint: disable=no-member
    if node is not None:
        store(project_id, event_id, node, global_output_dir)
    else:
        print("WARNING: Got None from nodestore for project / event",
              project_id, event_id, file=sys.stderr)
def dump_variants(config, event: Event) -> str:
    """Render the event's grouping variants as a text dump, one section per key."""
    # Copied from sentry/tests/sentry/grouping/test_variants.py
    lines: List[str] = []
    variants = event.get_grouping_variants(force_config=config)
    for key, value in sorted(variants.items()):
        # Separator between consecutive variant sections.
        if lines:
            lines.append("-" * 74)
        lines.append("%s:" % key)
        _dump_variant(value, lines, 1)
    return "\n".join(lines)
def test_simple(self):
    """A captured internal message lands in nodestore under the internal project."""
    configure_sdk()
    Hub.current.bind_client(Hub.main.client)

    with self.tasks():
        event_id = raven.captureMessage("internal client test")

    node_id = Event.generate_node_id(settings.SENTRY_PROJECT, event_id)
    stored = nodestore.get(node_id)

    assert stored["project"] == settings.SENTRY_PROJECT
    assert stored["event_id"] == event_id
    assert stored["logentry"]["formatted"] == "internal client test"
def test_snuba_data(self):
    """Event attributes agree whether backed by nodestore or by Snuba row data."""
    self.store_event(
        data={
            "event_id": "a" * 32,
            "message": "Hello World!",
            "tags": {"logger": "foobar", "site": "foo", "server_name": "bar"},
            "user": {"id": "test", "email": "*****@*****.**"},
            "timestamp": iso_format(before_now(seconds=1)),
        },
        project_id=self.project.id,
    )

    # Same event constructed two ways: payload-only vs. snuba-row-backed.
    event_from_nodestore = Event(project_id=self.project.id, event_id="a" * 32)
    event_from_snuba = Event(
        project_id=self.project.id,
        event_id="a" * 32,
        snuba_data=snuba.raw_query(
            selected_columns=[col.value.event_name for col in eventstore.full_columns],
            filter_keys={"project_id": [self.project.id], "event_id": ["a" * 32]},
        )["data"][0],
    )

    assert event_from_nodestore.event_id == event_from_snuba.event_id
    assert event_from_nodestore.project_id == event_from_snuba.project_id
    assert event_from_nodestore.project == event_from_snuba.project
    assert event_from_nodestore.timestamp == event_from_snuba.timestamp
    assert event_from_nodestore.datetime == event_from_snuba.datetime
    assert event_from_nodestore.title == event_from_snuba.title
    assert event_from_nodestore.message["formatted"] == event_from_snuba.message
    assert event_from_nodestore.platform == event_from_snuba.platform
    assert event_from_nodestore.location == event_from_snuba.location
    assert event_from_nodestore.culprit == event_from_snuba.culprit
    assert event_from_nodestore.get_minimal_user() == event_from_snuba.get_minimal_user()
    assert event_from_nodestore.ip_address == event_from_snuba.ip_address
    assert event_from_nodestore.tags == event_from_snuba.tags

    # Group ID must be fetched from Snuba since it is not present in nodestore
    assert event_from_snuba.group_id
    assert event_from_snuba.group
    assert not event_from_nodestore.group_id
    assert not event_from_nodestore.group
def chunk(self):
    """Delete one batch of the group's events; return True while more may remain."""
    conditions = []
    if self.last_event is not None:
        # Keyset pagination past the previous batch's last event
        # (timestamp desc, then event_id desc).
        conditions.extend(
            [
                ["timestamp", "<=", self.last_event.timestamp],
                [
                    ["timestamp", "<", self.last_event.timestamp],
                    ["event_id", "<", self.last_event.event_id],
                ],
            ]
        )
    events = eventstore.get_unfetched_events(
        filter=eventstore.Filter(
            conditions=conditions, project_ids=[self.project_id], group_ids=[self.group_id]
        ),
        limit=self.DEFAULT_CHUNK_SIZE,
        referrer="deletions.group",
        orderby=["-timestamp", "-event_id"],
    )
    if not events:
        return False
    self.last_event = events[-1]

    # Remove from nodestore
    node_ids = [Event.generate_node_id(self.project_id, event.event_id) for event in events]
    nodestore.delete_multi(node_ids)

    from sentry.reprocessing2 import delete_unprocessed_events

    delete_unprocessed_events(events)

    # Remove EventAttachment and UserReport *again* as those may not have a
    # group ID, therefore there may be dangling ones after "regular" model
    # deletion.
    event_ids = [event.event_id for event in events]
    models.EventAttachment.objects.filter(
        event_id__in=event_ids, project_id=self.project_id
    ).delete()
    models.UserReport.objects.filter(
        event_id__in=event_ids, project_id=self.project_id
    ).delete()

    return True
def test_grouping_reset(self):
    """
    Regression test against a specific mutability bug involving grouping,
    stacktrace normalization and memoized interfaces
    """
    # Shared payload reused by both events below -- normalization of the
    # first must not mutate it in a way that changes the second's hashes.
    event_data = {
        "exception": {
            "values": [
                {
                    "type": "Hello",
                    "stacktrace": {
                        "frames": [
                            {
                                "function": "foo",
                            },
                            {
                                "function": "bar",
                            },
                        ]
                    },
                }
            ]
        },
    }

    enhancement = Enhancements.from_config_string(
        """
        function:foo category=foo_like
        category:foo_like -group
        """,
    )

    grouping_config = {
        "enhancements": enhancement.dumps(),
        "id": "mobile:2021-02-12",
    }

    event1 = Event(
        event_id="a" * 32,
        data=event_data,
        project_id=self.project.id,
    )
    variants1 = event1.get_grouping_variants(grouping_config, normalize_stacktraces=True)

    event2 = Event(
        event_id="b" * 32,
        data=event_data,
        project_id=self.project.id,
    )
    event2.interfaces  # Populate cache
    variants2 = event2.get_grouping_variants(grouping_config, normalize_stacktraces=True)

    # Hashes must match regardless of whether interfaces were memoized first.
    assert sorted(v.as_dict()["hash"] for v in variants1.values()) == sorted(
        v.as_dict()["hash"] for v in variants2.values()
    )
def test_dupe_message_id(self, eventstream_insert):
    # Saves the latest event to nodestore and eventstream
    project_id = 1
    event_id = "a" * 32
    node_id = Event.generate_node_id(project_id, event_id)

    # Saving the same event id twice: the second payload wins in nodestore.
    for message in ("first", "second"):
        manager = EventManager(make_event(event_id=event_id, message=message))
        manager.normalize()
        manager.save(project_id)
        assert nodestore.get(node_id)["logentry"]["formatted"] == message

    # Both saves still reach eventstream.
    assert eventstream_insert.call_count == 2
def test_recursion_breaker(self):
    """A failing internal-event save must not recursively capture itself."""
    configure_sdk()
    Hub.current.bind_client(Hub.main.client)

    # If this test terminates at all then we avoided recursion.
    with self.tasks():
        with mock.patch(
            "sentry.event_manager.EventManager.save", side_effect=ValueError("oh no!")
        ) as save:
            event_id = raven.captureMessage("internal client test")

    # Nothing was persisted because save() raised.
    event = nodestore.get(Event.generate_node_id(settings.SENTRY_PROJECT, event_id))
    assert event is None

    assert_mock_called_once_with_partial(
        save, settings.SENTRY_PROJECT, cache_key=u"e:{}:1".format(event_id)
    )
def test_encoding(self):
    """Non-JSON-serializable extras are coerced to a string representation."""
    configure_sdk()
    Hub.current.bind_client(Hub.main.client)

    class NotJSONSerializable:
        pass

    with self.tasks():
        event_id = raven.captureMessage(
            "check the req", extra={"request": NotJSONSerializable()}
        )

    stored = nodestore.get(Event.generate_node_id(settings.SENTRY_PROJECT, event_id))

    assert stored["project"] == settings.SENTRY_PROJECT
    assert stored["logentry"]["formatted"] == "check the req"
    # The un-serializable object is represented by its class name.
    assert "NotJSONSerializable" in stored["extra"]["request"]
def handle_remaining_events(project_id, new_group_id, event_ids,
                            remaining_events, from_timestamp, to_timestamp):
    """
    Delete or merge/move associated per-event data: nodestore, event
    attachments, user reports. Mark the event as "tombstoned" in Snuba.

    This is not full event deletion. Snuba can still only delete entire
    groups, however we must only run this task for event IDs that we don't
    intend to reuse for reprocessed events. An event ID that is once
    tombstoned cannot be inserted over in eventstream.

    See doccomment in sentry.reprocessing2.
    """
    assert remaining_events in ("delete", "keep")

    if remaining_events == "delete":
        # Drop per-event rows for both models.
        for model in (models.EventAttachment, models.UserReport):
            model.objects.filter(project_id=project_id, event_id__in=event_ids).delete()

        # Remove from nodestore
        nodestore.delete_multi(
            [Event.generate_node_id(project_id, event_id) for event_id in event_ids]
        )

        # Tell Snuba to delete the event data.
        eventstream.tombstone_events_unsafe(
            project_id, event_ids,
            from_timestamp=from_timestamp, to_timestamp=to_timestamp)
    elif remaining_events == "keep":
        eventstream.replace_group_unsafe(
            project_id,
            event_ids,
            new_group_id=new_group_id,
            from_timestamp=from_timestamp,
            to_timestamp=to_timestamp,
        )
    else:
        raise ValueError(
            f"Invalid value for remaining_events: {remaining_events}")
def get(self, request: Request, organization) -> Response:
    """
    Generate a list of data scrubbing selectors from existing event data.

    This list is used to auto-complete settings in "Data Scrubbing" /
    "Security and Privacy" settings.
    """
    event_id = request.GET.get("eventId", None)

    # For organization settings we access all projects the user has access
    # to. For the project level, `get_projects` will give us back a single
    # project.
    #
    # Filtering by the projects that self.get_projects returns deals with
    # permission concerns.
    #
    # The org-wide search for the event ID is quite slow, but we cannot fix
    # that without product redesign.
    projects = self.get_projects(request, organization)
    project_ids = [project.id for project in projects]

    # Maps selector path -> list of example values found in the event data.
    suggestions = {}

    if event_id:
        # go to nodestore directly instead of eventstore.get_events, which
        # would not return transaction events
        node_ids = [
            Event.generate_node_id(p, event_id) for p in project_ids
        ]
        all_data = nodestore.get_multi(node_ids)

        for data in filter(None, all_data.values()):
            for selector in pii_selector_suggestions_from_event(data):
                examples_ = suggestions.setdefault(selector["path"], [])
                if selector["value"]:
                    examples_.append(selector["value"])

    return Response({
        "suggestions": [{
            "type": "value",
            "value": value,
            "examples": examples
        } for value, examples in suggestions.items()]
    })
def save_event():
    """Save one fixed-id event through _save_aggregate and record the result."""
    payload = {"timestamp": time.time()}
    evt = Event(
        default_project.id,
        "89aeed6a472e4c5fb992d14df4d7e1b6",
        data=payload,
    )
    result = _save_aggregate(
        evt,
        flat_hashes=["a" * 32, "b" * 32],
        hierarchical_hashes=[],
        release=None,
        data=payload,
        level=10,
        culprit="",
    )
    return_values.append(result)
def inner(last_frame):
    """Save a random-id event whose deepest hierarchical hash varies by last_frame."""
    payload = {"timestamp": time.time()}
    evt = Event(
        default_project.id,
        uuid.uuid4().hex,
        data=payload,
    )
    # First three hierarchy levels are fixed; only the leaf differs per call.
    hierarchy = ["c" * 32, "d" * 32, "e" * 32, last_frame * 32]
    return _save_aggregate(
        evt,
        flat_hashes=["a" * 32, "b" * 32],
        hierarchical_hashes=hierarchy,
        release=None,
        data=payload,
        level=10,
        culprit="",
    )
def chunk(self):
    """Delete one batch of the group's events; return True while more may remain."""
    conditions = []
    if self.last_event is not None:
        # Keyset pagination past the previous batch's last event.
        last = self.last_event
        conditions.extend([
            ["timestamp", "<=", last.timestamp],
            [
                ["timestamp", "<", last.timestamp],
                ["event_id", "<", last.event_id],
            ],
        ])

    events = eventstore.get_unfetched_events(
        filter=eventstore.Filter(
            conditions=conditions,
            project_ids=[self.project_id],
            group_ids=[self.group_id],
        ),
        limit=self.DEFAULT_CHUNK_SIZE,
        referrer="deletions.group",
        orderby=["-timestamp", "-event_id"],
    )
    if not events:
        return False
    self.last_event = events[-1]

    # Remove from nodestore
    nodestore.delete_multi(
        [Event.generate_node_id(self.project_id, e.event_id) for e in events]
    )

    delete_unprocessed_events(events)

    # Remove EventAttachment and UserReport
    event_ids = [e.event_id for e in events]
    EventAttachment.objects.filter(event_id__in=event_ids,
                                   project_id=self.project_id).delete()
    UserReport.objects.filter(event_id__in=event_ids,
                              project_id=self.project_id).delete()

    return True
def passes(self, event: Event, state: EventState, **kwargs: Any) -> bool:
    """Return True when the event's level tag satisfies the configured match."""
    desired_level_raw = self.get_option("level")
    desired_match = self.get_option("match")
    if not (desired_level_raw and desired_match):
        return False

    desired_level = int(desired_level_raw)

    # Fetch the event level from the tags since event.level is
    # event.group.level which may have changed
    try:
        level: int = LOG_LEVELS_MAP[event.get_tag("level")]
    except KeyError:
        return False

    # Dispatch on the configured comparison; unknown match types fail closed.
    outcomes = {
        MatchType.EQUAL: level == desired_level,
        MatchType.GREATER_OR_EQUAL: level >= desired_level,
        MatchType.LESS_OR_EQUAL: level <= desired_level,
    }
    return outcomes.get(desired_match, False)