def test_bind_nodes(self):
    """
    Test that bind_nodes populates _node_data
    """
    min_ago = iso_format(before_now(minutes=1))
    self.store_event(
        data={
            "event_id": "a" * 32,
            "timestamp": min_ago,
            "user": {"id": u"user1"},
        },
        project_id=self.project.id,
    )
    self.store_event(
        data={
            "event_id": "b" * 32,
            "timestamp": min_ago,
            "user": {"id": u"user2"},
        },
        project_id=self.project.id,
    )

    event = Event(project_id=self.project.id, event_id="a" * 32)
    event2 = Event(project_id=self.project.id, event_id="b" * 32)
    assert event.data._node_data is None
    self.eventstorage.bind_nodes([event, event2], "data")
    assert event.data._node_data is not None
    assert event.data["user"]["id"] == u"user1"
def test_event_node_id(self):
    # Create an event without specifying node_id. A node_id should be generated.
    e1 = Event(project_id=1, event_id="abc", data={"foo": "bar"})
    assert e1.data.id is not None, "We should have generated a node_id for this event"
    e1_node_id = e1.data.id
    e1.data.save()
    e1_body = nodestore.get(e1_node_id)
    assert e1_body == {"foo": "bar"}, "The event body should be in nodestore"

    e1 = Event(project_id=1, event_id="abc")
    assert e1.data.data == {"foo": "bar"}, "The event body should be loaded from nodestore"
    assert e1.data.id == e1_node_id, "The event's node_id should be the same after load"

    # Event with no data should not be saved to nodestore
    e2 = Event(project_id=1, event_id="mno", data=None)
    e2_node_id = e2.data.id
    assert e2.data.data == {}  # NodeData returns {} by default
    eventstore.bind_nodes([e2], "data")
    assert e2.data.data == {}
    e2_body = nodestore.get(e2_node_id)
    assert e2_body is None
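# A minimal sketch of the deterministic node_id scheme the test above relies on:
# hashing (project_id, event_id) so re-creating the same Event yields the same
# node_id on load. The exact hashing used by Event.generate_node_id is an
# assumption here, not taken from the snippet itself.
from hashlib import md5


def generate_node_id_sketch(project_id, event_id):
    # Same inputs always map to the same nodestore key.
    return md5("{}:{}".format(project_id, event_id).encode("utf-8")).hexdigest()


assert generate_node_id_sketch(1, "abc") == generate_node_id_sketch(1, "abc")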
def test_bind_nodes(self):
    """
    Test that bind_nodes populates _node_data
    """
    min_ago = iso_format(before_now(minutes=1))
    self.store_event(
        data={"event_id": "a" * 32, "timestamp": min_ago, "user": {"id": u"user1"}},
        project_id=self.project.id,
    )
    self.store_event(
        data={"event_id": "b" * 32, "timestamp": min_ago, "user": {"id": u"user2"}},
        project_id=self.project.id,
    )

    event = Event(project_id=self.project.id, event_id="a" * 32)
    event2 = Event(project_id=self.project.id, event_id="b" * 32)
    assert event.data._node_data is None
    self.eventstorage.bind_nodes([event, event2], "data")
    assert event.data._node_data is not None
    assert event.data["user"]["id"] == u"user1"

    # bind_nodes is a noop if node data was already fetched
    with mock.patch.object(nodestore, "get_multi") as mock_get_multi:
        self.eventstorage.bind_nodes([event, event2])
        assert mock_get_multi.call_count == 0
def test_snuba_data(self):
    self.store_event(
        data={
            "event_id": "a" * 32,
            "message": "Hello World!",
            "tags": {"logger": "foobar", "site": "foo", "server_name": "bar"},
            "user": {"id": "test", "email": "*****@*****.**"},
            "timestamp": iso_format(before_now(seconds=1)),
        },
        project_id=self.project.id,
    )

    event_from_nodestore = Event(project_id=self.project.id, event_id="a" * 32)
    event_from_snuba = Event(
        project_id=self.project.id,
        event_id="a" * 32,
        snuba_data=snuba.raw_query(
            selected_columns=[col.value.event_name for col in eventstore.full_columns],
            filter_keys={"project_id": [self.project.id], "event_id": ["a" * 32]},
        )["data"][0],
    )

    assert event_from_nodestore.event_id == event_from_snuba.event_id
    assert event_from_nodestore.project_id == event_from_snuba.project_id
    assert event_from_nodestore.project == event_from_snuba.project
    assert event_from_nodestore.timestamp == event_from_snuba.timestamp
    assert event_from_nodestore.datetime == event_from_snuba.datetime
    assert event_from_nodestore.title == event_from_snuba.title
    assert event_from_nodestore.message["formatted"] == event_from_snuba.message
    assert event_from_nodestore.platform == event_from_snuba.platform
    assert event_from_nodestore.location == event_from_snuba.location
    assert event_from_nodestore.culprit == event_from_snuba.culprit
    assert event_from_nodestore.get_minimal_user() == event_from_snuba.get_minimal_user()
    assert event_from_nodestore.ip_address == event_from_snuba.ip_address
    assert event_from_nodestore.tags == event_from_snuba.tags

    # Group ID must be fetched from Snuba since it is not present in nodestore
    assert event_from_snuba.group_id
    assert event_from_snuba.group

    assert not event_from_nodestore.group_id
    assert not event_from_nodestore.group
def test_grouping_reset(self):
    """
    Regression test against a specific mutability bug involving grouping,
    stacktrace normalization and memoized interfaces
    """
    event_data = {
        "exception": {
            "values": [
                {
                    "type": "Hello",
                    "stacktrace": {
                        "frames": [
                            {"function": "foo"},
                            {"function": "bar"},
                        ]
                    },
                }
            ]
        },
    }

    enhancement = Enhancements.from_config_string(
        """
        function:foo category=foo_like
        category:foo_like -group
        """,
    )

    grouping_config = {
        "enhancements": enhancement.dumps(),
        "id": "mobile:2021-02-12",
    }

    event1 = Event(
        event_id="a" * 32,
        data=event_data,
        project_id=self.project.id,
    )
    variants1 = event1.get_grouping_variants(grouping_config, normalize_stacktraces=True)

    event2 = Event(
        event_id="b" * 32,
        data=event_data,
        project_id=self.project.id,
    )
    event2.interfaces  # Populate cache
    variants2 = event2.get_grouping_variants(grouping_config, normalize_stacktraces=True)

    assert sorted(v.as_dict()["hash"] for v in variants1.values()) == sorted(
        v.as_dict()["hash"] for v in variants2.values()
    )
def _process(self, filename):
    with open(filename, 'r') as file_:
        event_data = json.load(file_)

    event_id = event_data['event_id']
    event = Event(self._project_id, event_id, group_id=None, data=event_data)
    flat_hashes, hierarchical_hashes = event.get_hashes(force_config=self._config)

    if not hierarchical_hashes:
        # Prevent events ending up in the project node
        hierarchical_hashes = ["NO_HASH"]

    item = extract_event_data(event)
    item['json_url'] = Path(filename).relative_to(self._event_dir)
    # It may seem wasteful to do this for every event, but it's faster than
    # synchronising between processes over when to generate it.
    item['crash_report'] = get_crash_report(event)
    item['dump_variants'] = dump_variants(self._config, event)

    return flat_hashes, hierarchical_hashes, item
def _process_snuba_results(query_res, group: Group, user):
    event_ids = {
        row["latest_event_id"]: Event.generate_node_id(group.project_id, row["latest_event_id"])
        for row in query_res
    }
    node_data = nodestore.get_multi(list(event_ids.values()))

    response = []

    for row in query_res:
        response_item = {
            "hash": row["new_materialized_hash"],
            "eventCount": row["event_count"],
        }
        event_id = row["latest_event_id"]
        event_data = node_data.get(event_ids[event_id], None)
        if event_data is not None:
            event = Event(group.project_id, event_id, group_id=group.id, data=event_data)
            response_item["latestEvent"] = serialize(event, user, EventSerializer())

        response.append(response_item)

    return response
def test_serialize_event(self):
    event = self.store_event(
        data={
            "event_id": "a" * 32,
            "message": "Hello World!",
            "tags": {"logger": "foobar", "site": "foo", "server_name": "bar"},
        },
        project_id=self.project.id,
    )
    group_id = event.group_id

    serialized = serialize(event)
    assert serialized["eventID"] == "a" * 32
    assert serialized["projectID"] == six.text_type(self.project.id)
    assert serialized["groupID"] == six.text_type(group_id)
    assert serialized["message"] == "Hello World!"

    # Can serialize an event by loading node data
    event = Event(project_id=self.project.id, event_id="a" * 32, group_id=group_id)
    serialized = serialize(event)
    assert serialized["eventID"] == "a" * 32
    assert serialized["projectID"] == six.text_type(self.project.id)
    assert serialized["groupID"] == six.text_type(group_id)
    assert serialized["message"] == "Hello World!"
def get_task_kwargs_for_insert(operation, event_data, task_state=None):
    if task_state and task_state.get("skip_consume", False):
        return None  # nothing to do

    event_data["datetime"] = datetime.strptime(
        event_data["datetime"], "%Y-%m-%dT%H:%M:%S.%fZ"
    ).replace(tzinfo=pytz.utc)

    # This data is already normalized as we're currently in the
    # ingestion pipeline and the event was in store normalization
    # just a few seconds ago. Running it through Rust
    # (re)normalization here again would be too slow.
    event_data["data"] = EventDict(event_data["data"], skip_renormalization=True)

    event = Event(
        event_id=event_data["event_id"],
        group_id=event_data["group_id"],
        project_id=event_data["project_id"],
    )
    event.data.bind_data(event_data["data"])

    kwargs = {"event": event, "primary_hash": event_data["primary_hash"]}

    for name in ("is_new", "is_regression", "is_new_group_environment"):
        kwargs[name] = task_state[name]

    return kwargs
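# Worked example of the timestamp parsing above: the format string
# "%Y-%m-%dT%H:%M:%S.%fZ" treats the trailing "Z" as a literal character, so
# the UTC timezone has to be attached explicitly via .replace(tzinfo=pytz.utc).
from datetime import datetime

import pytz

parsed = datetime.strptime("2021-02-12T10:30:00.000000Z", "%Y-%m-%dT%H:%M:%S.%fZ").replace(
    tzinfo=pytz.utc
)
assert parsed.tzinfo is pytz.utc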
def test_get_event_by_id_cached(self):
    # Simulate getting an event that exists in eventstore but has not yet been written to snuba.
    with mock.patch("sentry.eventstore.snuba.backend.Event") as mock_event:
        dummy_event = Event(
            project_id=self.project2.id,
            event_id="f" * 32,
            data={"something": "hi", "timestamp": self.min_ago},
        )
        mock_event.return_value = dummy_event

        event = self.eventstore.get_event_by_id(self.project2.id, "f" * 32)
        # Result of query should be None
        assert event is None

    # Now we store the event properly, so it will exist in Snuba.
    self.store_event(
        data={"event_id": "f" * 32, "timestamp": self.min_ago},
        project_id=self.project2.id,
    )

    # Make sure that the negative cache isn't causing the event to not show up
    event = self.eventstore.get_event_by_id(self.project2.id, "f" * 32)
    assert event.event_id == "f" * 32
    assert event.project_id == self.project2.id
    assert event.group_id == event.group.id
def capture_nodestore_stats(project_id, event_id):
    set_current_project(project_id)

    from sentry import nodestore
    from sentry.eventstore.compressor import deduplicate
    from sentry.eventstore.models import Event

    event = Event(project_id=project_id, event_id=event_id)
    old_event_size = _json_size(dict(event.data))

    if not event.data:
        metrics.incr("eventstore.compressor.error", tags={"reason": "no_data"})
        return

    platform = event.platform

    for key, value in six.iteritems(event.interfaces):
        len_value = _json_size(value.to_json())
        metrics.timing(
            "events.size.interface", len_value, tags={"interface": key, "platform": platform}
        )

    new_data, extra_keys = deduplicate(dict(event.data))

    total_size = event_size = _json_size(new_data)

    for key, value in six.iteritems(extra_keys):
        if nodestore.get(key) is not None:
            metrics.incr("eventstore.compressor.hits")
            # do not continue, nodestore.set() should bump TTL
        else:
            metrics.incr("eventstore.compressor.misses")
            total_size += _json_size(value)

        # key is md5sum of content
        # do not store actual value to keep prod impact to a minimum
        nodestore.set(key, {})

    metrics.timing("events.size.deduplicated", event_size)
    metrics.timing("events.size.deduplicated.total_written", total_size)
    metrics.timing("events.size.deduplicated.ratio", event_size / old_event_size)
    metrics.timing("events.size.deduplicated.total_written.ratio", total_size / old_event_size)

    if total_size > old_event_size:
        nodestore_stats_logger.info(
            "events.size.deduplicated.details",
            extra={
                "project_id": project_id,
                "event_id": event_id,
                "total_size": total_size,
                "old_event_size": old_event_size,
            },
        )
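# The snippet above assumes a _json_size helper. A plausible sketch of it
# (an assumption; the real helper may differ in encoding details):
import json


def _json_size_sketch(data):
    # Length of the JSON-serialized payload, used as a rough byte count.
    return len(json.dumps(data))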
def save_event():
    data = {"timestamp": time.time()}
    evt = Event(
        default_project.id,
        "89aeed6a472e4c5fb992d14df4d7e1b6",
        data=data,
    )

    return_values.append(
        _save_aggregate(
            evt,
            flat_hashes=["a" * 32, "b" * 32],
            hierarchical_hashes=[],
            release=None,
            data=data,
            level=10,
            culprit="",
        )
    )
def _process_snuba_results(query_res, group: Group, id: int, user):
    event_ids = {
        row["latest_event_id"]: Event.generate_node_id(group.project_id, row["latest_event_id"])
        for row in query_res
    }
    node_data = nodestore.get_multi(list(event_ids.values()))

    response = []

    for row in query_res:
        response_item = {
            "hash": row["new_materialized_hash"],
            "eventCount": row["event_count"],
        }
        event_id = row["latest_event_id"]
        event_data = node_data.get(event_ids[event_id], None)
        if event_data is not None:
            event = Event(group.project_id, event_id, group_id=group.id, data=event_data)
            response_item["latestEvent"] = serialize(event, user, EventSerializer())

            tree_label = get_path(event_data, "hierarchical_tree_labels", id) or get_path(
                event_data, "hierarchical_tree_labels", -1
            )

            # Rough approximation of what happens with Group title
            event_type = get_event_type(event.data)
            metadata = dict(event.get_event_metadata())
            metadata["current_tree_label"] = tree_label
            # Force rendering of grouping tree labels irrespective of platform
            metadata["display_title_with_tree_label"] = True
            title = event_type.get_title(metadata)
            response_item["title"] = title or event.title
            response_item["metadata"] = metadata

        response.append(response_item)

    return response
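# Plain-Python equivalent of the get_path fallback above (a sketch, assuming
# get_path returns None for an out-of-range list index): prefer the label at
# the requested hierarchy level, else fall back to the deepest one.
def pick_tree_label_sketch(labels, level):
    if not labels:
        return None
    try:
        return labels[level] or labels[-1]
    except IndexError:
        return labels[-1]


assert pick_tree_label_sketch([["a"], ["b"]], 5) == ["b"]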
def inner(last_frame):
    data = {"timestamp": time.time()}
    evt = Event(
        default_project.id,
        uuid.uuid4().hex,
        data=data,
    )

    return _save_aggregate(
        evt,
        flat_hashes=["a" * 32, "b" * 32],
        hierarchical_hashes=["c" * 32, "d" * 32, "e" * 32, last_frame * 32],
        release=None,
        data=data,
        level=10,
        culprit="",
    )
def test_bind_node_data(self):
    event = self.store_event(
        data={
            "event_id": "a" * 32,
            "message": "test",
            "timestamp": iso_format(before_now(seconds=1)),
            "type": "error",
        },
        project_id=self.project.id,
    )
    group_id = event.group.id

    e1 = Event(self.project.id, "a" * 32, group_id=group_id)
    e1.bind_node_data()

    # Once node data is bound, further calls should not hit nodestore again.
    with mock.patch.object(nodestore, "get") as mock_get:
        event.bind_node_data()
        event.bind_node_data()
        assert mock_get.call_count == 0
def save_event():
    data = {"timestamp": time.time()}
    evt = Event(
        default_project.id,
        "89aeed6a472e4c5fb992d14df4d7e1b6",
        data=data,
    )

    return_values.append(
        _save_aggregate(
            evt,
            hashes=CalculatedHashes(
                hashes=["a" * 32, "b" * 32],
                hierarchical_hashes=[],
                tree_labels=[],
            ),
            release=None,
            metadata={},
            received_timestamp=None,
            level=10,
            culprit="",
        )
    )
def inner(last_frame):
    data = {"timestamp": time.time(), "type": "error"}
    evt = Event(
        default_project.id,
        uuid.uuid4().hex,
        data=data,
    )

    with task_runner():
        return _save_aggregate(
            evt,
            hashes=CalculatedHashes(
                hashes=["a" * 32, "b" * 32],
                hierarchical_hashes=["c" * 32, "d" * 32, "e" * 32, last_frame * 32],
                tree_labels=[
                    [{"function": "foo"}],
                    [{"function": "bar"}],
                    [{"function": "baz"}],
                    [{"function": "bam"}],
                ],
            ),
            release=None,
            metadata={},
            received_timestamp=None,
            level=10,
            culprit="",
        )
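# For context: a minimal stand-in for the CalculatedHashes container used in
# the two snippets above. The field names are taken from the call sites; the
# dataclass shape is an assumption, the real class may carry more logic.
from dataclasses import dataclass
from typing import Any, List


@dataclass
class CalculatedHashesSketch:
    hashes: List[str]
    hierarchical_hashes: List[str]
    tree_labels: List[Any]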
def post_process_group(
    is_new, is_regression, is_new_group_environment, cache_key, group_id=None, **kwargs
):
    """
    Fires post processing hooks for a group.
    """
    from sentry.eventstore.models import Event
    from sentry.eventstore.processing import event_processing_store
    from sentry.reprocessing2 import is_reprocessed_event
    from sentry.utils import snuba

    with snuba.options_override({"consistent": True}):
        # We use the data being present/missing in the processing store
        # to ensure that we don't duplicate work should the forwarding consumers
        # need to rewind history.
        data = event_processing_store.get(cache_key)
        if not data:
            logger.info(
                "post_process.skipped",
                extra={"cache_key": cache_key, "reason": "missing_cache"},
            )
            return
        event = Event(
            project_id=data["project"], event_id=data["event_id"], group_id=group_id, data=data
        )

        set_current_event_project(event.project_id)

        is_transaction_event = not bool(event.group_id)

        from sentry.models import EventDict, Organization, Project

        # Re-bind node data to avoid renormalization. We only want to
        # renormalize when loading old data from the database.
        event.data = EventDict(event.data, skip_renormalization=True)

        # Re-bind Project and Org since we're reading the Event object
        # from cache which may contain stale parent models.
        event.project = Project.objects.get_from_cache(id=event.project_id)
        event.project.set_cached_field_value(
            "organization", Organization.objects.get_from_cache(id=event.project.organization_id)
        )

        # Simplified post processing for transaction events.
        # This should eventually be completely removed and transactions
        # will not go through any post processing.
        if is_transaction_event:
            transaction_processed.send_robust(
                sender=post_process_group,
                project=event.project,
                event=event,
            )

            event_processing_store.delete_by_key(cache_key)

            return

        is_reprocessed = is_reprocessed_event(event.data)

        # NOTE: we must pass through the full Event object, and not an
        # event_id since the Event object may not actually have been stored
        # in the database due to sampling.
        from sentry.models import Commit, GroupInboxReason
        from sentry.models.group import get_group_with_redirect
        from sentry.models.groupinbox import add_group_to_inbox
        from sentry.rules.processor import RuleProcessor
        from sentry.tasks.groupowner import process_suspect_commits
        from sentry.tasks.servicehooks import process_service_hook

        # Re-bind Group since we're reading the Event object
        # from cache, which may contain a stale group and project
        event.group, _ = get_group_with_redirect(event.group_id)
        event.group_id = event.group.id

        event.group.project = event.project
        event.group.project.set_cached_field_value("organization", event.project.organization)

        bind_organization_context(event.project.organization)

        _capture_stats(event, is_new)

        if is_reprocessed and is_new:
            add_group_to_inbox(event.group, GroupInboxReason.REPROCESSED)

        if not is_reprocessed:
            # we process snoozes before rules as it might create a regression
            # but not if it's new because you can't immediately snooze a new group
            has_reappeared = False if is_new else process_snoozes(event.group)
            if not has_reappeared:  # If true, we added the .UNIGNORED reason already
                if is_new:
                    add_group_to_inbox(event.group, GroupInboxReason.NEW)
                elif is_regression:
                    add_group_to_inbox(event.group, GroupInboxReason.REGRESSION)

            handle_owner_assignment(event.project, event.group, event)

            rp = RuleProcessor(
                event, is_new, is_regression, is_new_group_environment, has_reappeared
            )
            has_alert = False
            # TODO(dcramer): ideally this would fanout, but serializing giant
            # objects back and forth isn't super efficient
            for callback, futures in rp.apply():
                has_alert = True
                safe_execute(callback, event, futures, _with_transaction=False)

            try:
                lock = locks.get(
                    f"w-o:{event.group_id}-d-l",
                    duration=10,
                )
                with lock.acquire():
                    has_commit_key = f"w-o:{event.project.organization_id}-h-c"
                    org_has_commit = cache.get(has_commit_key)
                    if org_has_commit is None:
                        org_has_commit = Commit.objects.filter(
                            organization_id=event.project.organization_id
                        ).exists()
                        cache.set(has_commit_key, org_has_commit, 3600)

                    if org_has_commit:
                        group_cache_key = f"w-o-i:g-{event.group_id}"
                        if cache.get(group_cache_key):
                            metrics.incr(
                                "sentry.tasks.process_suspect_commits.debounce",
                                tags={"detail": "w-o-i:g debounce"},
                            )
                        else:
                            from sentry.utils.committers import get_frame_paths

                            cache.set(group_cache_key, True, 604800)  # 1 week in seconds
                            event_frames = get_frame_paths(event.data)
                            process_suspect_commits.delay(
                                event_id=event.event_id,
                                event_platform=event.platform,
                                event_frames=event_frames,
                                group_id=event.group_id,
                                project_id=event.project_id,
                            )
            except UnableToAcquireLock:
                pass
            except Exception:
                logger.exception("Failed to process suspect commits")

        if features.has("projects:servicehooks", project=event.project):
            allowed_events = {"event.created"}
            if has_alert:
                allowed_events.add("event.alert")

            if allowed_events:
                for servicehook_id, events in _get_service_hooks(project_id=event.project_id):
                    if any(e in allowed_events for e in events):
                        process_service_hook.delay(servicehook_id=servicehook_id, event=event)

        from sentry.tasks.sentry_apps import process_resource_change_bound

        if event.get_event_type() == "error" and _should_send_error_created_hooks(event.project):
            process_resource_change_bound.delay(
                action="created", sender="Error", instance_id=event.event_id, instance=event
            )
        if is_new:
            process_resource_change_bound.delay(
                action="created", sender="Group", instance_id=event.group_id
            )

        from sentry.plugins.base import plugins

        for plugin in plugins.for_project(event.project):
            # NB: "is_regresion" is the existing kwarg spelling in this codebase.
            plugin_post_process_group(
                plugin_slug=plugin.slug, event=event, is_new=is_new, is_regresion=is_regression
            )

        from sentry import similarity

        safe_execute(similarity.record, event.project, [event], _with_transaction=False)

        # Patch attachments that were ingested on the standalone path.
        update_existing_attachments(event)

        if not is_reprocessed:
            event_processed.send_robust(
                sender=post_process_group,
                project=event.project,
                event=event,
                primary_hash=kwargs.get("primary_hash"),
            )

        with metrics.timer("tasks.post_process.delete_event_cache"):
            event_processing_store.delete_by_key(cache_key)
def post_process_group(
    is_new, is_regression, is_new_group_environment, cache_key, group_id=None, event=None, **kwargs
):
    """
    Fires post processing hooks for a group.
    """
    from sentry.eventstore.models import Event
    from sentry.eventstore.processing import event_processing_store
    from sentry.utils import snuba
    from sentry.reprocessing2 import is_reprocessed_event

    with snuba.options_override({"consistent": True}):
        # We use the data being present/missing in the processing store
        # to ensure that we don't duplicate work should the forwarding consumers
        # need to rewind history.
        #
        # While we always send the cache_key and never send the event parameter now,
        # the code to handle `event` has to stick around for a self-hosted release cycle.
        if cache_key and event is None:
            data = event_processing_store.get(cache_key)
            if not data:
                logger.info(
                    "post_process.skipped",
                    extra={"cache_key": cache_key, "reason": "missing_cache"},
                )
                return
            event = Event(
                project_id=data["project"], event_id=data["event_id"], group_id=group_id, data=data
            )
        elif event and check_event_already_post_processed(event):
            if cache_key:
                event_processing_store.delete_by_key(cache_key)
            logger.info(
                "post_process.skipped",
                extra={
                    "reason": "duplicate",
                    "project_id": event.project_id,
                    "event_id": event.event_id,
                },
            )
            return

        if is_reprocessed_event(event.data):
            logger.info(
                "post_process.skipped",
                extra={
                    "project_id": event.project_id,
                    "event_id": event.event_id,
                    "reason": "reprocessed",
                },
            )
            return

        set_current_project(event.project_id)

        # NOTE: we must pass through the full Event object, and not an
        # event_id since the Event object may not actually have been stored
        # in the database due to sampling.
        from sentry.models import Project, Organization, EventDict
        from sentry.models.group import get_group_with_redirect
        from sentry.rules.processor import RuleProcessor
        from sentry.tasks.servicehooks import process_service_hook

        # Re-bind node data to avoid renormalization. We only want to
        # renormalize when loading old data from the database.
        event.data = EventDict(event.data, skip_renormalization=True)

        if event.group_id:
            # Re-bind Group since we're reading the Event object
            # from cache, which may contain a stale group and project
            event.group, _ = get_group_with_redirect(event.group_id)
            event.group_id = event.group.id

        # Re-bind Project and Org since we're reading the Event object
        # from cache which may contain stale parent models.
        event.project = Project.objects.get_from_cache(id=event.project_id)
        event.project._organization_cache = Organization.objects.get_from_cache(
            id=event.project.organization_id
        )
        bind_organization_context(event.project.organization)

        _capture_stats(event, is_new)

        if event.group_id:
            # we process snoozes before rules as it might create a regression
            # but not if it's new because you can't immediately snooze a new group
            has_reappeared = False if is_new else process_snoozes(event.group)

            handle_owner_assignment(event.project, event.group, event)

            rp = RuleProcessor(
                event, is_new, is_regression, is_new_group_environment, has_reappeared
            )
            has_alert = False
            # TODO(dcramer): ideally this would fanout, but serializing giant
            # objects back and forth isn't super efficient
            for callback, futures in rp.apply():
                has_alert = True
                with sentry_sdk.start_transaction(
                    op="post_process_group", name="rule_processor_apply", sampled=True
                ):
                    safe_execute(callback, event, futures)

            if features.has("projects:servicehooks", project=event.project):
                allowed_events = set(["event.created"])
                if has_alert:
                    allowed_events.add("event.alert")

                if allowed_events:
                    for servicehook_id, events in _get_service_hooks(
                        project_id=event.project_id
                    ):
                        if any(e in allowed_events for e in events):
                            process_service_hook.delay(servicehook_id=servicehook_id, event=event)

            from sentry.tasks.sentry_apps import process_resource_change_bound

            if event.get_event_type() == "error" and _should_send_error_created_hooks(
                event.project
            ):
                process_resource_change_bound.delay(
                    action="created", sender="Error", instance_id=event.event_id, instance=event
                )
            if is_new:
                process_resource_change_bound.delay(
                    action="created", sender="Group", instance_id=event.group_id
                )

            from sentry.plugins.base import plugins

            for plugin in plugins.for_project(event.project):
                # NB: "is_regresion" is the existing kwarg spelling in this codebase.
                plugin_post_process_group(
                    plugin_slug=plugin.slug, event=event, is_new=is_new, is_regresion=is_regression
                )

            event_processed.send_robust(
                sender=post_process_group,
                project=event.project,
                event=event,
                primary_hash=kwargs.get("primary_hash"),
            )

        with metrics.timer("tasks.post_process.delete_event_cache"):
            event_processing_store.delete_by_key(cache_key)
def test_accepts_valid_ref(self):
    self.store_event(data={"event_id": "a" * 32}, project_id=self.project.id)

    event = Event(project_id=self.project.id, event_id="a" * 32)
    event.data.bind_ref(event)
    event.bind_node_data()

    assert event.data.ref == event.project.id
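# Conceptual sketch of the ref mechanism exercised above (hypothetical,
# simplified): node data records a reference to its owner, here the owning
# project id, so data loaded from nodestore can later be checked against the
# parent it is expected to belong to.
class NodeDataRefSketch:
    def __init__(self):
        self.ref = None

    def bind_ref(self, instance):
        # Remember which project this node belongs to.
        self.ref = instance.project_id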