def extract_contexts_custom(
    self,
    output: MutableMapping[str, Any],
    event: Mapping[str, Any],
    contexts: Mapping[str, Any],
    metadata: Optional[KafkaMessageMetadata] = None,
) -> None:
    """Populate the flattened-contexts column from the non-promoted contexts.

    The key/value pairs left after context promotion are flattened into a
    single string stored under ``_contexts_flattened``. ``event`` and
    ``metadata`` are part of the hook signature but unused here.
    """
    ctx_keys, ctx_values = extract_extra_contexts(contexts)
    output["_contexts_flattened"] = flatten_nested_field(ctx_keys, ctx_values)
def process_insert(
    self, event: Mapping[str, Any], metadata: Optional[KafkaMessageMetadata] = None
) -> Optional[Mapping[str, Any]]:
    """Convert one "insert" event payload into a processed row dict.

    Returns None when the event should be skipped (filtered by
    ``_should_process``) or carries no ``data`` payload. The various
    ``extract_*`` hooks each mutate ``processed`` in place, so their call
    order is significant.
    """
    if not self._should_process(event):
        return None

    processed: MutableMapping[str, Any] = {"deleted": 0}
    extract_project_id(processed, event)
    self._extract_event_id(processed, event)
    processed["retention_days"] = enforce_retention(
        event,
        datetime.strptime(event["datetime"], settings.PAYLOAD_DATETIME_FORMAT),
    )

    self.extract_required(processed, event)

    event_data = event.get("data", {})
    # HACK: https://sentry.io/sentry/snuba/issues/802102397/
    if not event_data:
        logger.error("No data for event: %s", event, exc_info=True)
        return None

    self.extract_common(processed, event, metadata)
    self.extract_custom(processed, event, metadata)

    sdk_info = event_data.get("sdk", None) or {}
    self.extract_sdk(processed, sdk_info)

    tag_map = _as_dict_safe(event_data.get("tags", None))
    self.extract_promoted_tags(processed, tag_map)
    self.extract_tags_custom(processed, event, tag_map, metadata)

    # Promotion removes handled entries from context_map, so the
    # extract_extra_contexts call below only sees the leftovers.
    context_map = event_data.get("contexts", None) or {}
    self.extract_promoted_contexts(processed, context_map, tag_map)
    self.extract_contexts_custom(processed, event, context_map, metadata)

    processed["contexts.key"], processed["contexts.value"] = extract_extra_contexts(
        context_map
    )
    processed["tags.key"], processed["tags.value"] = extract_extra_tags(tag_map)
    processed["_tags_flattened"] = flatten_nested_field(
        processed["tags.key"], processed["tags.value"]
    )

    # The exception interface may live under either the new or legacy key.
    exc_interface = (
        event_data.get("exception", event_data.get("sentry.interfaces.Exception", None))
        or {}
    )
    stack_values = exc_interface.get("values", None) or []
    self.extract_stacktraces(processed, stack_values)

    if metadata is not None:
        processed["offset"] = metadata.offset
        processed["partition"] = metadata.partition

    return processed
def _process_contexts_and_user(
    self,
    processed: MutableMapping[str, Any],
    event_dict: EventDict,
) -> None:
    """Fill the context columns and promoted user columns on ``processed``.

    Mutates the event's contexts in place (geo injection, skip-list
    deletion) before sanitizing, so statement order matters.
    """
    data = event_dict["data"]
    ctx_map: MutableMapping[str, Any] = _as_dict_safe(data.get("contexts", None))
    raw_user = data.get("user", data.get("sentry.interfaces.User", None)) or {}

    # Promote the user's geo sub-dict to a context unless one already exists.
    geo_info = raw_user.get("geo", None) or {}
    if "geo" not in ctx_map and isinstance(geo_info, dict):
        ctx_map["geo"] = geo_info

    # Drop contexts this project has opted out of storing.
    for ctx_name in settings.TRANSACT_SKIP_CONTEXT_STORE.get(
        processed["project_id"], set()
    ):
        if ctx_name in ctx_map:
            del ctx_map[ctx_name]

    sanitized = self._sanitize_contexts(processed, event_dict)
    processed["contexts.key"], processed["contexts.value"] = extract_extra_contexts(
        sanitized
    )

    user_fields: MutableMapping[str, Any] = {}
    extract_user(user_fields, raw_user)
    processed["user_name"] = user_fields["username"]
    processed["user_id"] = user_fields["user_id"]
    processed["user_email"] = user_fields["email"]

    # Store the IP in the column matching its address family.
    parsed_ip = _ensure_valid_ip(user_fields["ip_address"])
    if parsed_ip:
        if parsed_ip.version == 4:
            processed["ip_address_v4"] = str(parsed_ip)
        elif parsed_ip.version == 6:
            processed["ip_address_v6"] = str(parsed_ip)
def process_message(self, message, metadata) -> Optional[ProcessedMessage]:
    """Process a raw Kafka transaction message into an insertable row.

    Returns None for payloads that are malformed, not version 0/1/2, not
    "insert" messages, not of type "transaction", or missing the trace
    context / required fields.
    """
    processed = {"deleted": 0}
    if not (isinstance(message, (list, tuple)) and len(message) >= 2):
        return None
    version = message[0]
    if version not in (0, 1, 2):
        return None
    type_, event = message[1:3]
    if type_ != "insert":
        return None

    data = event["data"]
    event_type = data.get("type")
    if event_type != "transaction":
        return None
    extract_base(processed, event)
    # FIX: use a naive UTC datetime, consistent with the newer processor in
    # this file. datetime.fromtimestamp() interpreted the epoch timestamp in
    # the server's local timezone, skewing retention computation.
    # NOTE(review): the newer processor catches EventTooOld here and returns
    # None — confirm whether this one should do the same.
    processed["retention_days"] = enforce_retention(
        event,
        datetime.utcfromtimestamp(data["timestamp"]),
    )

    if not data.get("contexts", {}).get("trace"):
        return None

    transaction_ctx = data["contexts"]["trace"]
    trace_id = transaction_ctx["trace_id"]
    try:
        processed["event_id"] = str(uuid.UUID(processed["event_id"]))
        processed["trace_id"] = str(uuid.UUID(trace_id))
        processed["span_id"] = int(transaction_ctx["span_id"], 16)
        processed["transaction_op"] = _unicodify(transaction_ctx.get("op") or "")
        processed["transaction_name"] = _unicodify(data.get("transaction") or "")
        processed["start_ts"], processed["start_ms"] = self.__extract_timestamp(
            data["start_timestamp"],
        )
        status = transaction_ctx.get("status", None)
        if status:
            int_status = SPAN_STATUS_NAME_TO_CODE.get(status, UNKNOWN_SPAN_STATUS)
        else:
            int_status = UNKNOWN_SPAN_STATUS
        processed["transaction_status"] = int_status

        if data["timestamp"] - data["start_timestamp"] < 0:
            # Seems we have some negative durations in the DB
            metrics.increment("negative_duration")
    except Exception:
        # all these fields are required but we saw some events go through here
        # in the past. For now bail — but log it so bad payloads are visible
        # instead of being dropped silently.
        logger.error("Invalid transaction event: %s", event, exc_info=True)
        return

    processed["finish_ts"], processed["finish_ms"] = self.__extract_timestamp(
        data["timestamp"],
    )
    duration_secs = (processed["finish_ts"] - processed["start_ts"]).total_seconds()
    # Clamp at zero: some payloads have finish earlier than start.
    processed["duration"] = max(int(duration_secs * 1000), 0)

    processed["platform"] = _unicodify(event["platform"])

    tags = _as_dict_safe(data.get("tags", None))
    processed["tags.key"], processed["tags.value"] = extract_extra_tags(tags)
    processed["_tags_flattened"] = flatten_nested_field(
        processed["tags.key"], processed["tags.value"]
    )

    promoted_tags = {col: tags[col] for col in self.PROMOTED_TAGS if col in tags}
    processed["release"] = promoted_tags.get(
        "sentry:release",
        event.get("release"),
    )
    processed["environment"] = promoted_tags.get("environment")

    contexts = _as_dict_safe(data.get("contexts", None))

    user_dict = data.get("user", data.get("sentry.interfaces.User", None)) or {}
    # Promote the user's geo sub-dict to a context unless one already exists.
    geo = user_dict.get("geo", None) or {}
    if "geo" not in contexts and isinstance(geo, dict):
        contexts["geo"] = geo

    measurements = data.get("measurements")
    if measurements is not None:
        try:
            (
                processed["measurements.key"],
                processed["measurements.value"],
            ) = extract_nested(measurements, lambda value: float(value["value"]))
        except Exception:
            # Not failing the event in this case just yet, because we are still
            # developing this feature.
            logger.error(
                "Invalid measurements field.",
                extra={"measurements": measurements},
                exc_info=True,
            )
    request = data.get("request", data.get("sentry.interfaces.Http", None)) or {}
    http_data: MutableMapping[str, Any] = {}
    extract_http(http_data, request)
    processed["http_method"] = http_data["http_method"]
    processed["http_referer"] = http_data["http_referer"]

    processed["contexts.key"], processed["contexts.value"] = extract_extra_contexts(
        contexts
    )
    processed["_contexts_flattened"] = flatten_nested_field(
        processed["contexts.key"], processed["contexts.value"]
    )

    processed["dist"] = _unicodify(
        promoted_tags.get("sentry:dist", data.get("dist")),
    )

    user_data = {}
    extract_user(user_data, user_dict)
    processed["user"] = promoted_tags.get("sentry:user", "")
    processed["user_name"] = user_data["username"]
    processed["user_id"] = user_data["user_id"]
    processed["user_email"] = user_data["email"]
    # Store the IP in the column matching its address family.
    ip_address = _ensure_valid_ip(user_data["ip_address"])
    if ip_address:
        if ip_address.version == 4:
            processed["ip_address_v4"] = str(ip_address)
        elif ip_address.version == 6:
            processed["ip_address_v6"] = str(ip_address)

    processed["partition"] = metadata.partition
    processed["offset"] = metadata.offset

    sdk = data.get("sdk", None) or {}
    processed["sdk_name"] = _unicodify(sdk.get("name") or "")
    processed["sdk_version"] = _unicodify(sdk.get("version") or "")
    if processed["sdk_name"] == "":
        metrics.increment("missing_sdk_name")
    if processed["sdk_version"] == "":
        metrics.increment("missing_sdk_version")

    return InsertBatch([processed])
def test_extract_contexts(self):
    """Verify context promotion: promoted keys move into columns, leftovers
    stay in the contexts dict, and tags are left untouched."""
    # Input fixture: app/os/runtime/browser/device are fully promotable;
    # "extra" holds values that should survive promotion (plus entries the
    # extractor is expected to drop: the "type" marker and nulls).
    contexts = {
        "app": {"device_app_hash": "the_app_device_uuid"},
        "os": {
            "name": "the_os_name",
            "version": "the_os_version",
            "rooted": True,
            "build": "the_os_build",
            "kernel_version": "the_os_kernel_version",
        },
        "runtime": {"name": "the_runtime_name", "version": "the_runtime_version"},
        "browser": {"name": "the_browser_name", "version": "the_browser_version"},
        "device": {
            "model": "the_device_model",
            "family": "the_device_family",
            "name": "the_device_name",
            "brand": "the_device_brand",
            "locale": "the_device_locale",
            "uuid": "the_device_uuid",
            "model_id": "the_device_model_id",
            "arch": "the_device_arch",
            "battery_level": 30,
            "orientation": "the_device_orientation",
            "simulator": False,
            "online": True,
            "charging": True,
        },
        "extra": {
            "type": "extra",  # unnecessary
            "null": None,
            "int": 0,
            "float": 1.3,
            "list": [1, 2, 3],
            "dict": {"key": "value"},
            "str": "string",
            # Lone UTF-16 surrogate: exercises invalid-UTF-8 escaping below.
            "\ud83c": "invalid utf-8 surrogate",
        },
    }
    # Tags must pass through extraction unchanged (asserted via copy below).
    orig_tags = {
        "app.device": "the_app_device_uuid",
        "os": "the_os_name the_os_version",
        "os.name": "the_os_name",
        "os.rooted": True,
        "runtime": "the_runtime_name the_runtime_version",
        "runtime.name": "the_runtime_name",
        "browser": "the_browser_name the_browser_version",
        "browser.name": "the_browser_name",
        "device": "the_device_model",
        "device.family": "the_device_family",
        "extra_tag": "extra_value",
    }
    tags = orig_tags.copy()
    output = {}

    enforce_table_writer(self.dataset).get_stream_loader().get_processor(
    ).extract_promoted_contexts(output, contexts, tags)

    # Promoted context fields land as flattened snake_case columns; note
    # battery_level is coerced to float.
    assert output == {
        "app_device": u"the_app_device_uuid",
        "browser": u"the_browser_name the_browser_version",
        "browser_name": u"the_browser_name",
        "device": u"the_device_model",
        "device_arch": u"the_device_arch",
        "device_battery_level": 30.0,
        "device_brand": u"the_device_brand",
        "device_charging": True,
        "device_family": u"the_device_family",
        "device_locale": u"the_device_locale",
        "device_model_id": u"the_device_model_id",
        "device_name": u"the_device_name",
        "device_online": True,
        "device_orientation": u"the_device_orientation",
        "device_simulator": False,
        "device_uuid": u"the_device_uuid",
        "os": u"the_os_name the_os_version",
        "os_build": u"the_os_build",
        "os_kernel_version": u"the_os_kernel_version",
        "os_name": u"the_os_name",
        "os_rooted": True,
        "runtime": u"the_runtime_name the_runtime_version",
        "runtime_name": u"the_runtime_name",
    }
    # Promotion empties the consumed context dicts in place; "extra" keeps
    # everything except nothing was promoted from it.
    assert contexts == {
        "app": {},
        "browser": {},
        "device": {},
        "extra": {
            "dict": {"key": "value"},
            "\ud83c": "invalid utf-8 surrogate",
            "float": 1.3,
            "int": 0,
            "list": [1, 2, 3],
            "null": None,
            "type": "extra",
            "str": "string",
        },
        "os": {},
        "runtime": {},
    }
    assert tags == orig_tags

    # Leftover contexts are extracted into parallel key/value arrays;
    # scalars are stringified, None and "type" are dropped, and the lone
    # surrogate key is backslash-escaped ("extra.\ud83c" as literal text).
    extra_output = {}
    (
        extra_output["contexts.key"],
        extra_output["contexts.value"],
    ) = extract_extra_contexts(contexts)

    assert extra_output == {
        "contexts.key": ["extra.int", "extra.float", "extra.str", "extra.\\ud83c"],
        "contexts.value": [u"0", u"1.3", u"string", u"invalid utf-8 surrogate"],
    }
def process_message(
        self, message: Tuple[int, str, Any],
        metadata: KafkaMessageMetadata) -> Optional[ProcessedMessage]:
    """Process a raw Kafka transaction message into an InsertBatch of one row.

    Returns None for payloads that are malformed, not version 0/1/2, not
    "insert" messages, not of type "transaction", too old for retention,
    or missing the trace context.
    """
    processed: MutableMapping[str, Any] = {"deleted": 0}
    # Payloads are (version, type, event, ...) tuples.
    if not (isinstance(message, (list, tuple)) and len(message) >= 2):
        return None
    version = message[0]
    if version not in (0, 1, 2):
        return None
    type_, event = message[1:3]
    if type_ != "insert":
        return None

    data = event["data"]
    event_type = data.get("type")
    if event_type != "transaction":
        return None
    extract_base(processed, event)
    try:
        # We are purposely using a naive datetime here to work with the
        # rest of the codebase. We can be confident that clients are only
        # sending UTC dates.
        processed["retention_days"] = enforce_retention(
            event,
            datetime.utcfromtimestamp(data["timestamp"]),
        )
    except EventTooOld:
        # Past retention: silently drop rather than insert a doomed row.
        return None

    if not data.get("contexts", {}).get("trace"):
        return None

    transaction_ctx = data["contexts"]["trace"]
    trace_id = transaction_ctx["trace_id"]
    # Required fields: any of these raising will fail the message.
    processed["event_id"] = str(uuid.UUID(processed["event_id"]))
    processed["trace_id"] = str(uuid.UUID(trace_id))
    # span_id arrives as a hex string.
    processed["span_id"] = int(transaction_ctx["span_id"], 16)
    processed["transaction_op"] = _unicodify(
        transaction_ctx.get("op") or "")
    processed["transaction_name"] = _unicodify(
        data.get("transaction") or "")
    processed["start_ts"], processed[
        "start_ms"] = self.__extract_timestamp(data["start_timestamp"], )
    status = transaction_ctx.get("status", None)
    if status:
        int_status = SPAN_STATUS_NAME_TO_CODE.get(status, UNKNOWN_SPAN_STATUS)
    else:
        int_status = UNKNOWN_SPAN_STATUS
    processed["transaction_status"] = int_status

    if data["timestamp"] - data["start_timestamp"] < 0:
        # Seems we have some negative durations in the DB
        metrics.increment("negative_duration")

    processed["finish_ts"], processed[
        "finish_ms"] = self.__extract_timestamp(data["timestamp"], )
    # Duration in milliseconds, clamped at zero for out-of-order timestamps.
    duration_secs = (processed["finish_ts"] -
                     processed["start_ts"]).total_seconds()
    processed["duration"] = max(int(duration_secs * 1000), 0)

    processed["platform"] = _unicodify(event["platform"])

    tags: Mapping[str, Any] = _as_dict_safe(data.get("tags", None))
    processed["tags.key"], processed["tags.value"] = extract_extra_tags(
        tags)

    promoted_tags = {
        col: tags[col]
        for col in self.PROMOTED_TAGS if col in tags
    }
    # Promoted sentry:* tags win over the event-level fields.
    processed["release"] = promoted_tags.get(
        "sentry:release",
        event.get("release"),
    )
    processed["environment"] = promoted_tags.get("environment")

    contexts: MutableMapping[str, Any] = _as_dict_safe(
        data.get("contexts", None))

    user_dict = data.get("user", data.get("sentry.interfaces.User",
                                          None)) or {}
    # Promote the user's geo sub-dict to a context unless one already exists.
    geo = user_dict.get("geo", None) or {}
    if "geo" not in contexts and isinstance(geo, dict):
        contexts["geo"] = geo

    measurements = data.get("measurements")
    if measurements is not None:
        try:
            # Non-numeric measurement values map to None instead of
            # failing the whole set.
            (
                processed["measurements.key"],
                processed["measurements.value"],
            ) = extract_nested(
                measurements,
                lambda value: float(value["value"]) if
                (value is not None and isinstance(value.get("value"),
                                                  numbers.Number)) else None,
            )
        except Exception:
            # Not failing the event in this case just yet, because we are still
            # developing this feature.
            logger.error(
                "Invalid measurements field.",
                extra={"measurements": measurements},
                exc_info=True,
            )
    breakdowns = data.get("breakdowns")
    if breakdowns is not None:
        span_op_breakdowns = breakdowns.get("span_ops")
        if span_op_breakdowns is not None:
            try:
                # Same tolerant extraction as measurements above.
                (
                    processed["span_op_breakdowns.key"],
                    processed["span_op_breakdowns.value"],
                ) = extract_nested(
                    span_op_breakdowns,
                    lambda value: float(value["value"]) if
                    (value is not None and isinstance(
                        value.get("value"), numbers.Number)) else None,
                )
            except Exception:
                # Not failing the event in this case just yet, because we are still
                # developing this feature.
                logger.error(
                    "Invalid breakdowns.span_ops field.",
                    extra={"span_op_breakdowns": span_op_breakdowns},
                    exc_info=True,
                )
    # The HTTP interface may live under either the new or legacy key.
    request = data.get("request", data.get("sentry.interfaces.Http",
                                           None)) or {}
    http_data: MutableMapping[str, Any] = {}
    extract_http(http_data, request)
    processed["http_method"] = http_data["http_method"]
    processed["http_referer"] = http_data["http_referer"]

    # Drop contexts this project has opted out of storing.
    skipped_contexts = settings.TRANSACT_SKIP_CONTEXT_STORE.get(
        processed["project_id"], set())
    for context in skipped_contexts:
        if context in contexts:
            del contexts[context]

    processed["contexts.key"], processed[
        "contexts.value"] = extract_extra_contexts(contexts)

    processed["dist"] = _unicodify(
        promoted_tags.get("sentry:dist", data.get("dist")),
    )

    user_data: MutableMapping[str, Any] = {}
    extract_user(user_data, user_dict)
    processed["user"] = promoted_tags.get("sentry:user", "")
    processed["user_name"] = user_data["username"]
    processed["user_id"] = user_data["user_id"]
    processed["user_email"] = user_data["email"]
    # Store the IP in the column matching its address family.
    ip_address = _ensure_valid_ip(user_data["ip_address"])
    if ip_address:
        if ip_address.version == 4:
            processed["ip_address_v4"] = str(ip_address)
        elif ip_address.version == 6:
            processed["ip_address_v6"] = str(ip_address)

    processed["partition"] = metadata.partition
    processed["offset"] = metadata.offset

    sdk = data.get("sdk", None) or {}
    processed["sdk_name"] = _unicodify(sdk.get("name") or "")
    processed["sdk_version"] = _unicodify(sdk.get("version") or "")

    # Track events with missing SDK metadata.
    if processed["sdk_name"] == "":
        metrics.increment("missing_sdk_name")
    if processed["sdk_version"] == "":
        metrics.increment("missing_sdk_version")

    return InsertBatch([processed], None)