def process_insert(
    self, event: Mapping[str, Any], metadata: Optional[KafkaMessageMetadata] = None
) -> Optional[Mapping[str, Any]]:
    """
    Convert a raw "insert" event into a processed row mapping, delegating
    each group of columns to the extract_* hooks.

    Returns None when the event should not be written (filtered out by
    _should_process, or the payload carries no data dictionary).
    """
    if not self._should_process(event):
        return None

    processed = {"deleted": 0}
    extract_project_id(processed, event)
    self._extract_event_id(processed, event)
    processed["retention_days"] = enforce_retention(
        event,
        datetime.strptime(event["datetime"], settings.PAYLOAD_DATETIME_FORMAT),
    )
    self.extract_required(processed, event)

    event_data = event.get("data", {})
    # HACK: some events arrive without a data payload; log and drop them
    # instead of crashing. https://sentry.io/sentry/snuba/issues/802102397/
    if not event_data:
        logger.error("No data for event: %s", event, exc_info=True)
        return None

    self.extract_common(processed, event, metadata)
    self.extract_custom(processed, event, metadata)

    self.extract_sdk(processed, event_data.get("sdk", None) or {})

    tag_map = _as_dict_safe(event_data.get("tags", None))
    self.extract_promoted_tags(processed, tag_map)
    self.extract_tags_custom(processed, event, tag_map, metadata)

    context_map = event_data.get("contexts", None) or {}
    self.extract_promoted_contexts(processed, context_map, tag_map)
    self.extract_contexts_custom(processed, event, context_map, metadata)

    # Non-promoted tags/contexts land in parallel key/value arrays.
    processed["contexts.key"], processed["contexts.value"] = extract_extra_contexts(
        context_map
    )
    processed["tags.key"], processed["tags.value"] = extract_extra_tags(tag_map)
    processed["_tags_flattened"] = flatten_nested_field(
        processed["tags.key"], processed["tags.value"]
    )

    # Exception interface may live under either the new or legacy key.
    exc_interface = (
        event_data.get("exception", event_data.get("sentry.interfaces.Exception", None))
        or {}
    )
    self.extract_stacktraces(processed, exc_interface.get("values", None) or [])

    if metadata is not None:
        processed["offset"] = metadata.offset
        processed["partition"] = metadata.partition

    return processed
def __fill_common(self, span: MutableMapping[str, Any],
                  data: Mapping[str, Any]) -> None:
    """
    Populate the span fields whose structure is shared between
    transactions and spans but which come from different dictionaries.
    """
    span["start_ts"], span["start_ns"] = self.__extract_timestamp(
        data["start_timestamp"]
    )
    span["finish_ts"], span["finish_ns"] = self.__extract_timestamp(
        data["timestamp"]
    )

    # Duration is stored in milliseconds, clamped so it is never negative.
    elapsed_secs = (span["finish_ts"] - span["start_ts"]).total_seconds()
    span["duration_ms"] = max(int(elapsed_secs * 1000), 0)

    span_tags = _as_dict_safe(data.get("tags", None))
    span["tags.key"], span["tags.value"] = extract_extra_tags(span_tags)
def test_extract_tags(self):
    """
    extract_promoted_tags must copy only the promoted tag columns into
    the output without mutating the input dict, and extract_extra_tags
    must emit every non-empty tag as parallel key/value arrays.
    """
    orig_tags = {
        # FIX: value was masked to "******", which made the promoted-tag
        # assertion below (expecting "the_user") fail; restore the fixture.
        "sentry:user": "the_user",
        "level": "the_level",
        "logger": "the_logger",
        "server_name": "the_servername",
        "transaction": "the_transaction",
        "environment": "the_enviroment",
        "sentry:release": "the_release",
        "sentry:dist": "the_dist",
        "site": "the_site",
        "url": "the_url",
        "extra_tag": "extra_value",
        "null_tag": None,
    }
    tags = orig_tags.copy()

    output = {}
    enforce_table_writer(
        self.dataset
    ).get_stream_loader().get_processor().extract_promoted_tags(output, tags)
    assert output == {
        "sentry:dist": "the_dist",
        "environment": "the_enviroment",
        "level": "the_level",
        "logger": "the_logger",
        "sentry:release": "the_release",
        "server_name": "the_servername",
        "site": "the_site",
        "transaction": "the_transaction",
        "url": "the_url",
        "sentry:user": "the_user",
    }
    # The input must not be mutated by promotion.
    assert tags == orig_tags

    extra_output = {}
    extra_output["tags.key"], extra_output["tags.value"] = extract_extra_tags(tags)

    # Only tags with truthy values survive; keys/values stay aligned.
    valid_items = [(k, v) for k, v in sorted(orig_tags.items()) if v]
    assert extra_output == {
        "tags.key": [k for k, v in valid_items],
        "tags.value": [v for k, v in valid_items],
    }
def _process_tags(
    self,
    processed: MutableMapping[str, Any],
    event_dict: EventDict,
) -> None:
    """
    Fill the tag-derived columns: the raw tags as parallel key/value
    arrays plus the promoted columns (release, environment, user, dist).
    """
    tags: Mapping[str, Any] = _as_dict_safe(
        event_dict["data"].get("tags", None)
    )
    processed["tags.key"], processed["tags.value"] = extract_extra_tags(tags)

    # Promoted tags become first-class columns.
    promoted = {name: tags[name] for name in self.PROMOTED_TAGS if name in tags}

    # Release may also come from the top-level event payload.
    processed["release"] = promoted.get("sentry:release", event_dict.get("release"))
    processed["environment"] = promoted.get("environment")
    processed["user"] = promoted.get("sentry:user", "")
    processed["dist"] = _unicodify(
        promoted.get("sentry:dist", event_dict["data"].get("dist"))
    )
def process_message(self, message, metadata) -> Optional[ProcessedMessage]:
    """
    Process one Kafka message into a single transaction row.

    Accepts the (version, type, event, ...) tuple format. Returns None
    for anything that is not a version 0/1/2 "insert" of a transaction
    event (or an event missing required fields), otherwise an
    InsertBatch containing one row.
    """
    processed = {"deleted": 0}
    if not (isinstance(message, (list, tuple)) and len(message) >= 2):
        return None
    version = message[0]
    if version not in (0, 1, 2):
        return None
    type_, event = message[1:3]
    if type_ != "insert":
        return None

    data = event["data"]
    event_type = data.get("type")
    if event_type != "transaction":
        return None

    extract_base(processed, event)
    try:
        # FIX: use a naive UTC datetime — clients send UTC epoch seconds,
        # and fromtimestamp() would apply the server's local timezone.
        # Also drop events past retention instead of letting EventTooOld
        # propagate and crash the consumer, matching the newer processor
        # in this file.
        processed["retention_days"] = enforce_retention(
            event,
            datetime.utcfromtimestamp(data["timestamp"]),
        )
    except EventTooOld:
        return None

    if not data.get("contexts", {}).get("trace"):
        return None
    transaction_ctx = data["contexts"]["trace"]
    trace_id = transaction_ctx["trace_id"]
    try:
        processed["event_id"] = str(uuid.UUID(processed["event_id"]))
        processed["trace_id"] = str(uuid.UUID(trace_id))
        processed["span_id"] = int(transaction_ctx["span_id"], 16)
        processed["transaction_op"] = _unicodify(transaction_ctx.get("op") or "")
        processed["transaction_name"] = _unicodify(data.get("transaction") or "")
        processed["start_ts"], processed["start_ms"] = self.__extract_timestamp(
            data["start_timestamp"],
        )
        status = transaction_ctx.get("status", None)
        if status:
            int_status = SPAN_STATUS_NAME_TO_CODE.get(status, UNKNOWN_SPAN_STATUS)
        else:
            int_status = UNKNOWN_SPAN_STATUS
        processed["transaction_status"] = int_status

        if data["timestamp"] - data["start_timestamp"] < 0:
            # Seems we have some negative durations in the DB
            metrics.increment("negative_duration")
    except Exception:
        # all these fields are required but we saw some events go through
        # here in the past. For now bail.
        return None

    processed["finish_ts"], processed["finish_ms"] = self.__extract_timestamp(
        data["timestamp"],
    )
    # duration is stored in milliseconds, clamped at zero
    duration_secs = (processed["finish_ts"] - processed["start_ts"]).total_seconds()
    processed["duration"] = max(int(duration_secs * 1000), 0)

    processed["platform"] = _unicodify(event["platform"])

    tags = _as_dict_safe(data.get("tags", None))
    processed["tags.key"], processed["tags.value"] = extract_extra_tags(tags)
    processed["_tags_flattened"] = flatten_nested_field(
        processed["tags.key"], processed["tags.value"]
    )
    promoted_tags = {col: tags[col] for col in self.PROMOTED_TAGS if col in tags}
    processed["release"] = promoted_tags.get(
        "sentry:release",
        event.get("release"),
    )
    processed["environment"] = promoted_tags.get("environment")

    contexts = _as_dict_safe(data.get("contexts", None))
    # Surface the user's geo info as a synthetic context if not already set.
    user_dict = data.get("user", data.get("sentry.interfaces.User", None)) or {}
    geo = user_dict.get("geo", None) or {}
    if "geo" not in contexts and isinstance(geo, dict):
        contexts["geo"] = geo

    measurements = data.get("measurements")
    if measurements is not None:
        try:
            (
                processed["measurements.key"],
                processed["measurements.value"],
            ) = extract_nested(measurements, lambda value: float(value["value"]))
        except Exception:
            # Not failing the event in this case just yet, because we are still
            # developing this feature.
            logger.error(
                "Invalid measurements field.",
                extra={"measurements": measurements},
                exc_info=True,
            )

    # HTTP interface may live under either the new or legacy key.
    request = data.get("request", data.get("sentry.interfaces.Http", None)) or {}
    http_data: MutableMapping[str, Any] = {}
    extract_http(http_data, request)
    processed["http_method"] = http_data["http_method"]
    processed["http_referer"] = http_data["http_referer"]

    processed["contexts.key"], processed["contexts.value"] = extract_extra_contexts(
        contexts
    )
    processed["_contexts_flattened"] = flatten_nested_field(
        processed["contexts.key"], processed["contexts.value"]
    )

    processed["dist"] = _unicodify(
        promoted_tags.get("sentry:dist", data.get("dist")),
    )

    user_data = {}
    extract_user(user_data, user_dict)
    processed["user"] = promoted_tags.get("sentry:user", "")
    processed["user_name"] = user_data["username"]
    processed["user_id"] = user_data["user_id"]
    processed["user_email"] = user_data["email"]

    ip_address = _ensure_valid_ip(user_data["ip_address"])
    if ip_address:
        if ip_address.version == 4:
            processed["ip_address_v4"] = str(ip_address)
        elif ip_address.version == 6:
            processed["ip_address_v6"] = str(ip_address)

    processed["partition"] = metadata.partition
    processed["offset"] = metadata.offset

    sdk = data.get("sdk", None) or {}
    processed["sdk_name"] = _unicodify(sdk.get("name") or "")
    processed["sdk_version"] = _unicodify(sdk.get("version") or "")
    if processed["sdk_name"] == "":
        metrics.increment("missing_sdk_name")
    if processed["sdk_version"] == "":
        metrics.increment("missing_sdk_version")

    return InsertBatch([processed])
def process_message(
        self, message: Tuple[int, str, Any],
        metadata: KafkaMessageMetadata) -> Optional[ProcessedMessage]:
    """
    Process one Kafka message into a single transaction row.

    Accepts the (version, type, event, ...) tuple format. Returns None
    when the message is not a version 0/1/2 "insert" of a transaction
    event, the event is past retention, or it has no trace context;
    otherwise returns an InsertBatch with one row.
    """
    processed: MutableMapping[str, Any] = {"deleted": 0}
    # Messages are (version, type, event [, ...]) sequences.
    if not (isinstance(message, (list, tuple)) and len(message) >= 2):
        return None
    version = message[0]
    if version not in (0, 1, 2):
        return None
    type_, event = message[1:3]
    if type_ != "insert":
        return None

    data = event["data"]
    event_type = data.get("type")
    if event_type != "transaction":
        return None
    extract_base(processed, event)
    try:
        # We are purposely using a naive datetime here to work with the
        # rest of the codebase. We can be confident that clients are only
        # sending UTC dates.
        processed["retention_days"] = enforce_retention(
            event,
            datetime.utcfromtimestamp(data["timestamp"]),
        )
    except EventTooOld:
        # Event past its retention window: silently drop it.
        return None

    # A transaction without a trace context cannot be stored.
    if not data.get("contexts", {}).get("trace"):
        return None
    transaction_ctx = data["contexts"]["trace"]
    trace_id = transaction_ctx["trace_id"]
    # Normalize IDs: UUID canonical form; span_id is hex-encoded.
    processed["event_id"] = str(uuid.UUID(processed["event_id"]))
    processed["trace_id"] = str(uuid.UUID(trace_id))
    processed["span_id"] = int(transaction_ctx["span_id"], 16)
    processed["transaction_op"] = _unicodify(
        transaction_ctx.get("op") or "")
    processed["transaction_name"] = _unicodify(
        data.get("transaction") or "")
    processed["start_ts"], processed[
        "start_ms"] = self.__extract_timestamp(data["start_timestamp"], )
    # Map the textual span status onto its numeric code.
    status = transaction_ctx.get("status", None)
    if status:
        int_status = SPAN_STATUS_NAME_TO_CODE.get(status,
                                                  UNKNOWN_SPAN_STATUS)
    else:
        int_status = UNKNOWN_SPAN_STATUS
    processed["transaction_status"] = int_status

    if data["timestamp"] - data["start_timestamp"] < 0:
        # Seems we have some negative durations in the DB
        metrics.increment("negative_duration")

    processed["finish_ts"], processed[
        "finish_ms"] = self.__extract_timestamp(data["timestamp"], )
    # duration is stored in milliseconds, clamped at zero
    duration_secs = (processed["finish_ts"] -
                     processed["start_ts"]).total_seconds()
    processed["duration"] = max(int(duration_secs * 1000), 0)

    processed["platform"] = _unicodify(event["platform"])

    # Raw tags become parallel key/value arrays; promoted tags become
    # first-class columns below.
    tags: Mapping[str, Any] = _as_dict_safe(data.get("tags", None))
    processed["tags.key"], processed["tags.value"] = extract_extra_tags(
        tags)
    promoted_tags = {
        col: tags[col]
        for col in self.PROMOTED_TAGS if col in tags
    }
    processed["release"] = promoted_tags.get(
        "sentry:release",
        event.get("release"),
    )
    processed["environment"] = promoted_tags.get("environment")

    contexts: MutableMapping[str, Any] = _as_dict_safe(
        data.get("contexts", None))
    # Surface the user's geo info as a synthetic "geo" context when the
    # event did not already provide one.
    user_dict = data.get("user", data.get("sentry.interfaces.User",
                                          None)) or {}
    geo = user_dict.get("geo", None) or {}
    if "geo" not in contexts and isinstance(geo, dict):
        contexts["geo"] = geo

    measurements = data.get("measurements")
    if measurements is not None:
        try:
            # Keep only numeric measurement values; everything else maps
            # to None.
            (
                processed["measurements.key"],
                processed["measurements.value"],
            ) = extract_nested(
                measurements,
                lambda value: float(value["value"]) if
                (value is not None and isinstance(value.get("value"),
                                                  numbers.Number)) else None,
            )
        except Exception:
            # Not failing the event in this case just yet, because we are still
            # developing this feature.
            logger.error(
                "Invalid measurements field.",
                extra={"measurements": measurements},
                exc_info=True,
            )

    breakdowns = data.get("breakdowns")
    if breakdowns is not None:
        span_op_breakdowns = breakdowns.get("span_ops")
        if span_op_breakdowns is not None:
            try:
                # Same numeric-only filtering as measurements above.
                (
                    processed["span_op_breakdowns.key"],
                    processed["span_op_breakdowns.value"],
                ) = extract_nested(
                    span_op_breakdowns,
                    lambda value: float(value["value"]) if
                    (value is not None and isinstance(
                        value.get("value"), numbers.Number)) else None,
                )
            except Exception:
                # Not failing the event in this case just yet, because we are still
                # developing this feature.
                logger.error(
                    "Invalid breakdowns.span_ops field.",
                    extra={"span_op_breakdowns": span_op_breakdowns},
                    exc_info=True,
                )

    # HTTP interface may live under either the new or legacy key.
    request = data.get("request", data.get("sentry.interfaces.Http",
                                           None)) or {}
    http_data: MutableMapping[str, Any] = {}
    extract_http(http_data, request)
    processed["http_method"] = http_data["http_method"]
    processed["http_referer"] = http_data["http_referer"]

    # Per-project denylist of contexts that must not be stored.
    skipped_contexts = settings.TRANSACT_SKIP_CONTEXT_STORE.get(
        processed["project_id"], set())
    for context in skipped_contexts:
        if context in contexts:
            del contexts[context]

    processed["contexts.key"], processed[
        "contexts.value"] = extract_extra_contexts(contexts)

    processed["dist"] = _unicodify(
        promoted_tags.get("sentry:dist", data.get("dist")),
    )

    user_data: MutableMapping[str, Any] = {}
    extract_user(user_data, user_dict)
    processed["user"] = promoted_tags.get("sentry:user", "")
    processed["user_name"] = user_data["username"]
    processed["user_id"] = user_data["user_id"]
    processed["user_email"] = user_data["email"]

    # Store the IP in the column matching its address family.
    ip_address = _ensure_valid_ip(user_data["ip_address"])
    if ip_address:
        if ip_address.version == 4:
            processed["ip_address_v4"] = str(ip_address)
        elif ip_address.version == 6:
            processed["ip_address_v6"] = str(ip_address)

    processed["partition"] = metadata.partition
    processed["offset"] = metadata.offset

    sdk = data.get("sdk", None) or {}
    processed["sdk_name"] = _unicodify(sdk.get("name") or "")
    processed["sdk_version"] = _unicodify(sdk.get("version") or "")
    if processed["sdk_name"] == "":
        metrics.increment("missing_sdk_name")
    if processed["sdk_version"] == "":
        metrics.increment("missing_sdk_version")

    return InsertBatch([processed], None)
def process_message(self, message, metadata) -> Optional[ProcessedMessage]:
    """
    Expand an "insert" transaction event into one row per span: the
    transaction's root span first, then every nested span.

    Returns None when the message is malformed, is not a transaction
    insert, has no trace context, or any span id fails to parse.
    """
    if not (isinstance(message, (list, tuple)) and len(message) >= 2):
        return None
    version = message[0]
    if version not in (0, 1, 2):
        return None
    type_, event = message[1:3]
    if type_ != "insert":
        return None

    data = event["data"]
    if data.get("type") != "transaction":
        return None

    rows: List[MutableMapping[str, Any]] = []

    trace_ctx = data["contexts"].get("trace")
    if not trace_ctx:
        metrics.increment("missing_trace_ctx")
        return None

    # Row for the transaction's root span.
    root = self.__init_span(event)
    root["span_id"] = self.__safe_extract_int(
        "transaction:span_id", trace_ctx["span_id"], False
    )
    if root["span_id"] is None:
        return None
    root["transaction_name"] = _unicodify(data.get("transaction") or "")
    root["parent_span_id"] = self.__safe_extract_int(
        "transaction:parent_span_id", trace_ctx.get("parent_span_id"), True
    )
    root["description"] = _unicodify(data.get("transaction") or "")
    root["op"] = _unicodify(trace_ctx.get("op") or "")
    self.__fill_status(root, trace_ctx.get("status", None))
    self.__fill_common(root, event["data"])
    rows.append(root)

    # One row per nested span.
    for span_dict in data.get("spans", []):
        row = self.__init_span(event)
        row["span_id"] = self.__safe_extract_int(
            "span:span_id", span_dict["span_id"], False
        )
        if row["span_id"] is None:
            return None
        row["parent_span_id"] = self.__safe_extract_int(
            "span:parent_span_id", span_dict.get("parent_span_id"), True
        )
        row["description"] = span_dict.get("description", "") or ""
        row["op"] = span_dict["op"]
        span_tags = _as_dict_safe(span_dict.get("tags", None))
        row["tags.key"], row["tags.value"] = extract_extra_tags(span_tags)
        self.__fill_status(row, span_dict.get("status", None))
        self.__fill_common(row, span_dict)
        rows.append(row)

    return InsertBatch(rows) if rows else None