def extract_user(output: MutableMapping[str, Any], user: Mapping[str, Any]) -> None:
    """Flatten the event's user interface dict into output columns.

    Writes ``user_id``/``username``/``email`` (unicodified) and a
    validated ``ip_address`` (stringified, or None when absent or
    invalid) into *output*.
    """
    for dest_key, src_key in (
        ("user_id", "id"),
        ("username", "username"),
        ("email", "email"),
    ):
        output[dest_key] = _unicodify(user.get(src_key, None))
    validated_ip = _ensure_valid_ip(user.get("ip_address", None))
    if validated_ip is None:
        output["ip_address"] = None
    else:
        output["ip_address"] = str(validated_ip)
def extract_custom(
    self,
    output: MutableMapping[str, Any],
    event: InsertEvent,
    metadata: KafkaMessageMetadata,
) -> None:
    """Extract error-specific columns (user, contexts, http, hashes,
    culprit/type/title) from the event payload into ``output``.

    Side effect: reassigns ``event["data"]["contexts"]`` so downstream
    code sees the (possibly geo-augmented) contexts dict.
    """
    data = event.get("data", {})
    # The user interface key changed over time; fall back to the legacy name.
    user_dict = data.get("user", data.get("sentry.interfaces.User", None)) or {}
    user_data: MutableMapping[str, Any] = {}
    extract_user(user_data, user_dict)
    output["user_name"] = user_data["username"]
    output["user_id"] = user_data["user_id"]
    output["user_email"] = user_data["email"]
    # Split the validated IP into version-specific columns.
    ip_address = _ensure_valid_ip(user_data["ip_address"])
    if ip_address:
        if ip_address.version == 4:
            output["ip_address_v4"] = str(ip_address)
        elif ip_address.version == 6:
            output["ip_address_v6"] = str(ip_address)
    contexts: MutableMapping[str, Any] = _as_dict_safe(
        data.get("contexts", None))
    # Promote the user's geo dict into contexts unless one is already present.
    geo = user_dict.get("geo", {})
    if "geo" not in contexts and isinstance(geo, dict):
        contexts["geo"] = geo
    # Legacy fallback key for the request interface as well.
    request = data.get("request", data.get("sentry.interfaces.Http", None)) or {}
    http_data: MutableMapping[str, Any] = {}
    extract_http(http_data, request)
    output["http_method"] = http_data["http_method"]
    output["http_referer"] = http_data["http_referer"]
    # _as_dict_safe may not return a reference to the entry in the data
    # dictionary in some cases.
    data["contexts"] = contexts
    output["message"] = _unicodify(event["message"])
    # Hashes are normalized through _hashify and stored as canonical UUID strings.
    output["primary_hash"] = str(uuid.UUID(_hashify(
        event["primary_hash"])))
    output["hierarchical_hashes"] = list(
        str(uuid.UUID(_hashify(x)))
        for x in data.get("hierarchical_hashes") or ())
    output["culprit"] = _unicodify(data.get("culprit", ""))
    output["type"] = _unicodify(data.get("type", ""))
    output["title"] = _unicodify(data.get("title", ""))
def extract_custom(
    self,
    output: MutableMapping[str, Any],
    event: Mapping[str, Any],
    metadata: Optional[KafkaMessageMetadata] = None,
) -> None:
    """Extract user, geo/request contexts, message, and org_id columns
    from the event payload into ``output``.

    Side effect: reassigns ``event["data"]["contexts"]`` so downstream
    code sees the augmented contexts dict.
    """
    data = event.get("data", {})
    # The user interface key changed over time; fall back to the legacy name.
    user_dict = data.get("user", data.get("sentry.interfaces.User", None)) or {}
    user_data: MutableMapping[str, Any] = {}
    extract_user(user_data, user_dict)
    output["user_name"] = user_data["username"]
    output["user_id"] = user_data["user_id"]
    output["user_email"] = user_data["email"]
    # Split the validated IP into version-specific columns.
    ip_address = _ensure_valid_ip(user_data["ip_address"])
    if ip_address:
        if ip_address.version == 4:
            output["ip_address_v4"] = str(ip_address)
        elif ip_address.version == 6:
            output["ip_address_v6"] = str(ip_address)
    contexts = _as_dict_safe(data.get("contexts", None))
    # Promote the user's geo dict into contexts unless one is already present.
    geo = user_dict.get("geo", {})
    if "geo" not in contexts and isinstance(geo, dict):
        contexts["geo"] = geo
    # Legacy fallback key for the request interface; build a minimal
    # request context (method + Referer header) when none exists yet.
    request = data.get("request", data.get("sentry.interfaces.Http", None)) or {}
    if "request" not in contexts and isinstance(request, dict):
        http = {}
        http["http_method"] = _unicodify(request.get("method", None))
        http_headers = _as_dict_safe(request.get("headers", None))
        http["http_referer"] = _unicodify(http_headers.get(
            "Referer", None))
        contexts["request"] = http
    # _as_dict_safe may not return a reference to the entry in the data
    # dictionary in some cases.
    data["contexts"] = contexts
    output["message"] = _unicodify(event["message"])
    output["org_id"] = event["organization_id"]
def _process_user(
    self,
    processed: MutableMapping[str, Any],
    event_dict: EventDict,
) -> None:
    """Populate the user_* and ip_address_v4/v6 columns from the event payload."""
    data = event_dict["data"]
    # The user interface key changed over time; fall back to the legacy name.
    user_dict = data.get("user", data.get("sentry.interfaces.User", None)) or {}
    extracted: MutableMapping[str, Any] = {}
    extract_user(extracted, user_dict)
    processed["user_name"] = extracted["username"]
    processed["user_id"] = extracted["user_id"]
    processed["user_email"] = extracted["email"]
    parsed_ip = _ensure_valid_ip(extracted["ip_address"])
    if parsed_ip is not None:
        # Store the IP in the column matching its protocol version.
        if parsed_ip.version == 4:
            processed["ip_address_v4"] = str(parsed_ip)
        elif parsed_ip.version == 6:
            processed["ip_address_v6"] = str(parsed_ip)
def _process_contexts_and_user(
    self,
    processed: MutableMapping[str, Any],
    event_dict: EventDict,
) -> None:
    """Fill the contexts.key/contexts.value arrays and the user_* /
    ip_address_v4/v6 columns from the event payload."""
    contexts: MutableMapping[str, Any] = _as_dict_safe(event_dict["data"].get(
        "contexts", None))
    # The user interface key changed over time; fall back to the legacy name.
    user_dict = (event_dict["data"].get(
        "user", event_dict["data"].get("sentry.interfaces.User", None)) or {})
    geo = user_dict.get("geo", None) or {}
    if "geo" not in contexts and isinstance(geo, dict):
        contexts["geo"] = geo
    # Remove contexts configured to be skipped for this project.
    skipped_contexts = settings.TRANSACT_SKIP_CONTEXT_STORE.get(
        processed["project_id"], set())
    for context in skipped_contexts:
        if context in contexts:
            del contexts[context]
    # NOTE(review): the stored columns are built from _sanitize_contexts,
    # not from the local ``contexts`` mutated above — confirm whether the
    # geo promotion and skip-list pruning are meant to feed into the
    # sanitized output or are intentionally discarded here.
    sanitized_contexts = self._sanitize_contexts(processed, event_dict)
    processed["contexts.key"], processed[
        "contexts.value"] = extract_extra_contexts(sanitized_contexts)
    user_data: MutableMapping[str, Any] = {}
    extract_user(user_data, user_dict)
    processed["user_name"] = user_data["username"]
    processed["user_id"] = user_data["user_id"]
    processed["user_email"] = user_data["email"]
    # Split the validated IP into version-specific columns.
    ip_address = _ensure_valid_ip(user_data["ip_address"])
    if ip_address:
        if ip_address.version == 4:
            processed["ip_address_v4"] = str(ip_address)
        elif ip_address.version == 6:
            processed["ip_address_v6"] = str(ip_address)
def process_message(self, message, metadata) -> Optional[ProcessedMessage]:
    """Process a versioned Kafka transaction payload into an InsertBatch.

    Returns None for malformed tuples, unsupported versions, non-insert
    messages, non-transaction events, and events missing a trace context;
    otherwise returns an InsertBatch containing one processed row.
    """
    processed = {"deleted": 0}
    # Messages are (version, type, event, ...) tuples.
    if not (isinstance(message, (list, tuple)) and len(message) >= 2):
        return None
    version = message[0]
    if version not in (0, 1, 2):
        return None
    type_, event = message[1:3]
    if type_ != "insert":
        return None
    data = event["data"]
    event_type = data.get("type")
    if event_type != "transaction":
        return None
    extract_base(processed, event)
    # Clients send UTC epoch seconds; utcfromtimestamp yields a naive UTC
    # datetime. (Previously datetime.fromtimestamp interpreted the value
    # in the server's local timezone, skewing retention buckets.)
    processed["retention_days"] = enforce_retention(
        event,
        datetime.utcfromtimestamp(data["timestamp"]),
    )
    if not data.get("contexts", {}).get("trace"):
        return None
    transaction_ctx = data["contexts"]["trace"]
    trace_id = transaction_ctx["trace_id"]
    try:
        processed["event_id"] = str(uuid.UUID(processed["event_id"]))
        processed["trace_id"] = str(uuid.UUID(trace_id))
        # span_id arrives as a hex string; store it as an int.
        processed["span_id"] = int(transaction_ctx["span_id"], 16)
        processed["transaction_op"] = _unicodify(
            transaction_ctx.get("op") or "")
        processed["transaction_name"] = _unicodify(
            data.get("transaction") or "")
        processed["start_ts"], processed["start_ms"] = self.__extract_timestamp(
            data["start_timestamp"],
        )
        status = transaction_ctx.get("status", None)
        if status:
            int_status = SPAN_STATUS_NAME_TO_CODE.get(
                status, UNKNOWN_SPAN_STATUS)
        else:
            int_status = UNKNOWN_SPAN_STATUS
        processed["transaction_status"] = int_status
        if data["timestamp"] - data["start_timestamp"] < 0:
            # Seems we have some negative durations in the DB
            metrics.increment("negative_duration")
    except Exception:
        # all these fields are required but we saw some events go through here
        # in the past. For now bail.
        return None
    processed["finish_ts"], processed["finish_ms"] = self.__extract_timestamp(
        data["timestamp"],
    )
    duration_secs = (processed["finish_ts"] -
                     processed["start_ts"]).total_seconds()
    # Durations are stored as milliseconds, clamped to zero.
    processed["duration"] = max(int(duration_secs * 1000), 0)
    processed["platform"] = _unicodify(event["platform"])
    tags = _as_dict_safe(data.get("tags", None))
    processed["tags.key"], processed["tags.value"] = extract_extra_tags(
        tags)
    processed["_tags_flattened"] = flatten_nested_field(
        processed["tags.key"], processed["tags.value"])
    promoted_tags = {
        col: tags[col]
        for col in self.PROMOTED_TAGS if col in tags
    }
    processed["release"] = promoted_tags.get(
        "sentry:release",
        event.get("release"),
    )
    processed["environment"] = promoted_tags.get("environment")
    contexts = _as_dict_safe(data.get("contexts", None))
    # The user interface key changed over time; fall back to the legacy name.
    user_dict = data.get("user", data.get("sentry.interfaces.User", None)) or {}
    geo = user_dict.get("geo", None) or {}
    if "geo" not in contexts and isinstance(geo, dict):
        contexts["geo"] = geo
    measurements = data.get("measurements")
    if measurements is not None:
        try:
            (
                processed["measurements.key"],
                processed["measurements.value"],
            ) = extract_nested(measurements,
                               lambda value: float(value["value"]))
        except Exception:
            # Not failing the event in this case just yet, because we are still
            # developing this feature.
            logger.error(
                "Invalid measurements field.",
                extra={"measurements": measurements},
                exc_info=True,
            )
    request = data.get("request", data.get("sentry.interfaces.Http", None)) or {}
    http_data: MutableMapping[str, Any] = {}
    extract_http(http_data, request)
    processed["http_method"] = http_data["http_method"]
    processed["http_referer"] = http_data["http_referer"]
    processed["contexts.key"], processed[
        "contexts.value"] = extract_extra_contexts(contexts)
    processed["_contexts_flattened"] = flatten_nested_field(
        processed["contexts.key"], processed["contexts.value"])
    processed["dist"] = _unicodify(
        promoted_tags.get("sentry:dist", data.get("dist")),
    )
    user_data = {}
    extract_user(user_data, user_dict)
    processed["user"] = promoted_tags.get("sentry:user", "")
    processed["user_name"] = user_data["username"]
    processed["user_id"] = user_data["user_id"]
    processed["user_email"] = user_data["email"]
    # Split the validated IP into version-specific columns.
    ip_address = _ensure_valid_ip(user_data["ip_address"])
    if ip_address:
        if ip_address.version == 4:
            processed["ip_address_v4"] = str(ip_address)
        elif ip_address.version == 6:
            processed["ip_address_v6"] = str(ip_address)
    processed["partition"] = metadata.partition
    processed["offset"] = metadata.offset
    sdk = data.get("sdk", None) or {}
    processed["sdk_name"] = _unicodify(sdk.get("name") or "")
    processed["sdk_version"] = _unicodify(sdk.get("version") or "")
    if processed["sdk_name"] == "":
        metrics.increment("missing_sdk_name")
    if processed["sdk_version"] == "":
        metrics.increment("missing_sdk_version")
    return InsertBatch([processed])
def extract_user(output, user):
    """Flatten the event's user dict into output columns.

    Writes user_id/username/email (unicodified) and a validated
    ip_address (stringified, or None when absent or invalid).
    """
    field_map = {'user_id': 'id', 'username': 'username', 'email': 'email'}
    for out_key, in_key in field_map.items():
        output[out_key] = _unicodify(user.get(in_key, None))
    parsed_ip = _ensure_valid_ip(user.get('ip_address', None))
    output['ip_address'] = None if parsed_ip is None else str(parsed_ip)
def process_message(
        self, message: Tuple[int, str, Any],
        metadata: KafkaMessageMetadata) -> Optional[ProcessedMessage]:
    """Convert a versioned Kafka transaction payload into an InsertBatch.

    Returns None for malformed tuples, unsupported versions, non-insert
    messages, non-transaction events, events past retention, and events
    missing a trace context; otherwise returns an InsertBatch containing
    one processed row.
    """
    processed: MutableMapping[str, Any] = {"deleted": 0}
    # Messages are (version, type, event, ...) tuples.
    if not (isinstance(message, (list, tuple)) and len(message) >= 2):
        return None
    version = message[0]
    if version not in (0, 1, 2):
        return None
    type_, event = message[1:3]
    if type_ != "insert":
        return None
    data = event["data"]
    event_type = data.get("type")
    if event_type != "transaction":
        return None
    extract_base(processed, event)
    try:
        # We are purposely using a naive datetime here to work with the
        # rest of the codebase. We can be confident that clients are only
        # sending UTC dates.
        processed["retention_days"] = enforce_retention(
            event,
            datetime.utcfromtimestamp(data["timestamp"]),
        )
    except EventTooOld:
        return None
    if not data.get("contexts", {}).get("trace"):
        return None
    transaction_ctx = data["contexts"]["trace"]
    trace_id = transaction_ctx["trace_id"]
    processed["event_id"] = str(uuid.UUID(processed["event_id"]))
    processed["trace_id"] = str(uuid.UUID(trace_id))
    # span_id arrives as a hex string; store it as an int.
    processed["span_id"] = int(transaction_ctx["span_id"], 16)
    processed["transaction_op"] = _unicodify(
        transaction_ctx.get("op") or "")
    processed["transaction_name"] = _unicodify(
        data.get("transaction") or "")
    processed["start_ts"], processed[
        "start_ms"] = self.__extract_timestamp(data["start_timestamp"], )
    status = transaction_ctx.get("status", None)
    if status:
        int_status = SPAN_STATUS_NAME_TO_CODE.get(status,
                                                  UNKNOWN_SPAN_STATUS)
    else:
        int_status = UNKNOWN_SPAN_STATUS
    processed["transaction_status"] = int_status
    if data["timestamp"] - data["start_timestamp"] < 0:
        # Seems we have some negative durations in the DB
        metrics.increment("negative_duration")
    processed["finish_ts"], processed[
        "finish_ms"] = self.__extract_timestamp(data["timestamp"], )
    duration_secs = (processed["finish_ts"] -
                     processed["start_ts"]).total_seconds()
    # Durations are stored as milliseconds, clamped to zero for the
    # negative cases counted above.
    processed["duration"] = max(int(duration_secs * 1000), 0)
    processed["platform"] = _unicodify(event["platform"])
    tags: Mapping[str, Any] = _as_dict_safe(data.get("tags", None))
    processed["tags.key"], processed["tags.value"] = extract_extra_tags(
        tags)
    promoted_tags = {
        col: tags[col]
        for col in self.PROMOTED_TAGS if col in tags
    }
    processed["release"] = promoted_tags.get(
        "sentry:release",
        event.get("release"),
    )
    processed["environment"] = promoted_tags.get("environment")
    contexts: MutableMapping[str, Any] = _as_dict_safe(
        data.get("contexts", None))
    # The user interface key changed over time; fall back to the legacy name.
    user_dict = data.get("user", data.get("sentry.interfaces.User",
                                          None)) or {}
    # Promote the user's geo dict into contexts unless one is already present.
    geo = user_dict.get("geo", None) or {}
    if "geo" not in contexts and isinstance(geo, dict):
        contexts["geo"] = geo
    measurements = data.get("measurements")
    if measurements is not None:
        try:
            # Keep only numeric measurement values; others become None.
            (
                processed["measurements.key"],
                processed["measurements.value"],
            ) = extract_nested(
                measurements,
                lambda value: float(value["value"]) if
                (value is not None and isinstance(
                    value.get("value"), numbers.Number)) else None,
            )
        except Exception:
            # Not failing the event in this case just yet, because we are still
            # developing this feature.
            logger.error(
                "Invalid measurements field.",
                extra={"measurements": measurements},
                exc_info=True,
            )
    breakdowns = data.get("breakdowns")
    if breakdowns is not None:
        span_op_breakdowns = breakdowns.get("span_ops")
        if span_op_breakdowns is not None:
            try:
                # Same numeric filtering as measurements above.
                (
                    processed["span_op_breakdowns.key"],
                    processed["span_op_breakdowns.value"],
                ) = extract_nested(
                    span_op_breakdowns,
                    lambda value: float(value["value"]) if
                    (value is not None and isinstance(
                        value.get("value"), numbers.Number)) else None,
                )
            except Exception:
                # Not failing the event in this case just yet, because we are still
                # developing this feature.
                logger.error(
                    "Invalid breakdowns.span_ops field.",
                    extra={"span_op_breakdowns": span_op_breakdowns},
                    exc_info=True,
                )
    request = data.get("request", data.get("sentry.interfaces.Http",
                                           None)) or {}
    http_data: MutableMapping[str, Any] = {}
    extract_http(http_data, request)
    processed["http_method"] = http_data["http_method"]
    processed["http_referer"] = http_data["http_referer"]
    # Remove contexts configured to be skipped for this project.
    skipped_contexts = settings.TRANSACT_SKIP_CONTEXT_STORE.get(
        processed["project_id"], set())
    for context in skipped_contexts:
        if context in contexts:
            del contexts[context]
    processed["contexts.key"], processed[
        "contexts.value"] = extract_extra_contexts(contexts)
    processed["dist"] = _unicodify(
        promoted_tags.get("sentry:dist", data.get("dist")),
    )
    user_data: MutableMapping[str, Any] = {}
    extract_user(user_data, user_dict)
    processed["user"] = promoted_tags.get("sentry:user", "")
    processed["user_name"] = user_data["username"]
    processed["user_id"] = user_data["user_id"]
    processed["user_email"] = user_data["email"]
    # Split the validated IP into version-specific columns.
    ip_address = _ensure_valid_ip(user_data["ip_address"])
    if ip_address:
        if ip_address.version == 4:
            processed["ip_address_v4"] = str(ip_address)
        elif ip_address.version == 6:
            processed["ip_address_v6"] = str(ip_address)
    processed["partition"] = metadata.partition
    processed["offset"] = metadata.offset
    sdk = data.get("sdk", None) or {}
    processed["sdk_name"] = _unicodify(sdk.get("name") or "")
    processed["sdk_version"] = _unicodify(sdk.get("version") or "")
    # Track SDK metadata gaps for observability.
    if processed["sdk_name"] == "":
        metrics.increment("missing_sdk_name")
    if processed["sdk_version"] == "":
        metrics.increment("missing_sdk_version")
    return InsertBatch([processed], None)
def process_message(self, message, metadata=None) -> Optional[ProcessedMessage]:
    """Process a versioned Kafka transaction payload into a ProcessedMessage.

    Returns None for malformed tuples, unsupported versions, non-insert
    messages, non-transaction events, and events missing a trace context;
    otherwise returns an insert ProcessedMessage with one row.
    """
    action_type = ProcessorAction.INSERT
    processed = {'deleted': 0}
    # Messages are (version, type, event, ...) tuples.
    if not (isinstance(message, (list, tuple)) and len(message) >= 2):
        return None
    version = message[0]
    if version not in (0, 1, 2):
        return None
    type_, event = message[1:3]
    if type_ != 'insert':
        return None
    data = event["data"]
    event_type = data.get("type")
    if event_type != "transaction":
        return None
    extract_base(processed, event)
    processed["retention_days"] = enforce_retention(
        event,
        datetime.fromtimestamp(data['timestamp']),
    )
    # Bail out instead of raising KeyError when the trace context is
    # missing (consistent with the other transaction processors).
    if not data.get("contexts", {}).get("trace"):
        return None
    transaction_ctx = data["contexts"]["trace"]
    trace_id = transaction_ctx["trace_id"]
    processed["event_id"] = str(uuid.UUID(processed["event_id"]))
    processed["trace_id"] = str(uuid.UUID(trace_id))
    # span_id arrives as a hex string; store it as an int.
    processed["span_id"] = int(transaction_ctx["span_id"], 16)
    processed["transaction_op"] = _unicodify(transaction_ctx.get("op", ""))
    processed["transaction_name"] = _unicodify(data["transaction"])
    processed["start_ts"], processed[
        "start_ms"] = self.__extract_timestamp(data["start_timestamp"], )
    processed["finish_ts"], processed[
        "finish_ms"] = self.__extract_timestamp(data["timestamp"], )
    processed['platform'] = _unicodify(event['platform'])
    tags = _as_dict_safe(data.get('tags', None))
    extract_extra_tags(processed, tags)
    promoted_tags = {
        col: tags[col]
        for col in self.PROMOTED_TAGS if col in tags
    }
    processed["release"] = promoted_tags.get(
        "sentry:release",
        event.get("release"),
    )
    processed["environment"] = promoted_tags.get("environment")
    contexts = _as_dict_safe(data.get('contexts', None))
    extract_extra_contexts(processed, contexts)
    processed["dist"] = _unicodify(
        promoted_tags.get("sentry:dist", data.get("dist")),
    )
    user_data = {}
    extract_user(user_data, data.get("user", {}))
    processed["user"] = promoted_tags.get("sentry:user", "")
    processed["user_name"] = user_data["username"]
    processed["user_id"] = user_data["user_id"]
    processed["user_email"] = user_data["email"]
    # Split the validated IP into version-specific columns.
    ip_address = _ensure_valid_ip(user_data["ip_address"])
    if ip_address:
        if ip_address.version == 4:
            processed["ip_address_v4"] = str(ip_address)
        elif ip_address.version == 6:
            processed["ip_address_v6"] = str(ip_address)
    if metadata is not None:
        processed['partition'] = metadata.partition
        processed['offset'] = metadata.offset
    return ProcessedMessage(
        action=action_type,
        data=[processed],
    )
def process_message(self, message, metadata=None) -> Optional[ProcessedMessage]:
    """Process a versioned Kafka transaction payload into a ProcessedMessage.

    Returns None for malformed tuples, unsupported versions, non-insert
    messages, non-transaction events, events missing a trace context, and
    events whose required fields fail to parse; otherwise returns an
    insert ProcessedMessage with one row.
    """
    action_type = ProcessorAction.INSERT
    processed = {'deleted': 0}
    # Messages are (version, type, event, ...) tuples.
    if not (isinstance(message, (list, tuple)) and len(message) >= 2):
        return None
    version = message[0]
    if version not in (0, 1, 2):
        return None
    type_, event = message[1:3]
    if type_ != 'insert':
        return None
    data = event["data"]
    event_type = data.get("type")
    if event_type != "transaction":
        return None
    extract_base(processed, event)
    # NOTE(review): datetime.fromtimestamp interprets the epoch seconds in
    # the server's local timezone; a sibling processor in this file uses
    # utcfromtimestamp with a note that clients send UTC — confirm which
    # is intended here.
    processed["retention_days"] = enforce_retention(
        event,
        datetime.fromtimestamp(data['timestamp']),
    )
    if not data.get('contexts', {}).get('trace'):
        return None
    transaction_ctx = data["contexts"]["trace"]
    trace_id = transaction_ctx["trace_id"]
    try:
        processed["event_id"] = str(uuid.UUID(processed["event_id"]))
        processed["trace_id"] = str(uuid.UUID(trace_id))
        # span_id arrives as a hex string; store it as an int.
        processed["span_id"] = int(transaction_ctx["span_id"], 16)
        processed["transaction_op"] = _unicodify(
            transaction_ctx.get("op", ""))
        processed["transaction_name"] = _unicodify(data["transaction"])
        processed[
            "start_ts"], processed["start_ms"] = self.__extract_timestamp(
                data["start_timestamp"],
            )
        if data["timestamp"] - data["start_timestamp"] < 0:
            # Seems we have some negative durations in the DB
            metrics.increment('negative_duration')
    except Exception:
        # all these fields are required but we saw some events go through here
        # in the past. For now bail.
        return
    processed["finish_ts"], processed[
        "finish_ms"] = self.__extract_timestamp(data["timestamp"], )
    duration_secs = (processed["finish_ts"] -
                     processed["start_ts"]).total_seconds()
    # Durations are stored as milliseconds, clamped to zero.
    processed['duration'] = max(int(duration_secs * 1000), 0)
    processed['platform'] = _unicodify(event['platform'])
    tags = _as_dict_safe(data.get('tags', None))
    extract_extra_tags(processed, tags)
    promoted_tags = {
        col: tags[col]
        for col in self.PROMOTED_TAGS if col in tags
    }
    processed["release"] = promoted_tags.get(
        "sentry:release",
        event.get("release"),
    )
    processed["environment"] = promoted_tags.get("environment")
    contexts = _as_dict_safe(data.get('contexts', None))
    extract_extra_contexts(processed, contexts)
    processed["dist"] = _unicodify(
        promoted_tags.get("sentry:dist", data.get("dist")),
    )
    user_data = {}
    extract_user(user_data, data.get("user", {}))
    processed["user"] = promoted_tags.get("sentry:user", "")
    processed["user_name"] = user_data["username"]
    processed["user_id"] = user_data["user_id"]
    processed["user_email"] = user_data["email"]
    # Split the validated IP into version-specific columns.
    ip_address = _ensure_valid_ip(user_data["ip_address"])
    if ip_address:
        if ip_address.version == 4:
            processed["ip_address_v4"] = str(ip_address)
        elif ip_address.version == 6:
            processed["ip_address_v6"] = str(ip_address)
    if metadata is not None:
        processed['partition'] = metadata.partition
        processed['offset'] = metadata.offset
    return ProcessedMessage(
        action=action_type,
        data=[processed],
    )