Example #1
    def test_produce_replacement_messages(self):
        producer = FakeConfluentKafkaProducer()
        replacement_topic = enforce_table_writer(
            self.dataset).get_stream_loader().get_replacement_topic_spec()
        test_worker = ConsumerWorker(self.dataset, producer,
                                     replacement_topic.topic_name,
                                     self.metrics)

        test_worker.flush_batch([
            ProcessedMessage(
                action=ProcessorAction.REPLACE,
                data=[('1', {
                    'project_id': 1
                })],
            ),
            ProcessedMessage(
                action=ProcessorAction.REPLACE,
                data=[('2', {
                    'project_id': 2
                })],
            ),
        ])

        assert [(m._topic, m._key, m._value) for m in producer.messages] == [
            ('event-replacements', b'1', b'{"project_id": 1}'),
            ('event-replacements', b'2', b'{"project_id": 2}'),
        ]
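
These tests build ProcessedMessage values carrying a ProcessorAction. For orientation, here is a minimal sketch of the shape those types are assumed to take in all of the examples below (the real definitions live in Snuba's processor module and may differ in detail):

from enum import Enum
from typing import Any, NamedTuple, Sequence


class ProcessorAction(Enum):
    # A processed batch is either inserted into ClickHouse or re-published
    # to a replacements topic (e.g. "event-replacements" above).
    INSERT = "insert"
    REPLACE = "replace"


class ProcessedMessage(NamedTuple):
    action: ProcessorAction
    data: Sequence[Any]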
Example #2
    def test_produce_replacement_messages(self):
        producer = FakeConfluentKafkaProducer()
        test_worker = ConsumerWorker(
            self.dataset,
            producer=producer,
            replacements_topic=Topic(
                enforce_table_writer(self.dataset)
                .get_stream_loader()
                .get_replacement_topic_spec()
                .topic_name
            ),
            metrics=self.metrics,
        )

        test_worker.flush_batch(
            [
                ProcessedMessage(
                    action=ProcessorAction.REPLACE, data=[("1", {"project_id": 1})],
                ),
                ProcessedMessage(
                    action=ProcessorAction.REPLACE, data=[("2", {"project_id": 2})],
                ),
            ]
        )

        assert [(m._topic, m._key, m._value) for m in producer.messages] == [
            ("event-replacements", b"1", b'{"project_id": 1}'),
            ("event-replacements", b"2", b'{"project_id": 2}'),
        ]
Example #3
    def test_v2_start_merge(self):
        project_id = 1
        message = (2, "start_merge", {"project_id": project_id})
        processor = (
            enforce_table_writer(self.dataset).get_stream_loader().get_processor()
        )
        assert processor.process_message(message) == ProcessedMessage(
            action=ProcessorAction.REPLACE, data=[(str(project_id), message)]
        )
Example #4
    def process_message(self,
                        message,
                        metadata=None) -> Optional[ProcessedMessage]:
        action_type = ProcessorAction.INSERT

        projects = message["request"]["body"].get("project", [])
        if not isinstance(projects, (list, tuple)):
            projects = [projects]

        processed = {
            "request_id": str(uuid.UUID(message["request"]["id"])),
            "request_body": self.__to_json_string(message["request"]["body"]),
            "referrer": message["request"]["referrer"] or "",
            "dataset": message["dataset"],
            "projects": projects,
            # TODO: This column is empty for now, we plan to use it soon as we
            # will start to write org IDs into events and allow querying by org.
            "organization": None,
            "timestamp": message["timing"]["timestamp"],
            "duration_ms": message["timing"]["duration_ms"],
            "status": message["status"],
            **self.__extract_query_list(message["query_list"]),
        }

        return ProcessedMessage(
            action=action_type,
            data=[processed],
        )
Example #5
    def process_message(self,
                        value,
                        metadata=None) -> Optional[ProcessedMessage]:
        assert isinstance(value, dict)
        v_uuid = value.get("event_id")
        message = {
            "org_id": value.get("org_id", 0),
            "project_id": value.get("project_id", 0),
            "key_id": value.get("key_id"),
            "timestamp": _ensure_valid_date(
                datetime.strptime(value["timestamp"], settings.PAYLOAD_DATETIME_FORMAT)
            ),
            "outcome": value["outcome"],
            "reason": _unicodify(value.get("reason")),
            "event_id": str(uuid.UUID(v_uuid)) if v_uuid is not None else None,
        }

        return ProcessedMessage(
            action=ProcessorAction.INSERT,
            data=[message],
        )
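
A rough usage sketch for this outcomes processor: the keys below are exactly the ones the code reads, while the literal values, the processor instance, and the assumed ISO-8601-style PAYLOAD_DATETIME_FORMAT are illustrative only.

from datetime import datetime

# Hypothetical payload; the timestamp must parse with settings.PAYLOAD_DATETIME_FORMAT.
value = {
    "org_id": 1,
    "project_id": 2,
    "key_id": None,
    "timestamp": datetime(2020, 1, 1).strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
    "outcome": 1,
    "reason": "rate_limited",
    "event_id": "00000000000000000000000000000000",
}

result = processor.process_message(value)  # processor: an instance of the class above
assert result is not None
assert result.action == ProcessorAction.INSERT
assert result.data[0]["event_id"] == "00000000-0000-0000-0000-000000000000"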
Example #6
    def process_message(self, message, metadata=None) -> Optional[ProcessedMessage]:
        # some old relays accidentally emit rows without release
        if message["release"] is None:
            return None
        if message["duration"] is None:
            duration = None
        else:
            duration = _collapse_uint32(int(message["duration"] * 1000))

        # since duration is not nullable, the max duration means no duration
        if duration is None:
            duration = MAX_UINT32

        processed = {
            "session_id": str(uuid.UUID(message["session_id"])),
            "distinct_id": str(uuid.UUID(message.get("distinct_id") or NIL_UUID)),
            "seq": message["seq"],
            "org_id": message["org_id"],
            "project_id": message["project_id"],
            "retention_days": message["retention_days"],
            "duration": duration,
            "status": STATUS_MAPPING[message["status"]],
            "errors": _collapse_uint16(message["errors"]) or 0,
            "received": _ensure_valid_date(
                datetime.utcfromtimestamp(message["received"])
            ),
            "started": _ensure_valid_date(
                datetime.utcfromtimestamp(message["started"])
            ),
            "release": message["release"],
            "environment": message.get("environment") or "",
        }
        return ProcessedMessage(action=ProcessorAction.INSERT, data=[processed])
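
A hedged sketch of a session payload this processor would accept: the keys are the ones read above, while the values, and the assumption that "exited" is a valid STATUS_MAPPING key, are illustrative.

message = {
    "session_id": "218b41e8-2294-4b9f-9b05-cbcc45e03d2f",
    "distinct_id": None,         # falls back to NIL_UUID
    "seq": 42,
    "org_id": 1,
    "project_id": 2,
    "retention_days": 90,
    "duration": 1.5,             # seconds; stored as 1500 ms after _collapse_uint32
    "status": "exited",          # assumed to be a STATUS_MAPPING key
    "errors": 0,
    "received": 1577880000.0,    # unix timestamps
    "started": 1577879940.0,
    "release": "backend@1.0.0",  # rows with release=None are dropped entirely
    "environment": None,         # stored as ""
}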
Example #7
    def test_v2_end_delete_tag(self):
        project_id = 1
        message = (2, 'end_delete_tag', {'project_id': project_id})
        processor = (
            enforce_table_writer(self.dataset).get_stream_loader().get_processor()
        )
        assert processor.process_message(message) == ProcessedMessage(
            action=ProcessorAction.REPLACE,
            data=[(str(project_id), message)],
        )
Example #8
    def process_message(self, value, metadata) -> Optional[ProcessedMessage]:
        assert isinstance(value, dict)

        partition = metadata.partition
        assert (
            partition == KAFKA_ONLY_PARTITION
        ), "CDC can only work with single partition topics for consistency"

        offset = metadata.offset
        event = value["event"]
        if event == "begin":
            messages = self._process_begin(offset)
        elif event == "commit":
            messages = self._process_commit(offset)
        elif event == "change":
            table_name = value["table"]
            if table_name != self.pg_table:
                return None

            operation = value["kind"]
            if operation == "insert":
                messages = self._process_insert(
                    offset, value["columnnames"], value["columnvalues"]
                )
            elif operation == "update":
                messages = self._process_update(
                    offset,
                    value["oldkeys"],
                    value["columnnames"],
                    value["columnvalues"],
                )
            elif operation == "delete":
                messages = self._process_delete(offset, value["oldkeys"])
            else:
                raise ValueError(
                    "Invalid value for operation in replication log: %s" % value["kind"]
                )
        else:
            raise ValueError(
                "Invalid value for event in replication log: %s" % value["event"]
            )

        if not messages:
            return None

        return ProcessedMessage(action=ProcessorAction.INSERT, data=messages,)
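
The value dicts this CDC processor consumes mirror wal2json replication events. A hedged sketch of the three shapes it distinguishes, with hypothetical table and column data:

begin_event = {"event": "begin", "xid": 1234}
commit_event = {"event": "commit"}
change_event = {
    "event": "change",
    "table": "sentry_groupedmessage",  # anything other than self.pg_table is skipped
    "kind": "insert",                  # or "update" / "delete"
    "columnnames": ["id", "project_id", "status"],
    "columnvalues": [10, 2, 0],
    # "update" and "delete" events also carry the previous key values:
    "oldkeys": {"keynames": ["id"], "keyvalues": [10]},
}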
Example #9
    def process_message(self, value, metadata) -> Optional[ProcessedMessage]:
        assert isinstance(value, dict)

        partition = metadata.partition
        assert partition == KAFKA_ONLY_PARTITION, 'CDC can only work with single partition topics for consistency'

        offset = metadata.offset
        event = value['event']
        if event == 'begin':
            messages = self._process_begin(offset)
        elif event == 'commit':
            messages = self._process_commit(offset)
        elif event == 'change':
            table_name = value['table']
            if table_name != self.pg_table:
                return None

            operation = value['kind']
            if operation == 'insert':
                messages = self._process_insert(offset, value['columnnames'],
                                                value['columnvalues'])
            elif operation == 'update':
                messages = self._process_update(offset, value['oldkeys'],
                                                value['columnnames'],
                                                value['columnvalues'])
            elif operation == 'delete':
                messages = self._process_delete(offset, value['oldkeys'])
            else:
                raise ValueError(
                    "Invalid value for operation in replication log: %s" %
                    value['kind'])
        else:
            raise ValueError("Invalid value for event in replication log: %s" %
                             value['event'])

        if not messages:
            return None

        return ProcessedMessage(
            action=ProcessorAction.INSERT,
            data=messages,
        )
Example #10
    def process_message(self,
                        message,
                        metadata=None) -> Optional[ProcessedMessage]:
        """\
        Process a raw message into a tuple of (action_type, processed_message):
        * action_type: one of the sentinel values INSERT or REPLACE
        * processed_message: dict representing the processed column -> value(s)

        Returns `None` if the event is too old to be written.
        """
        action_type = None

        if isinstance(message, dict):
            # deprecated unwrapped event message == insert
            action_type = ProcessorAction.INSERT
            try:
                processed = self.process_insert(message, metadata)
            except EventTooOld:
                return None
        elif isinstance(message, (list, tuple)) and len(message) >= 2:
            version = message[0]

            if version in (0, 1, 2):
                # version 0: (0, 'insert', data)
                # version 1: (1, type, data, [state])
                #   NOTE: types 'delete_groups', 'merge' and 'unmerge' are ignored
                # version 2: (2, type, data, [state])
                type_, event = message[1:3]
                if type_ == 'insert':
                    action_type = ProcessorAction.INSERT
                    try:
                        processed = self.process_insert(event, metadata)
                    except EventTooOld:
                        return None
                else:
                    if version == 0:
                        raise InvalidMessageType(
                            "Invalid message type: {}".format(type_))
                    elif version == 1:
                        if type_ in ('delete_groups', 'merge', 'unmerge'):
                            # these didn't contain the necessary data to handle replacements
                            return None
                        else:
                            raise InvalidMessageType(
                                "Invalid message type: {}".format(type_))
                    elif version == 2:
                        # we temporarily sent these invalid message types from Sentry
                        if type_ in ('delete_groups', 'merge'):
                            return None

                        if type_ in ('start_delete_groups', 'start_merge',
                                     'start_unmerge', 'start_delete_tag',
                                     'end_delete_groups', 'end_merge',
                                     'end_unmerge', 'end_delete_tag'):
                            # pass raw events along to republish
                            action_type = ProcessorAction.REPLACE
                            processed = (str(event['project_id']), message)
                        else:
                            raise InvalidMessageType(
                                "Invalid message type: {}".format(type_))

        if action_type is None:
            raise InvalidMessageVersion("Unknown message format: " +
                                        str(message))

        if processed is None:
            return None

        return ProcessedMessage(
            action=action_type,
            data=[processed],
        )
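
To make the docstring above concrete, a few hypothetical inputs and the outcomes this dispatch maps them to (the event payload itself is elided; process_insert defines what it must contain):

event = {"project_id": 1, "data": {}}

insert_v0 = (0, "insert", event)        # -> INSERT (or None if the event is too old)
insert_v2 = (2, "insert", event)        # -> INSERT, same path as version 0
ignored_v1 = (1, "merge", event)        # -> None: v1 replacement types are dropped
replace_v2 = (2, "start_merge", event)  # -> REPLACE, data=[("1", replace_v2)]
invalid_v2 = (2, "bogus_type", event)   # -> raises InvalidMessageType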
Example #11
def test_simple():
    request_body = {
        "selected_columns": ["event_id"],
        "orderby": "event_id",
        "sample": 0.1,
        "limit": 100,
        "offset": 50,
        "project": 1,
    }

    query = Query(
        request_body,
        get_storage("events").get_schemas().get_read_schema().get_data_source(),
    )

    request = Request(
        uuid.UUID("a" * 32).hex, query, HTTPRequestSettings(), {}, "search"
    )

    time = TestingClock()

    timer = Timer("test", clock=time)
    time.sleep(0.01)

    message = SnubaQueryMetadata(
        request=request,
        dataset=get_dataset("events"),
        timer=timer,
        query_list=[
            ClickhouseQueryMetadata(
                sql="select event_id from sentry_dist sample 0.1 prewhere project_id in (1) limit 50, 100",
                stats={"sample": 10},
                status="success",
                trace_id="b" * 32,
            )
        ],
    ).to_dict()

    processor = (
        enforce_table_writer(get_dataset("querylog")).get_stream_loader().get_processor()
    )

    assert processor.process_message(message) == ProcessedMessage(
        ProcessorAction.INSERT,
        [
            {
                "request_id": str(uuid.UUID("a" * 32)),
                "request_body": '{"limit": 100, "offset": 50, "orderby": "event_id", "project": 1, "sample": 0.1, "selected_columns": ["event_id"]}',
                "referrer": "search",
                "dataset": get_dataset("events"),
                "projects": [1],
                "organization": None,
                "timestamp": timer.for_json()["timestamp"],
                "duration_ms": 10,
                "status": "success",
                "clickhouse_queries.sql": [
                    "select event_id from sentry_dist sample 0.1 prewhere project_id in (1) limit 50, 100"
                ],
                "clickhouse_queries.status": ["success"],
                "clickhouse_queries.trace_id": [str(uuid.UUID("b" * 32))],
                "clickhouse_queries.duration_ms": [0],
                "clickhouse_queries.stats": ['{"sample": 10}'],
                "clickhouse_queries.final": [0],
                "clickhouse_queries.cache_hit": [0],
                "clickhouse_queries.sample": [10.0],
                "clickhouse_queries.max_threads": [0],
                "clickhouse_queries.num_days": [0],
                "clickhouse_queries.clickhouse_table": [""],
                "clickhouse_queries.query_id": [""],
                "clickhouse_queries.is_duplicate": [0],
                "clickhouse_queries.consistent": [0],
            }
        ],
    )
Example #12
class TestSnapshotWorker:

    test_data = [
        (INSERT_MSG % {"xid": 90}, None),
        (INSERT_MSG % {"xid": 100}, None),
        (INSERT_MSG % {"xid": 110}, None),
        (
            INSERT_MSG % {"xid": 120},
            ProcessedMessage(action=ProcessorAction.INSERT, data=[PROCESSED]),
        ),
        (
            INSERT_MSG % {"xid": 210},
            ProcessedMessage(action=ProcessorAction.INSERT, data=[PROCESSED]),
        ),
    ]

    @pytest.mark.parametrize("value, expected", test_data)
    def test_send_message(
        self,
        value: str,
        expected: Optional[ProcessedMessage],
    ) -> None:
        storage = get_storage("groupedmessages")
        snapshot_id = uuid1()
        transact_data = TransactionData(xmin=100, xmax=200, xip_list=[120, 130])

        worker = SnapshotAwareWorker(
            storage=storage,
            producer=FakeConfluentKafkaProducer(),
            snapshot_id=str(snapshot_id),
            transaction_data=transact_data,
            replacements_topic=None,
            metrics=DummyMetricsBackend(strict=True),
        )

        message: Message[KafkaPayload] = Message(
            Partition(Topic("topic"), 0),
            1,
            KafkaPayload(
                None,
                value.encode("utf-8"),
                [("table", "sentry_groupedmessage".encode())],
            ),
            datetime.now(),
        )

        ret = worker.process_message(message)
        assert ret == expected
Example #13
    def process_message(self,
                        message,
                        metadata=None) -> Optional[ProcessedMessage]:
        action_type = ProcessorAction.INSERT
        processed = {"deleted": 0}
        if not (isinstance(message, (list, tuple)) and len(message) >= 2):
            return None
        version = message[0]
        if version not in (0, 1, 2):
            return None
        type_, event = message[1:3]
        if type_ != "insert":
            return None

        data = event["data"]
        event_type = data.get("type")
        if event_type != "transaction":
            return None
        extract_base(processed, event)
        processed["retention_days"] = enforce_retention(
            event,
            datetime.fromtimestamp(data["timestamp"]),
        )
        if not data.get("contexts", {}).get("trace"):
            return None

        transaction_ctx = data["contexts"]["trace"]
        trace_id = transaction_ctx["trace_id"]
        try:
            processed["event_id"] = str(uuid.UUID(processed["event_id"]))
            processed["trace_id"] = str(uuid.UUID(trace_id))
            processed["span_id"] = int(transaction_ctx["span_id"], 16)
            processed["transaction_op"] = _unicodify(
                transaction_ctx.get("op", ""))
            processed["transaction_name"] = _unicodify(data["transaction"])
            processed[
                "start_ts"], processed["start_ms"] = self.__extract_timestamp(
                    data["start_timestamp"], )

            status = transaction_ctx.get("status", None)
            if status:
                int_status = SPAN_STATUS_NAME_TO_CODE.get(
                    status, UNKNOWN_SPAN_STATUS)
            else:
                int_status = UNKNOWN_SPAN_STATUS

            processed["transaction_status"] = int_status

            if data["timestamp"] - data["start_timestamp"] < 0:
                # Seems we have some negative durations in the DB
                metrics.increment("negative_duration")
        except Exception:
            # all these fields are required but we saw some events go through here
            # in the past.  For now bail.
            return
        processed["finish_ts"], processed[
            "finish_ms"] = self.__extract_timestamp(data["timestamp"], )

        duration_secs = (processed["finish_ts"] -
                         processed["start_ts"]).total_seconds()
        processed["duration"] = max(int(duration_secs * 1000), 0)

        processed["platform"] = _unicodify(event["platform"])

        tags = _as_dict_safe(data.get("tags", None))
        processed["tags.key"], processed["tags.value"] = extract_extra_tags(
            tags)
        processed["_tags_flattened"] = flatten_nested_field(
            processed["tags.key"], processed["tags.value"])

        promoted_tags = {
            col: tags[col]
            for col in self.PROMOTED_TAGS if col in tags
        }
        processed["release"] = promoted_tags.get(
            "sentry:release",
            event.get("release"),
        )
        processed["environment"] = promoted_tags.get("environment")

        contexts = _as_dict_safe(data.get("contexts", None))

        user_dict = data.get("user", data.get("sentry.interfaces.User",
                                              None)) or {}
        geo = user_dict.get("geo", None) or {}
        if "geo" not in contexts and isinstance(geo, dict):
            contexts["geo"] = geo

        processed["contexts.key"], processed[
            "contexts.value"] = extract_extra_contexts(contexts)
        processed["_contexts_flattened"] = flatten_nested_field(
            processed["contexts.key"], processed["contexts.value"])

        processed["dist"] = _unicodify(
            promoted_tags.get("sentry:dist", data.get("dist")), )

        user_data = {}
        extract_user(user_data, user_dict)
        processed["user"] = promoted_tags.get("sentry:user", "")
        processed["user_name"] = user_data["username"]
        processed["user_id"] = user_data["user_id"]
        processed["user_email"] = user_data["email"]
        ip_address = _ensure_valid_ip(user_data["ip_address"])

        if ip_address:
            if ip_address.version == 4:
                processed["ip_address_v4"] = str(ip_address)
            elif ip_address.version == 6:
                processed["ip_address_v6"] = str(ip_address)

        if metadata is not None:
            processed["partition"] = metadata.partition
            processed["offset"] = metadata.offset

        sdk = data.get("sdk", None) or {}
        processed["sdk_name"] = _unicodify(sdk.get("name", ""))
        processed["sdk_version"] = _unicodify(sdk.get("version", ""))

        if processed["sdk_name"] == "":
            metrics.increment("missing_sdk_name")
        if processed["sdk_version"] == "":
            metrics.increment("missing_sdk_version")

        return ProcessedMessage(
            action=action_type,
            data=[processed],
        )
Example #14
    def process_message(self,
                        message,
                        metadata=None) -> Optional[ProcessedMessage]:
        action_type = ProcessorAction.INSERT
        processed = {'deleted': 0}
        if not (isinstance(message, (list, tuple)) and len(message) >= 2):
            return None
        version = message[0]
        if version not in (0, 1, 2):
            return None
        type_, event = message[1:3]
        if type_ != 'insert':
            return None

        data = event["data"]
        event_type = data.get("type")
        if event_type != "transaction":
            return None
        extract_base(processed, event)
        processed["retention_days"] = enforce_retention(
            event,
            datetime.fromtimestamp(data['timestamp']),
        )

        transaction_ctx = data["contexts"]["trace"]
        trace_id = transaction_ctx["trace_id"]
        processed["event_id"] = str(uuid.UUID(processed["event_id"]))
        processed["trace_id"] = str(uuid.UUID(trace_id))
        processed["span_id"] = int(transaction_ctx["span_id"], 16)
        processed["transaction_op"] = _unicodify(transaction_ctx.get("op", ""))
        processed["transaction_name"] = _unicodify(data["transaction"])

        processed["start_ts"], processed[
            "start_ms"] = self.__extract_timestamp(data["start_timestamp"], )
        processed["finish_ts"], processed[
            "finish_ms"] = self.__extract_timestamp(data["timestamp"], )

        processed['platform'] = _unicodify(event['platform'])

        tags = _as_dict_safe(data.get('tags', None))
        extract_extra_tags(processed, tags)

        promoted_tags = {
            col: tags[col]
            for col in self.PROMOTED_TAGS if col in tags
        }
        processed["release"] = promoted_tags.get(
            "sentry:release",
            event.get("release"),
        )
        processed["environment"] = promoted_tags.get("environment")

        contexts = _as_dict_safe(data.get('contexts', None))
        extract_extra_contexts(processed, contexts)

        processed["dist"] = _unicodify(
            promoted_tags.get("sentry:dist", data.get("dist")), )

        user_data = {}
        extract_user(user_data, data.get("user", {}))
        processed["user"] = promoted_tags.get("sentry:user", "")
        processed["user_name"] = user_data["username"]
        processed["user_id"] = user_data["user_id"]
        processed["user_email"] = user_data["email"]
        ip_address = _ensure_valid_ip(user_data["ip_address"])

        if ip_address:
            if ip_address.version == 4:
                processed["ip_address_v4"] = str(ip_address)
            elif ip_address.version == 6:
                processed["ip_address_v6"] = str(ip_address)

        if metadata is not None:
            processed['partition'] = metadata.partition
            processed['offset'] = metadata.offset

        return ProcessedMessage(
            action=action_type,
            data=[processed],
        )
Example #15
class TestSnapshotWorker:

    test_data = [
        (INSERT_MSG % {"xid": 90}, None),
        (INSERT_MSG % {"xid": 100}, None),
        (INSERT_MSG % {"xid": 110}, None),
        (
            INSERT_MSG % {"xid": 120},
            ProcessedMessage(action=ProcessorAction.INSERT, data=[PROCESSED]),
        ),
        (
            INSERT_MSG % {"xid": 210},
            ProcessedMessage(action=ProcessorAction.INSERT, data=[PROCESSED]),
        ),
    ]

    @pytest.mark.parametrize("message, expected", test_data)
    def test_send_message(
        self,
        message: str,
        expected: Optional[ProcessedMessage],
    ) -> None:
        dataset = get_dataset("groupedmessage")
        snapshot_id = uuid1()
        transact_data = TransactionData(xmin=100, xmax=200, xip_list=[120, 130])

        worker = SnapshotAwareWorker(
            dataset=dataset,
            producer=FakeConfluentKafkaProducer(),
            snapshot_id=str(snapshot_id),
            transaction_data=transact_data,
            replacements_topic=None,
            metrics=DummyMetricsBackend(strict=True),
        )

        ret = worker.process_message(
            KafkaMessage(
                TopicPartition('topic', 0),
                1,
                message.encode('utf-8'),
            ))
        assert ret == expected
Example #16
    def process_message(self,
                        message,
                        metadata=None) -> Optional[ProcessedMessage]:
        action_type = ProcessorAction.INSERT
        processed = {'deleted': 0}
        if not (isinstance(message, (list, tuple)) and len(message) >= 2):
            return None
        version = message[0]
        if version not in (0, 1, 2):
            return None
        type_, event = message[1:3]
        if type_ != 'insert':
            return None

        data = event["data"]
        event_type = data.get("type")
        if event_type != "transaction":
            return None
        extract_base(processed, event)
        processed["retention_days"] = enforce_retention(
            event,
            datetime.fromtimestamp(data['timestamp']),
        )
        if not data.get('contexts', {}).get('trace'):
            return None

        transaction_ctx = data["contexts"]["trace"]
        trace_id = transaction_ctx["trace_id"]
        try:
            processed["event_id"] = str(uuid.UUID(processed["event_id"]))
            processed["trace_id"] = str(uuid.UUID(trace_id))
            processed["span_id"] = int(transaction_ctx["span_id"], 16)
            processed["transaction_op"] = _unicodify(
                transaction_ctx.get("op", ""))
            processed["transaction_name"] = _unicodify(data["transaction"])
            processed[
                "start_ts"], processed["start_ms"] = self.__extract_timestamp(
                    data["start_timestamp"], )
            if data["timestamp"] - data["start_timestamp"] < 0:
                # Seems we have some negative durations in the DB
                metrics.increment('negative_duration')
        except Exception:
            # all these fields are required but we saw some events go through here
            # in the past.  For now bail.
            return
        processed["finish_ts"], processed[
            "finish_ms"] = self.__extract_timestamp(data["timestamp"], )

        duration_secs = (processed["finish_ts"] -
                         processed["start_ts"]).total_seconds()
        processed['duration'] = max(int(duration_secs * 1000), 0)

        processed['platform'] = _unicodify(event['platform'])

        tags = _as_dict_safe(data.get('tags', None))
        extract_extra_tags(processed, tags)

        promoted_tags = {
            col: tags[col]
            for col in self.PROMOTED_TAGS if col in tags
        }
        processed["release"] = promoted_tags.get(
            "sentry:release",
            event.get("release"),
        )
        processed["environment"] = promoted_tags.get("environment")

        contexts = _as_dict_safe(data.get('contexts', None))
        extract_extra_contexts(processed, contexts)

        processed["dist"] = _unicodify(
            promoted_tags.get("sentry:dist", data.get("dist")), )

        user_data = {}
        extract_user(user_data, data.get("user", {}))
        processed["user"] = promoted_tags.get("sentry:user", "")
        processed["user_name"] = user_data["username"]
        processed["user_id"] = user_data["user_id"]
        processed["user_email"] = user_data["email"]
        ip_address = _ensure_valid_ip(user_data["ip_address"])

        if ip_address:
            if ip_address.version == 4:
                processed["ip_address_v4"] = str(ip_address)
            elif ip_address.version == 6:
                processed["ip_address_v6"] = str(ip_address)

        if metadata is not None:
            processed['partition'] = metadata.partition
            processed['offset'] = metadata.offset

        return ProcessedMessage(
            action=action_type,
            data=[processed],
        )