Example #1
    def test_missing_trace_context(self) -> None:
        start, finish = self.__get_timestamps()
        message = TransactionEvent(
            event_id="e5e062bf2e1d4afd96fd2f90b6770431",
            trace_id="7400045b25c443b885914600aa83ad04",
            span_id="8841662216cc598b",
            transaction_name="/organizations/:orgId/issues/",
            status="cancelled",
            op="navigation",
            timestamp=finish,
            start_timestamp=start,
            platform="python",
            dist="",
            user_name="me",
            user_id="myself",
            user_email="*****@*****.**",
            ipv4="127.0.0.1",
            ipv6=None,
            environment="prod",
            release="34a554c14b68285d8a8eb6c5c4c56dfc1db9a83a",
            sdk_name="sentry.python",
            sdk_version="0.9.0",
            http_method="POST",
            http_referer="tagstore.something",
            geo={"country_code": "XY", "region": "fake_region", "city": "fake_city"},
        )
        payload = message.serialize()
        # Force an invalid event
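        # serialize() presumably yields a (version, action, event) tuple, so
        # payload[2]["data"] is the event body; deleting "contexts" strips the
        # trace context the processor requires, making the event unprocessable.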
        del payload[2]["data"]["contexts"]

        meta = KafkaMessageMetadata(
            offset=1, partition=2, timestamp=datetime(1970, 1, 1)
        )
        processor = TransactionsMessageProcessor()
        assert processor.process_message(payload, meta) is None
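Several of these tests call a private helper, self.__get_timestamps(), that the excerpts do not include. A minimal sketch of what it plausibly does, assuming it returns a (start, finish) pair of epoch-seconds floats with finish a few seconds after start (the real helper's body is not shown in the source):

    from datetime import datetime, timedelta

    def __get_timestamps(self):
        # Hypothetical reconstruction: produce a start/finish pair a few
        # seconds apart so the processor derives a positive duration.
        finish = datetime.utcnow()
        start = finish - timedelta(seconds=5)
        return start.timestamp(), finish.timestamp()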
Example #2
    def test_base_process(self):
        message = TransactionEvent(
            event_id='e5e062bf2e1d4afd96fd2f90b6770431',
            trace_id='7400045b25c443b885914600aa83ad04',
            span_id='8841662216cc598b',
            transaction_name='/organizations/:orgId/issues/',
            op='navigation',
            start_timestamp=1565303392.917,
            timestamp=1565303393.918,
            platform='python',
            dist='',
            user_name='me',
            user_id='myself',
            user_email='*****@*****.**',
            ipv4='127.0.0.1',
            ipv6=None,
            environment='prod',
            release='34a554c14b68285d8a8eb6c5c4c56dfc1db9a83a',
        )
        meta = KafkaMessageMetadata(
            offset=1,
            partition=2,
        )

        processor = TransactionsMessageProcessor()
        ret = processor.process_message(message.serialize(), meta)

        assert ret.action == ProcessorAction.INSERT
        assert ret.data == [message.build_result(meta)]
    def test_missing_trace_context(self):
        message = TransactionEvent(
            event_id='e5e062bf2e1d4afd96fd2f90b6770431',
            trace_id='7400045b25c443b885914600aa83ad04',
            span_id='8841662216cc598b',
            transaction_name='/organizations/:orgId/issues/',
            op='navigation',
            start_timestamp=1565303392.917,
            timestamp=1565303393.918,
            platform='python',
            dist='',
            user_name='me',
            user_id='myself',
            user_email='*****@*****.**',
            ipv4='127.0.0.1',
            ipv6=None,
            environment='prod',
            release='34a554c14b68285d8a8eb6c5c4c56dfc1db9a83a',
        )
        payload = message.serialize()
        # Force an invalid event
        del payload[2]['data']['contexts']

        meta = KafkaMessageMetadata(offset=1, partition=2)
        processor = TransactionsMessageProcessor()
        assert processor.process_message(payload, meta) is None
Example #3
    def test_base_process(self):
        start, finish = self.__get_timestamps()
        message = TransactionEvent(
            event_id="e5e062bf2e1d4afd96fd2f90b6770431",
            trace_id="7400045b25c443b885914600aa83ad04",
            span_id="8841662216cc598b",
            transaction_name="/organizations/:orgId/issues/",
            status="cancelled",
            op="navigation",
            timestamp=finish,
            start_timestamp=start,
            platform="python",
            dist="",
            user_name="me",
            user_id="myself",
            user_email="*****@*****.**",
            ipv4="127.0.0.1",
            ipv6=None,
            environment="prod",
            release="34a554c14b68285d8a8eb6c5c4c56dfc1db9a83a",
            sdk_name="sentry.python",
            sdk_version="0.9.0",
            geo={
                "country_code": "XY",
                "region": "fake_region",
                "city": "fake_city"
            },
        )
        meta = KafkaMessageMetadata(
            offset=1,
            partition=2,
        )

        processor = TransactionsMessageProcessor()
        ret = processor.process_message(message.serialize(), meta)

        assert ret.action == ProcessorAction.INSERT
        assert ret.data == [message.build_result(meta)]
Example #5
    def test_base_process(self) -> None:
        old_skip_context = settings.TRANSACT_SKIP_CONTEXT_STORE
        settings.TRANSACT_SKIP_CONTEXT_STORE = {1: {"experiments"}}

        start, finish = self.__get_timestamps()
        message = TransactionEvent(
            event_id="e5e062bf2e1d4afd96fd2f90b6770431",
            trace_id="7400045b25c443b885914600aa83ad04",
            span_id="8841662216cc598b",
            transaction_name="/organizations/:orgId/issues/",
            status="cancelled",
            op="navigation",
            timestamp=finish,
            start_timestamp=start,
            platform="python",
            dist="",
            user_name="me",
            user_id="myself",
            user_email="*****@*****.**",
            ipv4="127.0.0.1",
            ipv6=None,
            environment="prod",
            release="34a554c14b68285d8a8eb6c5c4c56dfc1db9a83a",
            sdk_name="sentry.python",
            sdk_version="0.9.0",
            http_method="POST",
            http_referer="tagstore.something",
            geo={"country_code": "XY", "region": "fake_region", "city": "fake_city"},
        )
        meta = KafkaMessageMetadata(
            offset=1, partition=2, timestamp=datetime(1970, 1, 1)
        )
        assert TransactionsMessageProcessor().process_message(
            message.serialize(), meta
        ) == InsertBatch([message.build_result(meta)], None)
        settings.TRANSACT_SKIP_CONTEXT_STORE = old_skip_context
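Example #5 restores settings.TRANSACT_SKIP_CONTEXT_STORE by hand on its last line; if the assertion fails first, the restore never runs and the mutated setting leaks into later tests. A sketch of the same test with the restore made unconditional (same names, only the try/finally is added):

    def test_base_process(self) -> None:
        old_skip_context = settings.TRANSACT_SKIP_CONTEXT_STORE
        settings.TRANSACT_SKIP_CONTEXT_STORE = {1: {"experiments"}}
        try:
            ...  # build the message and run the assertion exactly as above
        finally:
            settings.TRANSACT_SKIP_CONTEXT_STORE = old_skip_context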
Example #6
    # during create statement
    # (https://github.com/ClickHouse/ClickHouse/issues/12586), so the
    # materialization is added with a migration.
    skipped_cols_on_creation={"_tags_hash_map"},
)


storage = WritableTableStorage(
    storage_key=StorageKey.TRANSACTIONS,
    storage_set_key=StorageSetKey.TRANSACTIONS,
    schema=schema,
    query_processors=[
        NestedFieldConditionOptimizer(
            "contexts",
            "_contexts_flattened",
            {"start_ts", "finish_ts"},
            BEGINNING_OF_TIME,
        ),
        MappingOptimizer("tags", "_tags_hash_map", "tags_hash_map_enabled"),
        TransactionColumnProcessor(),
        ArrayJoinKeyValueOptimizer("tags"),
        ArrayJoinKeyValueOptimizer("measurements"),
        PrewhereProcessor(),
    ],
    stream_loader=KafkaStreamLoader(
        processor=TransactionsMessageProcessor(), default_topic="events",
    ),
    query_splitters=[TimeSplitQueryStrategy(timestamp_col="finish_ts")],
    writer_options={"insert_allow_materialized_columns": 1},
)
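The KafkaStreamLoader above binds TransactionsMessageProcessor to the "events" topic, but the consumer loop that drives it lies outside this excerpt. A hypothetical sketch of the glue, assuming raw message values are JSON; consume_one and raw_value are illustrative names, and the epoch timestamp stands in for the broker timestamp:

    import json
    from datetime import datetime

    def consume_one(raw_value: bytes, offset: int, partition: int):
        # Decode one Kafka payload and hand it to the processor configured
        # on the stream loader; returns the processor's result (an insert
        # batch, or None when the event cannot be processed).
        meta = KafkaMessageMetadata(
            offset=offset, partition=partition, timestamp=datetime(1970, 1, 1)
        )
        return TransactionsMessageProcessor().process_message(
            json.loads(raw_value), meta
        )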
Example #8
    def __init__(self) -> None:
        columns = ColumnSet(
            [
                ("project_id", UInt(64)),
                ("event_id", UUID()),
                ("trace_id", UUID()),
                ("span_id", UInt(64)),
                ("transaction_name", LowCardinality(String())),
                (
                    "transaction_hash",
                    Materialized(UInt(64), "cityHash64(transaction_name)",),
                ),
                ("transaction_op", LowCardinality(String())),
                ("transaction_status", WithDefault(UInt(8), UNKNOWN_SPAN_STATUS)),
                ("start_ts", DateTime()),
                ("start_ms", UInt(16)),
                ("_start_date", Materialized(Date(), "toDate(start_ts)"),),
                ("finish_ts", DateTime()),
                ("finish_ms", UInt(16)),
                ("_finish_date", Materialized(Date(), "toDate(finish_ts)"),),
                ("duration", UInt(32)),
                ("platform", LowCardinality(String())),
                ("environment", LowCardinality(Nullable(String()))),
                ("release", LowCardinality(Nullable(String()))),
                ("dist", LowCardinality(Nullable(String()))),
                ("ip_address_v4", Nullable(IPv4())),
                ("ip_address_v6", Nullable(IPv6())),
                ("user", WithDefault(String(), "''",)),
                ("user_hash", Materialized(UInt(64), "cityHash64(user)"),),
                ("user_id", Nullable(String())),
                ("user_name", Nullable(String())),
                ("user_email", Nullable(String())),
                ("sdk_name", WithDefault(LowCardinality(String()), "''")),
                ("sdk_version", WithDefault(LowCardinality(String()), "''")),
                ("tags", Nested([("key", String()), ("value", String())])),
                ("_tags_flattened", String()),
                ("contexts", Nested([("key", String()), ("value", String())])),
                ("_contexts_flattened", String()),
                ("partition", UInt(16)),
                ("offset", UInt(64)),
                ("retention_days", UInt(16)),
                ("deleted", UInt(8)),
            ]
        )

        schema = ReplacingMergeTreeSchema(
            columns=columns,
            local_table_name="transactions_local",
            dist_table_name="transactions_dist",
            mandatory_conditions=[],
            prewhere_candidates=["event_id", "project_id"],
            order_by="(project_id, _finish_date, transaction_name, cityHash64(span_id))",
            partition_by="(retention_days, toMonday(_finish_date))",
            version_column="deleted",
            sample_expr=None,
            migration_function=transactions_migrations,
        )

        dataset_schemas = DatasetSchemas(read_schema=schema, write_schema=schema,)

        self.__tags_processor = TagColumnProcessor(
            columns=columns,
            promoted_columns=self._get_promoted_columns(),
            column_tag_map=self._get_column_tag_map(),
        )

        super().__init__(
            dataset_schemas=dataset_schemas,
            table_writer=TransactionsTableWriter(
                write_schema=schema,
                stream_loader=KafkaStreamLoader(
                    processor=TransactionsMessageProcessor(), default_topic="events",
                ),
            ),
            time_group_columns={
                "bucketed_start": "start_ts",
                "bucketed_end": "finish_ts",
            },
            time_parse_columns=("start_ts", "finish_ts"),
        )
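The schema in Example #8 carries the Kafka bookkeeping columns (partition, offset) and the ReplacingMergeTree version column (deleted), so build_result(meta) presumably folds the KafkaMessageMetadata into every row. A hypothetical fragment of such a row (values follow the meta used in the tests; retention_days is an assumed default, not taken from the source):

    row_fragment = {
        "partition": 2,        # KafkaMessageMetadata.partition
        "offset": 1,           # KafkaMessageMetadata.offset
        "deleted": 0,          # version column for the ReplacingMergeTree engine
        "retention_days": 90,  # assumption: a typical retention default
    }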
Example #9
    def __init__(self):
        columns = ColumnSet([
            ('project_id', UInt(64)),
            ('event_id', UUID()),
            ('trace_id', UUID()),
            ('span_id', UInt(64)),
            ('transaction_name', String()),
            ('transaction_hash',
             Materialized(
                 UInt(64),
                 'cityHash64(transaction_name)',
             )),
            ('transaction_op', LowCardinality(String())),
            ('start_ts', DateTime()),
            ('start_ms', UInt(16)),
            ('finish_ts', DateTime()),
            ('finish_ms', UInt(16)),
            ('duration',
             Materialized(
                 UInt(32),
                 '((finish_ts - start_ts) * 1000) + (finish_ms - start_ms)',
             )),
            ('platform', LowCardinality(String())),
            ('environment', Nullable(String())),
            ('release', Nullable(String())),
            ('dist', Nullable(String())),
            ('ip_address_v4', Nullable(IPv4())),
            ('ip_address_v6', Nullable(IPv6())),
            ('user', WithDefault(
                String(),
                "''",
            )),
            ('user_id', Nullable(String())),
            ('user_name', Nullable(String())),
            ('user_email', Nullable(String())),
            ('tags', Nested([
                ('key', String()),
                ('value', String()),
            ])),
            ('contexts', Nested([
                ('key', String()),
                ('value', String()),
            ])),
            ('partition', UInt(16)),
            ('offset', UInt(64)),
            ('retention_days', UInt(16)),
            ('deleted', UInt(8)),
        ])

        schema = ReplacingMergeTreeSchema(
            columns=columns,
            local_table_name='transactions_local',
            dist_table_name='transactions_dist',
            order_by=
            '(project_id, toStartOfDay(start_ts), transaction_hash, start_ts, start_ms, trace_id, span_id)',
            partition_by='(retention_days, toMonday(start_ts))',
            version_column='deleted',
            sample_expr=None,
        )

        dataset_schemas = DatasetSchemas(
            read_schema=schema,
            write_schema=schema,
        )

        super().__init__(
            dataset_schemas=dataset_schemas,
            processor=TransactionsMessageProcessor(),
            default_topic="events",
            time_group_columns={
                'bucketed_start': 'start_ts',
                'bucketed_end': 'finish_ts',
            },
        )
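Example #9 materializes duration inside ClickHouse, whereas the newer schema in Example #8 stores a plain UInt(32) computed upstream. The materialized expression, mirrored in Python for clarity (duration_ms is an illustrative name, not part of the source):

    def duration_ms(start_ts: int, start_ms: int, finish_ts: int, finish_ms: int) -> int:
        # Mirrors '((finish_ts - start_ts) * 1000) + (finish_ms - start_ms)':
        # whole seconds scaled to milliseconds plus the sub-second remainder.
        return (finish_ts - start_ts) * 1000 + (finish_ms - start_ms)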