Example #1
    def __init__(self) -> None:
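        # Full ClickHouse column set for the errors table, including
        # materialized columns and per-column codec overrides (WithCodecs).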
        all_columns = ColumnSet([
            ("org_id", UInt(64)),
            ("project_id", UInt(64)),
            ("timestamp", DateTime()),
            ("event_id", WithCodecs(UUID(), ["NONE"])),
            (
                "event_hash",
                WithCodecs(
                    Materialized(
                        UInt(64),
                        "cityHash64(toString(event_id))",
                    ),
                    ["NONE"],
                ),
            ),
            ("platform", LowCardinality(String())),
            ("environment", LowCardinality(Nullable(String()))),
            ("release", LowCardinality(Nullable(String()))),
            ("dist", LowCardinality(Nullable(String()))),
            ("ip_address_v4", Nullable(IPv4())),
            ("ip_address_v6", Nullable(IPv6())),
            ("user", WithDefault(String(), "''")),
            (
                "user_hash",
                Materialized(UInt(64), "cityHash64(user)"),
            ),
            ("user_id", Nullable(String())),
            ("user_name", Nullable(String())),
            ("user_email", Nullable(String())),
            ("sdk_name", LowCardinality(Nullable(String()))),
            ("sdk_version", LowCardinality(Nullable(String()))),
            ("tags", Nested([("key", String()), ("value", String())])),
            ("_tags_flattened", String()),
            ("contexts", Nested([("key", String()), ("value", String())])),
            ("_contexts_flattened", String()),
            ("transaction_name", WithDefault(LowCardinality(String()), "''")),
            (
                "transaction_hash",
                Materialized(UInt(64), "cityHash64(transaction_name)"),
            ),
            ("span_id", Nullable(UInt(64))),
            ("trace_id", Nullable(UUID())),
            ("partition", UInt(16)),
            ("offset", WithCodecs(UInt(64), ["DoubleDelta", "LZ4"])),
            ("retention_days", UInt(16)),
            ("deleted", UInt(8)),
            ("group_id", UInt(64)),
            ("primary_hash", FixedString(32)),
            ("primary_hash_hex", Materialized(UInt(64), "hex(primary_hash)")),
            ("event_string", WithCodecs(String(), ["NONE"])),
            ("received", DateTime()),
            ("message", String()),
            ("title", String()),
            ("culprit", String()),
            ("level", LowCardinality(String())),
            ("location", Nullable(String())),
            ("version", LowCardinality(Nullable(String()))),
            ("type", LowCardinality(String())),
            (
                "exception_stacks",
                Nested([
                    ("type", Nullable(String())),
                    ("value", Nullable(String())),
                    ("mechanism_type", Nullable(String())),
                    ("mechanism_handled", Nullable(UInt(8))),
                ]),
            ),
            (
                "exception_frames",
                Nested([
                    ("abs_path", Nullable(String())),
                    ("colno", Nullable(UInt(32))),
                    ("filename", Nullable(String())),
                    ("function", Nullable(String())),
                    ("lineno", Nullable(UInt(32))),
                    ("in_app", Nullable(UInt(8))),
                    ("package", Nullable(String())),
                    ("module", Nullable(String())),
                    ("stack_level", Nullable(UInt(16))),
                ]),
            ),
            ("sdk_integrations", Array(String())),
            ("modules", Nested([("name", String()), ("version", String())])),
        ])

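        # Tags in this map are "promoted": their values are written to the
        # dedicated columns named on the right-hand side, so queries on these
        # tags can hit a real column instead of the generic `tags` arrays.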
        self.__promoted_tag_columns = {
            "environment": "environment",
            "sentry:release": "release",
            "sentry:dist": "dist",
            "sentry:user": "******",
            "transaction": "transaction_name",
            "level": "level",
        }

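        # ReplacingMergeTree keeps, for rows that share the same ORDER BY key,
        # only the row with the greatest value of the version column; using
        # `deleted` as that column lets a tombstone row (deleted = 1) replace
        # the original event when parts are merged.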
        schema = ReplacingMergeTreeSchema(
            columns=all_columns,
            local_table_name="errors_local",
            dist_table_name="errors_dist",
            mandatory_conditions=[("deleted", "=", 0)],
            prewhere_candidates=[
                "event_id",
                "group_id",
                "tags[sentry:release]",
                "message",
                "environment",
                "project_id",
            ],
            order_by="(org_id, project_id, toStartOfDay(timestamp), "
            "primary_hash_hex, event_hash)",
            partition_by="(toMonday(timestamp), if(retention_days = 30, 30, 90))",
            version_column="deleted",
            sample_expr="event_hash",
            ttl_expr="timestamp + toIntervalDay(retention_days)",
            settings={"index_granularity": "8192"},
        )

        dataset_schemas = DatasetSchemas(
            read_schema=schema,
            write_schema=schema,
        )

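        # Writes come from the Kafka `events` topic; each message is turned
        # into a row by the ErrorsProcessor before insertion.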
        table_writer = TableWriter(
            write_schema=schema,
            stream_loader=KafkaStreamLoader(
                processor=ErrorsProcessor(self.__promoted_tag_columns),
                default_topic="events",
            ),
        )

        super().__init__(
            dataset_schemas=dataset_schemas,
            table_writer=table_writer,
            time_group_columns={
                "time": "timestamp",
                "rtime": "received"
            },
            time_parse_columns=("timestamp", "received"),
        )

        self.__tags_processor = TagColumnProcessor(
            columns=all_columns,
            promoted_columns=self._get_promoted_columns(),
            column_tag_map=self._get_column_tag_map(),
        )
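
The promoted-tag map above determines how a tag reference such as
tags[sentry:release] resolves to a real column. A minimal sketch of that
lookup, assuming a hypothetical resolve_tag helper (in Snuba the actual
rewriting is done by TagColumnProcessor):

PROMOTED_TAG_COLUMNS = {
    "environment": "environment",
    "sentry:release": "release",
    "sentry:dist": "dist",
    "sentry:user": "user",
    "transaction": "transaction_name",
    "level": "level",
}

def resolve_tag(tag_name: str) -> str:
    # Promoted tags read straight from their dedicated column; any other
    # tag falls back to an array lookup on the Nested `tags` column.
    if tag_name in PROMOTED_TAG_COLUMNS:
        return PROMOTED_TAG_COLUMNS[tag_name]
    return f"tags.value[indexOf(tags.key, '{tag_name}')]"

assert resolve_tag("sentry:release") == "release"
assert resolve_tag("browser") == "tags.value[indexOf(tags.key, 'browser')]"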
Example #2
from snuba.clickhouse.columns import (
    ColumnSet,
    DateTime,
    Nullable,
    String,
    UInt,
    UUID,
)
from snuba.clusters.storage_sets import StorageSetKey
from snuba.datasets.schemas.tables import (
    WritableTableSchema,
)
from snuba.datasets.storages import StorageKey
from snuba.datasets.table_storage import KafkaStreamLoader
from snuba.query.processors.prewhere import PrewhereProcessor

WRITE_LOCAL_TABLE_NAME = "outcomes_raw_local"
WRITE_DIST_TABLE_NAME = "outcomes_raw_dist"
READ_LOCAL_TABLE_NAME = "outcomes_hourly_local"
READ_DIST_TABLE_NAME = "outcomes_hourly_dist"

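# Writes land in the *_raw_* tables named above; reads are served from the
# hourly rollup tables, which are populated outside this snippet (typically
# by a ClickHouse materialized view).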
write_columns = ColumnSet(
    [
        ("org_id", UInt(64)),
        ("project_id", UInt(64)),
        ("key_id", Nullable(UInt(64))),
        ("timestamp", DateTime()),
        ("outcome", UInt(8)),
        ("reason", Nullable(String())),
        ("event_id", Nullable(UUID())),
    ]
)

raw_schema = WritableTableSchema(
    columns=write_columns,
    # TODO: change to outcomes.raw_local when we add multi DB support
    local_table_name=WRITE_LOCAL_TABLE_NAME,
    dist_table_name=WRITE_DIST_TABLE_NAME,
    storage_set_key=StorageSetKey.OUTCOMES,
)
Example #3
from snuba.clickhouse.columns import (
    Array,
    ColumnSet,
    DateTime,
    Nullable,
    String,
    UInt,
    UUID,
)
from snuba.clusters.storage_sets import StorageSetKey
from snuba.datasets.querylog_processor import QuerylogProcessor
from snuba.datasets.schemas.tables import WritableTableSchema
from snuba.datasets.storage import WritableTableStorage
from snuba.datasets.storages import StorageKey
from snuba.datasets.table_storage import KafkaStreamLoader

columns = ColumnSet(
    [
        ("request_id", UUID()),
        ("request_body", String()),
        ("referrer", String()),
        ("dataset", String()),
        ("projects", Array(UInt(64))),
        ("organization", Nullable(UInt(64))),
        ("timestamp", DateTime()),
        ("duration_ms", UInt(32)),
        ("status", String()),
        # clickhouse_queries Nested columns.
        # This is expanded into arrays instead of being expressed as a
        # Nested column because, when adding new columns to a nested field
        # we need to provide a default for the entire array (each new column
        # is an array).
        # The same schema cannot be achieved with the Nested construct (where
        # we can only provide default for individual values), so, if we
        # use the Nested construct, this schema cannot match the one generated
        # by the migration framework (or by any ALTER statement).
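        # For example, a logical Nested column
        #   ("clickhouse_queries", Nested([("sql", String()), ("status", String())]))
        # would be declared here as one Array column per field, matching the
        # flat `name.field Array(T)` layout ClickHouse itself uses to store
        # Nested columns: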
        ("clickhouse_queries.sql", Array(String())),
        ("clickhouse_queries.status", Array(String())),
        ("clickhouse_queries.trace_id", Array(Nullable(UUID()))),