예제 #1
0
        storage_set_key=StorageSetKey.METRICS,
    ),
    query_processors=[],
    stream_loader=build_kafka_stream_loader_from_settings(
        processor=SetsMetricsProcessor(),
        default_topic=Topic.METRICS,
    ),
)

# Writable storage for the counter-metrics bucket tables
# (metrics_counters_buckets_local / metrics_counters_buckets_dist) in the
# METRICS storage set. Messages are handled by CounterMetricsProcessor and the
# default topic is Topic.METRICS.
counters_buckets = WritableTableStorage(
    storage_key=StorageKey.METRICS_COUNTERS_BUCKETS,
    storage_set_key=StorageSetKey.METRICS,
    schema=WritableTableSchema(
        columns=ColumnSet(
            [
                *PRE_VALUE_COLUMNS,
                # The only column specific to this storage: one 64-bit float
                # value, placed between the PRE_/POST_VALUE column groups.
                Column("value", Float(64)),
                *POST_VALUE_COLUMNS,
            ]
        ),
        local_table_name="metrics_counters_buckets_local",
        dist_table_name="metrics_counters_buckets_dist",
        storage_set_key=StorageSetKey.METRICS,
    ),
    # No query processors are registered for this storage.
    query_processors=[],
    stream_loader=build_kafka_stream_loader_from_settings(
        processor=CounterMetricsProcessor(),
        default_topic=Topic.METRICS,
    ),
)

distributions_buckets = WritableTableStorage(
    storage_key=StorageKey.METRICS_DISTRIBUTIONS_BUCKETS,
    storage_set_key=StorageSetKey.METRICS,
예제 #2
0
    String,
    UInt,
)
from snuba.migrations.columns import MigrationModifiers as Modifiers
from snuba.migrations.parse_schema import _get_column

test_data = [
    # Basic types
    (("Date", "", "", ""), Date()),
    (("DateTime", "", "", ""), DateTime()),
    (
        ("Enum8('success' = 0, 'error' = 1)", "", "", ""),
        Enum([("success", 0), ("error", 1)]),
    ),
    (("FixedString(32)", "", "", ""), FixedString(32)),
    (("Float32", "", "", ""), Float(32)),
    (("IPv4", "", "", ""), IPv4()),
    (("IPv6", "", "", ""), IPv6()),
    (("String", "", "", ""), String()),
    (("UInt32", "", "", ""), UInt(32)),
    (("UUID", "", "", ""), UUID()),
    # Aggregate functions
    (
        ("AggregateFunction(uniq, UInt8)", "", "", ""),
        AggregateFunction("uniq", [UInt(8)]),
    ),
    (
        ("AggregateFunction(countIf, UUID, UInt8)", "", "", ""),
        AggregateFunction("countIf", [UUID(), UInt(8)]),
    ),
    (
예제 #3
0
파일: discover.py 프로젝트: fpacifici/snuba
            ("stack_level", UInt(16)),
        ]),
    ),
    ("modules", Nested([("name", String()), ("version", String())])),
])

# Column definitions passed to DefaultNoneColumnMapper in
# events_translation_mappers below. Every scalar column is declared nullable;
# "measurements" is a Nested column of (key: String, value: Float(64)) pairs.
# NOTE(review): presumably these are the columns that exist only on
# transactions and not on events -- confirm against the entity definitions.
TRANSACTIONS_COLUMNS = ColumnSet([
    ("trace_id", UUID(Modifiers(nullable=True))),
    ("span_id", UInt(64, Modifiers(nullable=True))),
    ("transaction_hash", UInt(64, Modifiers(nullable=True))),
    ("transaction_op", String(Modifiers(nullable=True))),
    ("transaction_status", UInt(8, Modifiers(nullable=True))),
    ("duration", UInt(32, Modifiers(nullable=True))),
    (
        "measurements",
        Nested([("key", String()), ("value", Float(64))]),
    ),
])

# Translation rules for the events side. Note that the column mapper is built
# from TRANSACTIONS_COLUMNS (not the events columns): the "DefaultNone" mappers
# are given the transaction columns, the "apdex" / "failure_rate" functions and
# the "measurements" subscriptable.
# NOTE(review): presumably these mappers replace the listed expressions with a
# NULL/None default because events have no such data -- confirm against the
# DefaultNone*Mapper implementations.
events_translation_mappers = TranslationMappers(
    columns=[DefaultNoneColumnMapper(TRANSACTIONS_COLUMNS)],
    functions=[DefaultNoneFunctionMapper({"apdex", "failure_rate"})],
    subscriptables=[DefaultNoneSubscriptMapper({"measurements"})],
)

transaction_translation_mappers = TranslationMappers(
    columns=[
        ColumnToLiteral(None, "group_id", 0),
        DefaultNoneColumnMapper(EVENTS_COLUMNS),
    ],
    functions=[DefaultNoneFunctionMapper({"isHandled", "notHandled"})],
예제 #4
0
    Column("projects", Array(UInt(64))),
    Column("organization", UInt(64, Modifiers(nullable=True))),
    Column("timestamp", DateTime()),
    Column("duration_ms", UInt(32)),
    Column("status", status_type),
    Column(
        "clickhouse_queries",
        Nested([
            Column("sql", String()),
            Column("status", status_type),
            Column("trace_id", UUID(Modifiers(nullable=True))),
            Column("duration_ms", UInt(32)),
            Column("stats", String()),
            Column("final", UInt(8)),
            Column("cache_hit", UInt(8)),
            Column("sample", Float(32)),
            Column("max_threads", UInt(8)),
            Column("num_days", UInt(32)),
            Column("clickhouse_table",
                   String(Modifiers(low_cardinality=True))),
            Column("query_id", String()),
            Column("is_duplicate", UInt(8)),
            Column("consistent", UInt(8)),
        ]),
    ),
]


class Migration(migration.MultiStepMigration):
    blocking = False
예제 #5
0
 Column("sdk_name", String(Modifiers(nullable=True))),
 Column("sdk_version", String(Modifiers(nullable=True))),
 Column("type", String(Modifiers(nullable=True))),
 Column("version", String(Modifiers(nullable=True))),
 Column("offset", UInt(64, Modifiers(nullable=True))),
 Column("partition", UInt(16, Modifiers(nullable=True))),
 Column("message_timestamp", DateTime()),
 Column("os_build", String(Modifiers(nullable=True))),
 Column("os_kernel_version", String(Modifiers(nullable=True))),
 Column("device_name", String(Modifiers(nullable=True))),
 Column("device_brand", String(Modifiers(nullable=True))),
 Column("device_locale", String(Modifiers(nullable=True))),
 Column("device_uuid", String(Modifiers(nullable=True))),
 Column("device_model_id", String(Modifiers(nullable=True))),
 Column("device_arch", String(Modifiers(nullable=True))),
 Column("device_battery_level", Float(32, Modifiers(nullable=True))),
 Column("device_orientation", String(Modifiers(nullable=True))),
 Column("device_simulator", UInt(8, Modifiers(nullable=True))),
 Column("device_online", UInt(8, Modifiers(nullable=True))),
 Column("device_charging", UInt(8, Modifiers(nullable=True))),
 Column("level", String(Modifiers(nullable=True))),
 Column("logger", String(Modifiers(nullable=True))),
 Column("server_name", String(Modifiers(nullable=True))),
 Column("transaction", String(Modifiers(nullable=True))),
 Column("environment", String(Modifiers(nullable=True))),
 Column("sentry:release", String(Modifiers(nullable=True))),
 Column("sentry:dist", String(Modifiers(nullable=True))),
 Column("sentry:user", String(Modifiers(nullable=True))),
 Column("site", String(Modifiers(nullable=True))),
 Column("url", String(Modifiers(nullable=True))),
 Column("app_device", String(Modifiers(nullable=True))),
예제 #6
0
 def forwards_dist(self) -> Sequence[operations.SqlOperation]:
     """
     Return the forward operations for the distributions bucket dist table.

     Delegates to ``get_forward_bucket_table_dist`` with the local and
     distributed table names and a single ``values`` column holding an
     array of 64-bit floats.
     """
     distribution_value_columns = [Column("values", Array(Float(64)))]
     return get_forward_bucket_table_dist(
         local_table_name="metrics_distributions_buckets_local",
         dist_table_name="metrics_distributions_buckets_dist",
         value_cols=distribution_value_columns,
     )
예제 #7
0
 pytest.param(
     FixedString(32, Modifier(nullable=True)),
     FixedString(32),
     FixedString(64, Modifier(nullable=True)),
     "Nullable(FixedString(32))",
     id="fixed strings",
 ),
 pytest.param(
     UInt(8, Modifier(nullable=True)),
     UInt(8),
     UInt(16, Modifier(nullable=True)),
     "Nullable(UInt8)",
     id="integers",
 ),
 pytest.param(
     Float(64, Modifier(nullable=True)),
     Float(64),
     Float(32, Modifier(nullable=True)),
     "Nullable(Float64)",
     id="floats",
 ),
 pytest.param(
     Date(),
     Date(),
     Date(Modifier(nullable=True)),
     "Date",
     id="dates",
 ),
 pytest.param(
     DateTime(),
     DateTime(),
예제 #8
0
 def visit_float(
     self, node: Node, visited_children: Iterable[Any]
 ) -> ColumnType[MigrationModifiers]:
     """
     Build a ``Float`` column type from a parsed node.

     The size is taken from the text of the node's second child and
     converted to ``int``; ``visited_children`` is not used.
     """
     size_text = node.children[1].text
     return Float(int(size_text))
예제 #9
0
    ("browser_name", Nullable(String())),
    ("os", Nullable(String())),
    ("os_name", Nullable(String())),
    ("os_rooted", Nullable(UInt(8))),
])

# Context values stored as their own top-level columns: OS build/kernel
# details and device attributes. All of them are nullable; the battery level
# is a 32-bit float and the simulator/online/charging columns are UInt(8).
# NOTE(review): the UInt(8) columns look like boolean flags -- confirm.
promoted_context_columns = ColumnSet([
    ("os_build", Nullable(String())),
    ("os_kernel_version", Nullable(String())),
    ("device_name", Nullable(String())),
    ("device_brand", Nullable(String())),
    ("device_locale", Nullable(String())),
    ("device_uuid", Nullable(String())),
    ("device_model_id", Nullable(String())),
    ("device_arch", Nullable(String())),
    ("device_battery_level", Nullable(Float(32))),
    ("device_orientation", Nullable(String())),
    ("device_simulator", Nullable(UInt(8))),
    ("device_online", Nullable(UInt(8))),
    ("device_charging", Nullable(UInt(8))),
])

# The non-nullable columns: event identity (event_id, project_id, group_id),
# the event timestamp, the "deleted" marker and the retention period.
required_columns = ColumnSet([
    ("event_id", FixedString(32)),
    ("project_id", UInt(64)),
    ("group_id", UInt(64)),
    ("timestamp", DateTime()),
    ("deleted", UInt(8)),
    ("retention_days", UInt(16)),
])
예제 #10
0
    def __init__(self) -> None:
        """
        Declare the events column groups and table schema, then initialise
        the base dataset.

        The column groups (metadata, promoted tags, promoted context tags,
        promoted contexts and required columns) are built as separate
        ``ColumnSet`` objects, concatenated in a fixed order into the full
        column list, and wrapped in a ``ReplacingMergeTreeSchema`` for the
        ``sentry_local`` / ``sentry_dist`` tables. The same schema object is
        used for both reads and writes. The individual groups are kept on
        private attributes after the base class has been initialised.
        """
        metadata_columns = ColumnSet([
            # optional stream related data
            ('offset', Nullable(UInt(64))),
            ('partition', Nullable(UInt(16))),
        ])

        promoted_tag_columns = ColumnSet([
            # These are the classic tags, they are saved in Snuba exactly as they
            # appear in the event body.
            ('level', Nullable(String())),
            ('logger', Nullable(String())),
            ('server_name', Nullable(String())),  # future name: device_id?
            ('transaction', Nullable(String())),
            ('environment', Nullable(String())),
            ('sentry:release', Nullable(String())),
            ('sentry:dist', Nullable(String())),
            ('sentry:user', Nullable(String())),
            ('site', Nullable(String())),
            ('url', Nullable(String())),
        ])

        promoted_context_tag_columns = ColumnSet([
            # These are promoted tags that come in in `tags`, but are more closely
            # related to contexts.  To avoid naming confusion with Clickhouse nested
            # columns, they are stored in the database with s/./_/
            # promoted tags
            ('app_device', Nullable(String())),
            ('device', Nullable(String())),
            ('device_family', Nullable(String())),
            ('runtime', Nullable(String())),
            ('runtime_name', Nullable(String())),
            ('browser', Nullable(String())),
            ('browser_name', Nullable(String())),
            ('os', Nullable(String())),
            ('os_name', Nullable(String())),
            ('os_rooted', Nullable(UInt(8))),
        ])

        promoted_context_columns = ColumnSet([
            ('os_build', Nullable(String())),
            ('os_kernel_version', Nullable(String())),
            ('device_name', Nullable(String())),
            ('device_brand', Nullable(String())),
            ('device_locale', Nullable(String())),
            ('device_uuid', Nullable(String())),
            ('device_model_id', Nullable(String())),
            ('device_arch', Nullable(String())),
            ('device_battery_level', Nullable(Float(32))),
            ('device_orientation', Nullable(String())),
            ('device_simulator', Nullable(UInt(8))),
            ('device_online', Nullable(UInt(8))),
            ('device_charging', Nullable(UInt(8))),
        ])

        # The only columns that are never nullable.
        required_columns = ColumnSet([
            ('event_id', FixedString(32)),
            ('project_id', UInt(64)),
            ('group_id', UInt(64)),
            ('timestamp', DateTime()),
            ('deleted', UInt(8)),
            ('retention_days', UInt(16)),
        ])

        # Full column list. The concatenation order below is deliberate (see
        # the note on column ordering further down) -- do not reorder.
        all_columns = required_columns + [
            # required for non-deleted
            ('platform', Nullable(String())),
            ('message', Nullable(String())),
            ('primary_hash', Nullable(FixedString(32))),
            ('received', Nullable(DateTime())),

            ('search_message', Nullable(String())),
            ('title', Nullable(String())),
            ('location', Nullable(String())),

            # optional user
            ('user_id', Nullable(String())),
            ('username', Nullable(String())),
            ('email', Nullable(String())),
            ('ip_address', Nullable(String())),

            # optional geo
            ('geo_country_code', Nullable(String())),
            ('geo_region', Nullable(String())),
            ('geo_city', Nullable(String())),

            ('sdk_name', Nullable(String())),
            ('sdk_version', Nullable(String())),
            ('type', Nullable(String())),
            ('version', Nullable(String())),
        ] + metadata_columns \
            + promoted_context_columns \
            + promoted_tag_columns \
            + promoted_context_tag_columns \
            + [
                # other tags
                ('tags', Nested([
                    ('key', String()),
                    ('value', String()),
                ])),

                # other context
                ('contexts', Nested([
                    ('key', String()),
                    ('value', String()),
                ])),

                # http interface
                ('http_method', Nullable(String())),
                ('http_referer', Nullable(String())),

                # exception interface
                ('exception_stacks', Nested([
                    ('type', Nullable(String())),
                    ('value', Nullable(String())),
                    ('mechanism_type', Nullable(String())),
                    ('mechanism_handled', Nullable(UInt(8))),
                ])),
                ('exception_frames', Nested([
                    ('abs_path', Nullable(String())),
                    ('filename', Nullable(String())),
                    ('package', Nullable(String())),
                    ('module', Nullable(String())),
                    ('function', Nullable(String())),
                    ('in_app', Nullable(UInt(8))),
                    ('colno', Nullable(UInt(32))),
                    ('lineno', Nullable(UInt(32))),
                    ('stack_level', UInt(16)),
                ])),

                # These are columns we added later in the life of the (current) production
                # database. They don't necessarily belong here in a logical/readability sense
                # but they are here to match the order of columns in production because
                # `insert_distributed_sync` is very sensitive to column existence and ordering.
                ('culprit', Nullable(String())),
                ('sdk_integrations', Array(String())),
                ('modules', Nested([
                    ('name', String()),
                    ('version', String()),
                ])),
        ]

        # The same hash expression is used as the sampling expression and as
        # the last component of the ORDER BY key.
        sample_expr = 'cityHash64(toString(event_id))'
        schema = ReplacingMergeTreeSchema(
            columns=all_columns,
            local_table_name='sentry_local',
            dist_table_name='sentry_dist',
            order_by='(project_id, toStartOfDay(timestamp), %s)' % sample_expr,
            partition_by='(toMonday(timestamp), if(equals(retention_days, 30), 30, 90))',
            version_column='deleted',
            sample_expr=sample_expr,
            migration_function=events_migrations)

        # One schema object serves both the read and the write path.
        dataset_schemas = DatasetSchemas(
            read_schema=schema,
            write_schema=schema,
        )

        super(EventsDataset, self).__init__(
            dataset_schemas=dataset_schemas,
            processor=EventsProcessor(promoted_tag_columns),
            default_topic="events",
            default_replacement_topic="event-replacements",
            default_commit_log_topic="snuba-commit-log",
            time_group_columns={
                'time': 'timestamp',
                'rtime': 'received'
            },
        )

        # Keep the individual column groups on private (name-mangled)
        # attributes of the instance.
        self.__metadata_columns = metadata_columns
        self.__promoted_tag_columns = promoted_tag_columns
        self.__promoted_context_tag_columns = promoted_context_tag_columns
        self.__promoted_context_columns = promoted_context_columns
        self.__required_columns = required_columns
예제 #11
0
 # This is expanded into arrays instead of being expressed as a
 # Nested column because, when adding new columns to a nested field
 # we need to provide a default for the entire array (each new column
 # is an array).
 # The same schema cannot be achieved with the Nested construct (where
 # we can only provide default for individual values), so, if we
 # use the Nested construct, this schema cannot match the one generated
 # by the migration framework (or by any ALTER statement).
 ("clickhouse_queries.sql", Array(String())),
 ("clickhouse_queries.status", Array(LowCardinality(String()))),
 ("clickhouse_queries.trace_id", Array(Nullable(UUID()))),
 ("clickhouse_queries.duration_ms", Array(UInt(32))),
 ("clickhouse_queries.stats", Array(String())),
 ("clickhouse_queries.final", Array(UInt(8))),
 ("clickhouse_queries.cache_hit", Array(UInt(8))),
 ("clickhouse_queries.sample", Array(Float(32))),
 ("clickhouse_queries.max_threads", Array(UInt(8))),
 ("clickhouse_queries.num_days", Array(UInt(32))),
 ("clickhouse_queries.clickhouse_table", Array(LowCardinality(String()))),
 ("clickhouse_queries.query_id", Array(String())),
 # XXX: ``is_duplicate`` is currently not set when using the
 # ``Cache.get_readthrough`` query execution path. See GH-902.
 ("clickhouse_queries.is_duplicate", Array(UInt(8))),
 ("clickhouse_queries.consistent", Array(UInt(8))),
 (
     "clickhouse_queries.all_columns",
     WithDefault(Array(Array(LowCardinality(String()))),
                 NESTED_ARRAY_DEFAULT),
 ),
 (
     "clickhouse_queries.or_conditions",
예제 #12
0
 def forwards_local(self) -> Sequence[operations.SqlOperation]:
     """
     Return the forward operations for the counters bucket local table.

     Delegates to ``get_forward_bucket_table_local`` with the local table
     name and a single 64-bit float ``value`` column.
     """
     counter_value_columns = [Column("value", Float(64))]
     return get_forward_bucket_table_local(
         table_name="metrics_counters_buckets_local",
         value_cols=counter_value_columns,
     )
예제 #13
0
    def __init__(self) -> None:
        """
        Declare the events column groups, table schema and writer, then
        initialise the base dataset.

        The column groups (metadata, promoted tags, promoted context tags,
        promoted contexts and required columns) are built as separate
        ``ColumnSet`` objects and concatenated in a fixed order into the full
        column list. That list backs a ``ReplacingMergeTreeSchema`` for the
        ``sentry_local`` / ``sentry_dist`` tables, which is used for both
        reads and writes. A ``TableWriter`` with a ``KafkaStreamLoader`` is
        built for the write path. After the base class is initialised, the
        column groups are kept on private attributes and a
        ``TagColumnProcessor`` is created.
        """
        metadata_columns = ColumnSet([
            # optional stream related data
            ("offset", Nullable(UInt(64))),
            ("partition", Nullable(UInt(16))),
        ])

        promoted_tag_columns = ColumnSet([
            # These are the classic tags, they are saved in Snuba exactly as they
            # appear in the event body.
            ("level", Nullable(String())),
            ("logger", Nullable(String())),
            ("server_name", Nullable(String())),  # future name: device_id?
            ("transaction", Nullable(String())),
            ("environment", Nullable(String())),
            ("sentry:release", Nullable(String())),
            ("sentry:dist", Nullable(String())),
            ("sentry:user", Nullable(String())),
            ("site", Nullable(String())),
            ("url", Nullable(String())),
        ])

        promoted_context_tag_columns = ColumnSet([
            # These are promoted tags that come in in `tags`, but are more closely
            # related to contexts.  To avoid naming confusion with Clickhouse nested
            # columns, they are stored in the database with s/./_/
            # promoted tags
            ("app_device", Nullable(String())),
            ("device", Nullable(String())),
            ("device_family", Nullable(String())),
            ("runtime", Nullable(String())),
            ("runtime_name", Nullable(String())),
            ("browser", Nullable(String())),
            ("browser_name", Nullable(String())),
            ("os", Nullable(String())),
            ("os_name", Nullable(String())),
            ("os_rooted", Nullable(UInt(8))),
        ])

        promoted_context_columns = ColumnSet([
            ("os_build", Nullable(String())),
            ("os_kernel_version", Nullable(String())),
            ("device_name", Nullable(String())),
            ("device_brand", Nullable(String())),
            ("device_locale", Nullable(String())),
            ("device_uuid", Nullable(String())),
            ("device_model_id", Nullable(String())),
            ("device_arch", Nullable(String())),
            ("device_battery_level", Nullable(Float(32))),
            ("device_orientation", Nullable(String())),
            ("device_simulator", Nullable(UInt(8))),
            ("device_online", Nullable(UInt(8))),
            ("device_charging", Nullable(UInt(8))),
        ])

        # The only columns that are never nullable.
        required_columns = ColumnSet([
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("group_id", UInt(64)),
            ("timestamp", DateTime()),
            ("deleted", UInt(8)),
            ("retention_days", UInt(16)),
        ])

        # Full column list. The concatenation order below is deliberate (see
        # the note on column ordering further down) -- do not reorder.
        all_columns = (
            required_columns + [
                # required for non-deleted
                ("platform", Nullable(String())),
                ("message", Nullable(String())),
                ("primary_hash", Nullable(FixedString(32))),
                ("received", Nullable(DateTime())),
                ("search_message", Nullable(String())),
                ("title", Nullable(String())),
                ("location", Nullable(String())),
                # optional user
                ("user_id", Nullable(String())),
                ("username", Nullable(String())),
                ("email", Nullable(String())),
                ("ip_address", Nullable(String())),
                # optional geo
                ("geo_country_code", Nullable(String())),
                ("geo_region", Nullable(String())),
                ("geo_city", Nullable(String())),
                ("sdk_name", Nullable(String())),
                ("sdk_version", Nullable(String())),
                ("type", Nullable(String())),
                ("version", Nullable(String())),
            ] + metadata_columns + promoted_context_columns +
            promoted_tag_columns + promoted_context_tag_columns + [
                # other tags
                ("tags", Nested([("key", String()), ("value", String())])),
                ("_tags_flattened", String()),
                # other context
                ("contexts", Nested([("key", String()), ("value", String())])),
                # http interface
                ("http_method", Nullable(String())),
                ("http_referer", Nullable(String())),
                # exception interface
                (
                    "exception_stacks",
                    Nested([
                        ("type", Nullable(String())),
                        ("value", Nullable(String())),
                        ("mechanism_type", Nullable(String())),
                        ("mechanism_handled", Nullable(UInt(8))),
                    ]),
                ),
                (
                    "exception_frames",
                    Nested([
                        ("abs_path", Nullable(String())),
                        ("filename", Nullable(String())),
                        ("package", Nullable(String())),
                        ("module", Nullable(String())),
                        ("function", Nullable(String())),
                        ("in_app", Nullable(UInt(8))),
                        ("colno", Nullable(UInt(32))),
                        ("lineno", Nullable(UInt(32))),
                        ("stack_level", UInt(16)),
                    ]),
                ),
                # These are columns we added later in the life of the (current) production
                # database. They don't necessarily belong here in a logical/readability sense
                # but they are here to match the order of columns in production because
                # `insert_distributed_sync` is very sensitive to column existence and ordering.
                ("culprit", Nullable(String())),
                ("sdk_integrations", Array(String())),
                ("modules", Nested([("name", String()),
                                    ("version", String())])),
            ])

        # The same hash expression is used as the sampling expression and as
        # the last component of the ORDER BY key.
        sample_expr = "cityHash64(toString(event_id))"
        schema = ReplacingMergeTreeSchema(
            columns=all_columns,
            local_table_name="sentry_local",
            dist_table_name="sentry_dist",
            # NOTE(review): presumably this (deleted = 0) condition is added
            # to every query against the schema -- confirm in the schema class.
            mandatory_conditions=[("deleted", "=", 0)],
            prewhere_candidates=[
                "event_id",
                "group_id",
                "tags[sentry:release]",
                "message",
                "environment",
                "project_id",
            ],
            order_by="(project_id, toStartOfDay(timestamp), %s)" % sample_expr,
            partition_by=
            "(toMonday(timestamp), if(equals(retention_days, 30), 30, 90))",
            version_column="deleted",
            sample_expr=sample_expr,
            migration_function=events_migrations,
        )

        # One schema object serves both the read and the write path.
        dataset_schemas = DatasetSchemas(
            read_schema=schema,
            write_schema=schema,
        )

        table_writer = TableWriter(
            write_schema=schema,
            stream_loader=KafkaStreamLoader(
                processor=EventsProcessor(promoted_tag_columns),
                default_topic="events",
                replacement_topic="event-replacements",
                commit_log_topic="snuba-commit-log",
            ),
        )

        super(EventsDataset, self).__init__(
            dataset_schemas=dataset_schemas,
            table_writer=table_writer,
            time_group_columns={
                "time": "timestamp",
                "rtime": "received"
            },
            time_parse_columns=("timestamp", "received"),
        )

        # Keep the individual column groups on private (name-mangled)
        # attributes of the instance.
        self.__metadata_columns = metadata_columns
        self.__promoted_tag_columns = promoted_tag_columns
        self.__promoted_context_tag_columns = promoted_context_tag_columns
        self.__promoted_context_columns = promoted_context_columns
        self.__required_columns = required_columns

        # NOTE(review): the two helper methods called here presumably read the
        # private column attributes assigned just above, so this must stay
        # after those assignments -- confirm against their definitions.
        self.__tags_processor = TagColumnProcessor(
            columns=all_columns,
            promoted_columns=self._get_promoted_columns(),
            column_tag_map=self._get_column_tag_map(),
        )
예제 #14
0
    """
    return ProduceInvalidMessagePolicy(
        KafkaProducer(
            build_kafka_producer_configuration(Topic.DEAD_LETTER_METRICS)),
        KafkaTopic(Topic.DEAD_LETTER_METRICS.value),
    )


polymorphic_bucket = WritableTableStorage(
    storage_key=StorageKey.METRICS_RAW,
    storage_set_key=StorageSetKey.METRICS,
    schema=WritableTableSchema(
        columns=ColumnSet([
            Column("use_case_id", String()),
            *PRE_VALUE_COLUMNS,
            Column("count_value", Float(64)),
            Column("set_values", Array(UInt(64))),
            Column("distribution_values", Array(Float(64))),
            *POST_VALUE_COLUMNS,
        ]),
        local_table_name="metrics_raw_v2_local",
        dist_table_name="metrics_raw_v2_dist",
        storage_set_key=StorageSetKey.METRICS,
    ),
    query_processors=[],
    stream_loader=build_kafka_stream_loader_from_settings(
        processor=PolymorphicMetricsProcessor(),
        default_topic=Topic.METRICS,
        commit_log_topic=Topic.METRICS_COMMIT_LOG,
        subscription_scheduler_mode=SchedulingWatermarkMode.GLOBAL,
        subscription_scheduled_topic=Topic.SUBSCRIPTION_SCHEDULED_METRICS,