        storage_set_key=StorageSetKey.METRICS,
    ),
    # NOTE(review): the lines above close out a storage definition whose
    # beginning is outside this excerpt (presumably the sets bucket storage
    # — its stream loader uses SetsMetricsProcessor).
    query_processors=[],
    stream_loader=build_kafka_stream_loader_from_settings(
        processor=SetsMetricsProcessor(),
        default_topic=Topic.METRICS,
    ),
)

# Writable storage for raw counter buckets: shared pre/post columns around a
# single Float64 "value" column, fed from the metrics Kafka topic.
counters_buckets = WritableTableStorage(
    storage_key=StorageKey.METRICS_COUNTERS_BUCKETS,
    storage_set_key=StorageSetKey.METRICS,
    schema=WritableTableSchema(
        columns=ColumnSet([
            *PRE_VALUE_COLUMNS,
            Column("value", Float(64)),
            *POST_VALUE_COLUMNS
        ]),
        local_table_name="metrics_counters_buckets_local",
        dist_table_name="metrics_counters_buckets_dist",
        storage_set_key=StorageSetKey.METRICS,
    ),
    query_processors=[],
    stream_loader=build_kafka_stream_loader_from_settings(
        processor=CounterMetricsProcessor(),
        default_topic=Topic.METRICS,
    ),
)

# NOTE(review): definition continues past this excerpt.
distributions_buckets = WritableTableStorage(
    storage_key=StorageKey.METRICS_DISTRIBUTIONS_BUCKETS,
    storage_set_key=StorageSetKey.METRICS,
    String,
    UInt,
)
from snuba.migrations.columns import MigrationModifiers as Modifiers
from snuba.migrations.parse_schema import _get_column

# Parsing fixtures: each case pairs a raw ClickHouse column description tuple
# (type string plus three auxiliary fields, blank here — presumably
# default-type/default-expr/codec; TODO confirm against _get_column) with the
# column type object the parser is expected to produce.
test_data = [
    # Basic types
    (("Date", "", "", ""), Date()),
    (("DateTime", "", "", ""), DateTime()),
    (
        ("Enum8('success' = 0, 'error' = 1)", "", "", ""),
        Enum([("success", 0), ("error", 1)]),
    ),
    (("FixedString(32)", "", "", ""), FixedString(32)),
    (("Float32", "", "", ""), Float(32)),
    (("IPv4", "", "", ""), IPv4()),
    (("IPv6", "", "", ""), IPv6()),
    (("String", "", "", ""), String()),
    (("UInt32", "", "", ""), UInt(32)),
    (("UUID", "", "", ""), UUID()),
    # Aggregate functions
    (
        ("AggregateFunction(uniq, UInt8)", "", "", ""),
        AggregateFunction("uniq", [UInt(8)]),
    ),
    (
        ("AggregateFunction(countIf, UUID, UInt8)", "", "", ""),
        AggregateFunction("countIf", [UUID(), UInt(8)]),
    ),
    (
            ("stack_level", UInt(16)),
        ]),
    ),
    # NOTE(review): the entries above close a ColumnSet whose opening is
    # outside this excerpt.
    ("modules", Nested([("name", String()), ("version", String())])),
])

# Columns that exist only on the transactions table.  The events mappers
# below reference them so that queries against events resolve these
# transaction-only columns to NULL instead of failing.
TRANSACTIONS_COLUMNS = ColumnSet([
    ("trace_id", UUID(Modifiers(nullable=True))),
    ("span_id", UInt(64, Modifiers(nullable=True))),
    ("transaction_hash", UInt(64, Modifiers(nullable=True))),
    ("transaction_op", String(Modifiers(nullable=True))),
    ("transaction_status", UInt(8, Modifiers(nullable=True))),
    ("duration", UInt(32, Modifiers(nullable=True))),
    (
        "measurements",
        Nested([("key", String()), ("value", Float(64))]),
    ),
])

# When querying events: transaction-only columns, functions and the
# measurements subscriptable all map to a default of None.
events_translation_mappers = TranslationMappers(
    columns=[DefaultNoneColumnMapper(TRANSACTIONS_COLUMNS)],
    functions=[DefaultNoneFunctionMapper({"apdex", "failure_rate"})],
    subscriptables=[DefaultNoneSubscriptMapper({"measurements"})],
)

# When querying transactions: group_id becomes the literal 0 and
# events-only columns/functions map to None.
# NOTE(review): definition continues past this excerpt.
transaction_translation_mappers = TranslationMappers(
    columns=[
        ColumnToLiteral(None, "group_id", 0),
        DefaultNoneColumnMapper(EVENTS_COLUMNS),
    ],
    functions=[DefaultNoneFunctionMapper({"isHandled", "notHandled"})],
    # NOTE(review): chunk begins mid-column-list; the list opening is outside
    # this excerpt.  Looks like the querylog schema — TODO confirm.
    Column("projects", Array(UInt(64))),
    Column("organization", UInt(64, Modifiers(nullable=True))),
    Column("timestamp", DateTime()),
    Column("duration_ms", UInt(32)),
    Column("status", status_type),
    # One nested entry per ClickHouse query issued for the request.
    Column(
        "clickhouse_queries",
        Nested([
            Column("sql", String()),
            Column("status", status_type),
            Column("trace_id", UUID(Modifiers(nullable=True))),
            Column("duration_ms", UInt(32)),
            Column("stats", String()),
            Column("final", UInt(8)),
            Column("cache_hit", UInt(8)),
            Column("sample", Float(32)),
            Column("max_threads", UInt(8)),
            Column("num_days", UInt(32)),
            Column("clickhouse_table", String(Modifiers(low_cardinality=True))),
            Column("query_id", String()),
            Column("is_duplicate", UInt(8)),
            Column("consistent", UInt(8)),
        ]),
    ),
]


class Migration(migration.MultiStepMigration):
    # Non-blocking: this migration can run while the service keeps serving.
    # NOTE(review): class body continues past this excerpt.
    blocking = False
Column("sdk_name", String(Modifiers(nullable=True))), Column("sdk_version", String(Modifiers(nullable=True))), Column("type", String(Modifiers(nullable=True))), Column("version", String(Modifiers(nullable=True))), Column("offset", UInt(64, Modifiers(nullable=True))), Column("partition", UInt(16, Modifiers(nullable=True))), Column("message_timestamp", DateTime()), Column("os_build", String(Modifiers(nullable=True))), Column("os_kernel_version", String(Modifiers(nullable=True))), Column("device_name", String(Modifiers(nullable=True))), Column("device_brand", String(Modifiers(nullable=True))), Column("device_locale", String(Modifiers(nullable=True))), Column("device_uuid", String(Modifiers(nullable=True))), Column("device_model_id", String(Modifiers(nullable=True))), Column("device_arch", String(Modifiers(nullable=True))), Column("device_battery_level", Float(32, Modifiers(nullable=True))), Column("device_orientation", String(Modifiers(nullable=True))), Column("device_simulator", UInt(8, Modifiers(nullable=True))), Column("device_online", UInt(8, Modifiers(nullable=True))), Column("device_charging", UInt(8, Modifiers(nullable=True))), Column("level", String(Modifiers(nullable=True))), Column("logger", String(Modifiers(nullable=True))), Column("server_name", String(Modifiers(nullable=True))), Column("transaction", String(Modifiers(nullable=True))), Column("environment", String(Modifiers(nullable=True))), Column("sentry:release", String(Modifiers(nullable=True))), Column("sentry:dist", String(Modifiers(nullable=True))), Column("sentry:user", String(Modifiers(nullable=True))), Column("site", String(Modifiers(nullable=True))), Column("url", String(Modifiers(nullable=True))), Column("app_device", String(Modifiers(nullable=True))),
def forwards_dist(self) -> Sequence[operations.SqlOperation]:
    """Return the forward operations that create the distributed
    distributions bucket table (and its link to the local table)."""
    # Distribution buckets store an array of Float64 samples per row.
    value_columns = [Column("values", Array(Float(64)))]
    return get_forward_bucket_table_dist(
        local_table_name="metrics_distributions_buckets_local",
        dist_table_name="metrics_distributions_buckets_dist",
        value_cols=value_columns,
    )
    # NOTE(review): chunk starts and ends mid-parameter-list.  Each param
    # appears to carry (modified type, base type, differently-sized modified
    # type, expected ClickHouse SQL for the first) — TODO confirm against the
    # test that consumes this fixture.
    pytest.param(
        FixedString(32, Modifier(nullable=True)),
        FixedString(32),
        FixedString(64, Modifier(nullable=True)),
        "Nullable(FixedString(32))",
        id="fixed strings",
    ),
    pytest.param(
        UInt(8, Modifier(nullable=True)),
        UInt(8),
        UInt(16, Modifier(nullable=True)),
        "Nullable(UInt8)",
        id="integers",
    ),
    pytest.param(
        Float(64, Modifier(nullable=True)),
        Float(64),
        Float(32, Modifier(nullable=True)),
        "Nullable(Float64)",
        id="floats",
    ),
    pytest.param(
        Date(),
        Date(),
        Date(Modifier(nullable=True)),
        "Date",
        id="dates",
    ),
    pytest.param(
        DateTime(),
        DateTime(),
def visit_float(
    self, node: Node, visited_children: Iterable[Any]
) -> ColumnType[MigrationModifiers]:
    """Turn a parsed ``FloatN`` grammar node into a ``Float`` column type.

    The second child of ``node`` carries the textual bit width
    (e.g. ``"32"`` or ``"64"``).
    """
    width_text = node.children[1].text
    return Float(int(width_text))
    # NOTE(review): chunk begins mid-ColumnSet — these entries close out a
    # column set (promoted context tags, by the look of the names) whose
    # opening is outside this excerpt.
    ("browser_name", Nullable(String())),
    ("os", Nullable(String())),
    ("os_name", Nullable(String())),
    ("os_rooted", Nullable(UInt(8))),
])

# Context values promoted to dedicated nullable columns.
promoted_context_columns = ColumnSet([
    ("os_build", Nullable(String())),
    ("os_kernel_version", Nullable(String())),
    ("device_name", Nullable(String())),
    ("device_brand", Nullable(String())),
    ("device_locale", Nullable(String())),
    ("device_uuid", Nullable(String())),
    ("device_model_id", Nullable(String())),
    ("device_arch", Nullable(String())),
    ("device_battery_level", Nullable(Float(32))),
    ("device_orientation", Nullable(String())),
    ("device_simulator", Nullable(UInt(8))),
    ("device_online", Nullable(UInt(8))),
    ("device_charging", Nullable(UInt(8))),
])

# Columns every row must carry (none of these are Nullable).
required_columns = ColumnSet([
    ("event_id", FixedString(32)),
    ("project_id", UInt(64)),
    ("group_id", UInt(64)),
    ("timestamp", DateTime()),
    ("deleted", UInt(8)),
    ("retention_days", UInt(16)),
])
def __init__(self):
    """Build the events dataset: define the full ClickHouse column layout,
    wrap it in a ReplacingMergeTree schema, and wire the Kafka topics and
    processor into the dataset base class.

    WARNING: the column order below mirrors the production table.  Per the
    inline note, ``insert_distributed_sync`` is very sensitive to column
    existence and ordering, so do not reorder entries.
    """
    metadata_columns = ColumnSet([
        # optional stream related data
        ('offset', Nullable(UInt(64))),
        ('partition', Nullable(UInt(16))),
    ])

    promoted_tag_columns = ColumnSet([
        # These are the classic tags, they are saved in Snuba exactly as they
        # appear in the event body.
        ('level', Nullable(String())),
        ('logger', Nullable(String())),
        ('server_name', Nullable(String())),  # future name: device_id?
        ('transaction', Nullable(String())),
        ('environment', Nullable(String())),
        ('sentry:release', Nullable(String())),
        ('sentry:dist', Nullable(String())),
        ('sentry:user', Nullable(String())),
        ('site', Nullable(String())),
        ('url', Nullable(String())),
    ])

    promoted_context_tag_columns = ColumnSet([
        # These are promoted tags that come in in `tags`, but are more closely
        # related to contexts.  To avoid naming confusion with Clickhouse nested
        # columns, they are stored in the database with s/./_/
        # promoted tags
        ('app_device', Nullable(String())),
        ('device', Nullable(String())),
        ('device_family', Nullable(String())),
        ('runtime', Nullable(String())),
        ('runtime_name', Nullable(String())),
        ('browser', Nullable(String())),
        ('browser_name', Nullable(String())),
        ('os', Nullable(String())),
        ('os_name', Nullable(String())),
        ('os_rooted', Nullable(UInt(8))),
    ])

    promoted_context_columns = ColumnSet([
        ('os_build', Nullable(String())),
        ('os_kernel_version', Nullable(String())),
        ('device_name', Nullable(String())),
        ('device_brand', Nullable(String())),
        ('device_locale', Nullable(String())),
        ('device_uuid', Nullable(String())),
        ('device_model_id', Nullable(String())),
        ('device_arch', Nullable(String())),
        ('device_battery_level', Nullable(Float(32))),
        ('device_orientation', Nullable(String())),
        ('device_simulator', Nullable(UInt(8))),
        ('device_online', Nullable(UInt(8))),
        ('device_charging', Nullable(UInt(8))),
    ])

    # Non-nullable columns present on every row.
    required_columns = ColumnSet([
        ('event_id', FixedString(32)),
        ('project_id', UInt(64)),
        ('group_id', UInt(64)),
        ('timestamp', DateTime()),
        ('deleted', UInt(8)),
        ('retention_days', UInt(16)),
    ])

    all_columns = required_columns + [
        # required for non-deleted
        ('platform', Nullable(String())),
        ('message', Nullable(String())),
        ('primary_hash', Nullable(FixedString(32))),
        ('received', Nullable(DateTime())),
        ('search_message', Nullable(String())),
        ('title', Nullable(String())),
        ('location', Nullable(String())),

        # optional user
        ('user_id', Nullable(String())),
        ('username', Nullable(String())),
        ('email', Nullable(String())),
        ('ip_address', Nullable(String())),

        # optional geo
        ('geo_country_code', Nullable(String())),
        ('geo_region', Nullable(String())),
        ('geo_city', Nullable(String())),

        ('sdk_name', Nullable(String())),
        ('sdk_version', Nullable(String())),
        ('type', Nullable(String())),
        ('version', Nullable(String())),
    ] + metadata_columns \
      + promoted_context_columns \
      + promoted_tag_columns \
      + promoted_context_tag_columns \
      + [
        # other tags
        ('tags', Nested([
            ('key', String()),
            ('value', String()),
        ])),

        # other context
        ('contexts', Nested([
            ('key', String()),
            ('value', String()),
        ])),

        # http interface
        ('http_method', Nullable(String())),
        ('http_referer', Nullable(String())),

        # exception interface
        ('exception_stacks', Nested([
            ('type', Nullable(String())),
            ('value', Nullable(String())),
            ('mechanism_type', Nullable(String())),
            ('mechanism_handled', Nullable(UInt(8))),
        ])),
        ('exception_frames', Nested([
            ('abs_path', Nullable(String())),
            ('filename', Nullable(String())),
            ('package', Nullable(String())),
            ('module', Nullable(String())),
            ('function', Nullable(String())),
            ('in_app', Nullable(UInt(8))),
            ('colno', Nullable(UInt(32))),
            ('lineno', Nullable(UInt(32))),
            ('stack_level', UInt(16)),
        ])),

        # These are columns we added later in the life of the (current) production
        # database. They don't necessarily belong here in a logical/readability sense
        # but they are here to match the order of columns in production because
        # `insert_distributed_sync` is very sensitive to column existence and ordering.
        ('culprit', Nullable(String())),
        ('sdk_integrations', Array(String())),
        ('modules', Nested([
            ('name', String()),
            ('version', String()),
        ])),
    ]

    # Sampling key: hash of the event id, also the tail of the sort key.
    sample_expr = 'cityHash64(toString(event_id))'

    schema = ReplacingMergeTreeSchema(
        columns=all_columns,
        local_table_name='sentry_local',
        dist_table_name='sentry_dist',
        order_by='(project_id, toStartOfDay(timestamp), %s)' % sample_expr,
        partition_by='(toMonday(timestamp), if(equals(retention_days, 30), 30, 90))',
        # ReplacingMergeTree keeps the row with the highest version; `deleted`
        # doubles as the version column so tombstones replace live rows.
        version_column='deleted',
        sample_expr=sample_expr,
        migration_function=events_migrations)

    # Same schema serves both reads and writes.
    dataset_schemas = DatasetSchemas(
        read_schema=schema,
        write_schema=schema,
    )

    super(EventsDataset, self).__init__(
        dataset_schemas=dataset_schemas,
        processor=EventsProcessor(promoted_tag_columns),
        default_topic="events",
        default_replacement_topic="event-replacements",
        default_commit_log_topic="snuba-commit-log",
        time_group_columns={
            'time': 'timestamp',
            'rtime': 'received'
        },
    )

    # Keep the individual column groups around for later lookups.
    self.__metadata_columns = metadata_columns
    self.__promoted_tag_columns = promoted_tag_columns
    self.__promoted_context_tag_columns = promoted_context_tag_columns
    self.__promoted_context_columns = promoted_context_columns
    self.__required_columns = required_columns
# This is expanded into arrays instead of being expressed as a # Nested column because, when adding new columns to a nested field # we need to provide a default for the entire array (each new column # is an array). # The same schema cannot be achieved with the Nested construct (where # we can only provide default for individual values), so, if we # use the Nested construct, this schema cannot match the one generated # by the migration framework (or by any ALTER statement). ("clickhouse_queries.sql", Array(String())), ("clickhouse_queries.status", Array(LowCardinality(String()))), ("clickhouse_queries.trace_id", Array(Nullable(UUID()))), ("clickhouse_queries.duration_ms", Array(UInt(32))), ("clickhouse_queries.stats", Array(String())), ("clickhouse_queries.final", Array(UInt(8))), ("clickhouse_queries.cache_hit", Array(UInt(8))), ("clickhouse_queries.sample", Array(Float(32))), ("clickhouse_queries.max_threads", Array(UInt(8))), ("clickhouse_queries.num_days", Array(UInt(32))), ("clickhouse_queries.clickhouse_table", Array(LowCardinality(String()))), ("clickhouse_queries.query_id", Array(String())), # XXX: ``is_duplicate`` is currently not set when using the # ``Cache.get_readthrough`` query execution path. See GH-902. ("clickhouse_queries.is_duplicate", Array(UInt(8))), ("clickhouse_queries.consistent", Array(UInt(8))), ( "clickhouse_queries.all_columns", WithDefault(Array(Array(LowCardinality(String()))), NESTED_ARRAY_DEFAULT), ), ( "clickhouse_queries.or_conditions",
def forwards_local(self) -> Sequence[operations.SqlOperation]:
    """Return the forward operations that create the local counters
    bucket table."""
    # Counter buckets carry one Float64 value per row.
    bucket_value_columns = [Column("value", Float(64))]
    return get_forward_bucket_table_local(
        table_name="metrics_counters_buckets_local",
        value_cols=bucket_value_columns,
    )
def __init__(self) -> None:
    """Build the events dataset: define the full ClickHouse column layout,
    wrap it in a ReplacingMergeTree schema, and wire the table writer and
    Kafka stream loader into the dataset base class.

    WARNING: the column order below mirrors the production table.  Per the
    inline note, ``insert_distributed_sync`` is very sensitive to column
    existence and ordering, so do not reorder entries.
    """
    metadata_columns = ColumnSet([
        # optional stream related data
        ("offset", Nullable(UInt(64))),
        ("partition", Nullable(UInt(16))),
    ])

    promoted_tag_columns = ColumnSet([
        # These are the classic tags, they are saved in Snuba exactly as they
        # appear in the event body.
        ("level", Nullable(String())),
        ("logger", Nullable(String())),
        ("server_name", Nullable(String())),  # future name: device_id?
        ("transaction", Nullable(String())),
        ("environment", Nullable(String())),
        ("sentry:release", Nullable(String())),
        ("sentry:dist", Nullable(String())),
        ("sentry:user", Nullable(String())),
        ("site", Nullable(String())),
        ("url", Nullable(String())),
    ])

    promoted_context_tag_columns = ColumnSet([
        # These are promoted tags that come in in `tags`, but are more closely
        # related to contexts.  To avoid naming confusion with Clickhouse nested
        # columns, they are stored in the database with s/./_/
        # promoted tags
        ("app_device", Nullable(String())),
        ("device", Nullable(String())),
        ("device_family", Nullable(String())),
        ("runtime", Nullable(String())),
        ("runtime_name", Nullable(String())),
        ("browser", Nullable(String())),
        ("browser_name", Nullable(String())),
        ("os", Nullable(String())),
        ("os_name", Nullable(String())),
        ("os_rooted", Nullable(UInt(8))),
    ])

    promoted_context_columns = ColumnSet([
        ("os_build", Nullable(String())),
        ("os_kernel_version", Nullable(String())),
        ("device_name", Nullable(String())),
        ("device_brand", Nullable(String())),
        ("device_locale", Nullable(String())),
        ("device_uuid", Nullable(String())),
        ("device_model_id", Nullable(String())),
        ("device_arch", Nullable(String())),
        ("device_battery_level", Nullable(Float(32))),
        ("device_orientation", Nullable(String())),
        ("device_simulator", Nullable(UInt(8))),
        ("device_online", Nullable(UInt(8))),
        ("device_charging", Nullable(UInt(8))),
    ])

    # Non-nullable columns present on every row.
    required_columns = ColumnSet([
        ("event_id", FixedString(32)),
        ("project_id", UInt(64)),
        ("group_id", UInt(64)),
        ("timestamp", DateTime()),
        ("deleted", UInt(8)),
        ("retention_days", UInt(16)),
    ])

    all_columns = (
        required_columns
        + [
            # required for non-deleted
            ("platform", Nullable(String())),
            ("message", Nullable(String())),
            ("primary_hash", Nullable(FixedString(32))),
            ("received", Nullable(DateTime())),
            ("search_message", Nullable(String())),
            ("title", Nullable(String())),
            ("location", Nullable(String())),
            # optional user
            ("user_id", Nullable(String())),
            ("username", Nullable(String())),
            ("email", Nullable(String())),
            ("ip_address", Nullable(String())),
            # optional geo
            ("geo_country_code", Nullable(String())),
            ("geo_region", Nullable(String())),
            ("geo_city", Nullable(String())),
            ("sdk_name", Nullable(String())),
            ("sdk_version", Nullable(String())),
            ("type", Nullable(String())),
            ("version", Nullable(String())),
        ]
        + metadata_columns
        + promoted_context_columns
        + promoted_tag_columns
        + promoted_context_tag_columns
        + [
            # other tags
            ("tags", Nested([("key", String()), ("value", String())])),
            ("_tags_flattened", String()),
            # other context
            ("contexts", Nested([("key", String()), ("value", String())])),
            # http interface
            ("http_method", Nullable(String())),
            ("http_referer", Nullable(String())),
            # exception interface
            (
                "exception_stacks",
                Nested([
                    ("type", Nullable(String())),
                    ("value", Nullable(String())),
                    ("mechanism_type", Nullable(String())),
                    ("mechanism_handled", Nullable(UInt(8))),
                ]),
            ),
            (
                "exception_frames",
                Nested([
                    ("abs_path", Nullable(String())),
                    ("filename", Nullable(String())),
                    ("package", Nullable(String())),
                    ("module", Nullable(String())),
                    ("function", Nullable(String())),
                    ("in_app", Nullable(UInt(8))),
                    ("colno", Nullable(UInt(32))),
                    ("lineno", Nullable(UInt(32))),
                    ("stack_level", UInt(16)),
                ]),
            ),
            # These are columns we added later in the life of the (current) production
            # database. They don't necessarily belong here in a logical/readability sense
            # but they are here to match the order of columns in production because
            # `insert_distributed_sync` is very sensitive to column existence and ordering.
            ("culprit", Nullable(String())),
            ("sdk_integrations", Array(String())),
            ("modules", Nested([("name", String()), ("version", String())])),
        ]
    )

    # Sampling key: hash of the event id, also the tail of the sort key.
    sample_expr = "cityHash64(toString(event_id))"

    schema = ReplacingMergeTreeSchema(
        columns=all_columns,
        local_table_name="sentry_local",
        dist_table_name="sentry_dist",
        # Rows marked deleted are filtered out of every query.
        mandatory_conditions=[("deleted", "=", 0)],
        prewhere_candidates=[
            "event_id",
            "group_id",
            "tags[sentry:release]",
            "message",
            "environment",
            "project_id",
        ],
        order_by="(project_id, toStartOfDay(timestamp), %s)" % sample_expr,
        partition_by="(toMonday(timestamp), if(equals(retention_days, 30), 30, 90))",
        # ReplacingMergeTree keeps the row with the highest version; `deleted`
        # doubles as the version column so tombstones replace live rows.
        version_column="deleted",
        sample_expr=sample_expr,
        migration_function=events_migrations,
    )

    # Same schema serves both reads and writes.
    dataset_schemas = DatasetSchemas(
        read_schema=schema,
        write_schema=schema,
    )

    table_writer = TableWriter(
        write_schema=schema,
        stream_loader=KafkaStreamLoader(
            processor=EventsProcessor(promoted_tag_columns),
            default_topic="events",
            replacement_topic="event-replacements",
            commit_log_topic="snuba-commit-log",
        ),
    )

    super(EventsDataset, self).__init__(
        dataset_schemas=dataset_schemas,
        table_writer=table_writer,
        time_group_columns={
            "time": "timestamp",
            "rtime": "received"
        },
        time_parse_columns=("timestamp", "received"),
    )

    # Keep the individual column groups around for later lookups.
    self.__metadata_columns = metadata_columns
    self.__promoted_tag_columns = promoted_tag_columns
    self.__promoted_context_tag_columns = promoted_context_tag_columns
    self.__promoted_context_columns = promoted_context_columns
    self.__required_columns = required_columns
    # Delegates tags[...] / contexts[...] column resolution.
    self.__tags_processor = TagColumnProcessor(
        columns=all_columns,
        promoted_columns=self._get_promoted_columns(),
        column_tag_map=self._get_column_tag_map(),
    )
    """
    return ProduceInvalidMessagePolicy(
        KafkaProducer(
            build_kafka_producer_configuration(Topic.DEAD_LETTER_METRICS)),
        KafkaTopic(Topic.DEAD_LETTER_METRICS.value),
    )
    # NOTE(review): the lines above close a function whose signature and
    # docstring begin outside this excerpt; it builds a dead-letter policy
    # that produces invalid messages to the metrics DLQ topic.


# Single "polymorphic" raw metrics table: one row type carries counter, set
# and distribution payloads in separate value columns, keyed by use_case_id.
# NOTE(review): definition continues past this excerpt.
polymorphic_bucket = WritableTableStorage(
    storage_key=StorageKey.METRICS_RAW,
    storage_set_key=StorageSetKey.METRICS,
    schema=WritableTableSchema(
        columns=ColumnSet([
            Column("use_case_id", String()),
            *PRE_VALUE_COLUMNS,
            Column("count_value", Float(64)),
            Column("set_values", Array(UInt(64))),
            Column("distribution_values", Array(Float(64))),
            *POST_VALUE_COLUMNS,
        ]),
        local_table_name="metrics_raw_v2_local",
        dist_table_name="metrics_raw_v2_dist",
        storage_set_key=StorageSetKey.METRICS,
    ),
    query_processors=[],
    stream_loader=build_kafka_stream_loader_from_settings(
        processor=PolymorphicMetricsProcessor(),
        default_topic=Topic.METRICS,
        commit_log_topic=Topic.METRICS_COMMIT_LOG,
        subscription_scheduler_mode=SchedulingWatermarkMode.GLOBAL,
        subscription_scheduled_topic=Topic.SUBSCRIPTION_SCHEDULED_METRICS,