def forwards_local(self) -> Sequence[operations.SqlOperation]:
    """
    Build the operations applied to the local transactions table.

    Two operations target ``transactions_local``: the first adds the
    ``spans.exclusive_time_32`` column after ``spans.group``; the second
    modifies that same column, passing ``ttl_month=("finish_ts", 1)``.
    """
    storage_set = StorageSetKey.TRANSACTIONS
    table_name = "transactions_local"

    # Each operation receives its own Column instance.
    added_column = Column("spans.exclusive_time_32", Array(Float(32)))
    add_column = operations.AddColumn(
        storage_set=storage_set,
        table_name=table_name,
        column=added_column,
        after="spans.group",
    )

    modified_column = Column("spans.exclusive_time_32", Array(Float(32)))
    set_ttl = operations.ModifyColumn(
        storage_set=storage_set,
        table_name=table_name,
        column=modified_column,
        ttl_month=("finish_ts", 1),
    )

    return [add_column, set_ttl]
def forwards_dist(self) -> Sequence[operations.SqlOperation]:
    """
    Return the operations produced by ``get_forward_migrations_dist`` for
    the ``metrics_counters_dist`` / ``metrics_counters_local`` tables.

    The aggregation schema holds a single ``value`` column declared as
    ``AggregateFunction("sum", [Float(64)])``.
    """
    value_column = Column("value", AggregateFunction("sum", [Float(64)]))
    return get_forward_migrations_dist(
        dist_table_name="metrics_counters_dist",
        local_table_name="metrics_counters_local",
        aggregation_col_schema=[value_column],
    )
def forwards_dist(self) -> Sequence[operations.SqlOperation]:
    """
    Build the operations applied to the distributed transactions table.

    A single operation adds ``spans.exclusive_time_32`` to
    ``transactions_dist``, positioned after ``spans.group``.
    """
    exclusive_time_column = Column("spans.exclusive_time_32", Array(Float(32)))
    add_column = operations.AddColumn(
        storage_set=StorageSetKey.TRANSACTIONS,
        table_name="transactions_dist",
        column=exclusive_time_column,
        after="spans.group",
    )
    return [add_column]
def forwards_local(self) -> Sequence[operations.SqlOperation]:
    """
    Return a one-element tuple holding the operation built by
    ``get_forward_view_migration_local_consolidated`` for counters.

    The view reads from ``metrics_counters_buckets_local`` and targets
    ``metrics_counters_local``; its name comes from
    ``get_consolidated_mv_name("counters")``.
    """
    mv_name = get_consolidated_mv_name("counters")
    value_column = Column("value", AggregateFunction("sum", [Float(64)]))
    view_operation = get_forward_view_migration_local_consolidated(
        source_table_name="metrics_counters_buckets_local",
        table_name="metrics_counters_local",
        mv_name=mv_name,
        aggregation_col_schema=[value_column],
        aggregation_states="sumState(value) as value",
    )
    # The original returns a tuple rather than a list; keep that shape.
    return (view_operation,)
def __init__(self) -> None:
    """
    Initialize the parent with the counters configuration.

    Writes go to ``StorageKey.METRICS_RAW`` and reads come from
    ``StorageKey.METRICS_COUNTERS``. The value schema is a single ``value``
    column, and two function-name mappers are registered:
    ``sum`` -> ``sumMerge`` and ``sumIf`` -> ``sumMergeIf``.
    """
    value_schema = [Column("value", AggregateFunction("sum", [Float(64)]))]
    function_mappers = [
        FunctionNameMapper("sum", "sumMerge"),
        FunctionNameMapper("sumIf", "sumMergeIf"),
    ]
    super().__init__(
        writable_storage_key=StorageKey.METRICS_RAW,
        readable_storage_key=StorageKey.METRICS_COUNTERS,
        value_schema=value_schema,
        mappers=TranslationMappers(functions=function_mappers),
    )
def forwards_dist(self) -> Sequence[operations.Operation]:
    """
    Build the operations applied to the distributed transactions table.

    A single operation adds the nested ``measurements`` column (a
    low-cardinality string ``key`` and a ``Float(64)`` ``value``) to
    ``transactions_dist``, positioned after ``_contexts_flattened``.
    """
    measurements_type = Nested(
        [
            ("key", LowCardinality(String())),
            ("value", Float(64)),
        ]
    )
    add_measurements = operations.AddColumn(
        storage_set=StorageSetKey.TRANSACTIONS,
        table_name="transactions_dist",
        column=Column("measurements", measurements_type),
        after="_contexts_flattened",
    )
    return [add_measurements]
def __init__(self) -> None:
    """
    Initialize the parent with the distributions configuration.

    Writes go to ``StorageKey.METRICS_RAW`` and reads come from
    ``StorageKey.METRICS_DISTRIBUTIONS``. The value schema declares the
    ``percentiles``, ``min``, ``max``, ``avg``, ``sum``, ``count`` and
    ``histogram_buckets`` aggregate columns. Each mapper below is built from
    a (function name, merge function name, column name) triple.
    """
    value_schema = [
        Column(
            "percentiles",
            AggregateFunction(
                "quantiles(0.5, 0.75, 0.9, 0.95, 0.99)", [Float(64)]
            ),
        ),
        # For these columns the column name and the aggregate name coincide.
        *(
            Column(name, AggregateFunction(name, [Float(64)]))
            for name in ("min", "max", "avg", "sum", "count")
        ),
        Column(
            "histogram_buckets",
            AggregateFunction("histogram(250)", [Float(64)]),
        ),
    ]

    # (function, merge function, aggregate column) for plain functions.
    plain_mappings = [
        ("min", "minMerge", "min"),
        ("minIf", "minMergeIf", "min"),
        ("max", "maxMerge", "max"),
        ("maxIf", "maxMergeIf", "max"),
        ("avg", "avgMerge", "avg"),
        ("avgIf", "avgMergeIf", "avg"),
        ("sum", "sumMerge", "sum"),
        ("sumIf", "sumMergeIf", "sum"),
        ("count", "countMerge", "count"),
        ("countIf", "countMergeIf", "count"),
    ]
    # Same triple layout for curried functions.
    curried_mappings = [
        ("quantiles", "quantilesMerge", "percentiles"),
        ("quantilesIf", "quantilesMergeIf", "percentiles"),
        ("histogram", "histogramMerge", "histogram_buckets"),
        ("histogramIf", "histogramMergeIf", "histogram_buckets"),
    ]

    mappers = TranslationMappers(
        functions=[
            AggregateFunctionMapper("value", function, merged, column)
            for function, merged, column in plain_mappings
        ],
        curried_functions=[
            AggregateCurriedFunctionMapper("value", function, merged, column)
            for function, merged, column in curried_mappings
        ],
    )

    super().__init__(
        writable_storage_key=StorageKey.METRICS_RAW,
        readable_storage_key=StorageKey.METRICS_DISTRIBUTIONS,
        value_schema=value_schema,
        mappers=mappers,
    )
def __forward_migrations(
    self, table_name: str
) -> Sequence[operations.SqlOperation]:
    """
    Build the operation that adds ``histogram_buckets`` to ``table_name``.

    The column is declared as ``AggregateFunction("histogram(250)",
    [Float(64)])`` and is placed after the ``count`` column.
    """
    histogram_column = Column(
        "histogram_buckets",
        AggregateFunction("histogram(250)", [Float(64)]),
    )
    add_histogram = operations.AddColumn(
        storage_set=StorageSetKey.METRICS,
        table_name=table_name,
        column=histogram_column,
        after="count",
    )
    return [add_histogram]
def forwards_local(self) -> Sequence[operations.SqlOperation]:
    """
    Build the operations applied to the local transactions table.

    A single operation adds the nested ``measurements`` column (a string
    ``key`` with the ``low_cardinality`` modifier and a ``Float(64)``
    ``value``) to ``transactions_local``, after ``_contexts_flattened``.
    """
    measurements_type = Nested(
        [
            ("key", String(Modifiers(low_cardinality=True))),
            ("value", Float(64)),
        ]
    )
    add_measurements = operations.AddColumn(
        storage_set=StorageSetKey.TRANSACTIONS,
        table_name="transactions_local",
        column=Column("measurements", measurements_type),
        after="_contexts_flattened",
    )
    return [add_measurements]
def forwards_dist(self) -> Sequence[operations.SqlOperation]:
    """
    Build the operations applied to the distributed transactions table.

    A single operation adds the nested ``span_op_breakdowns`` column (a
    string ``key`` with the ``low_cardinality`` modifier and a ``Float(64)``
    ``value``) to ``transactions_dist``, after ``measurements.value``.
    """
    breakdowns_type = Nested(
        [
            ("key", String(Modifiers(low_cardinality=True))),
            ("value", Float(64)),
        ]
    )
    add_breakdowns = operations.AddColumn(
        storage_set=StorageSetKey.TRANSACTIONS,
        table_name="transactions_dist",
        column=Column("span_op_breakdowns", breakdowns_type),
        after="measurements.value",
    )
    return [add_breakdowns]
def __init__(self) -> None:
    """
    Initialize the parent with the counters configuration.

    Writes go to ``StorageKey.METRICS_COUNTERS_BUCKETS`` and reads come
    from ``StorageKey.METRICS_COUNTERS``. The single column mapper turns the
    ``value`` column into a ``sumMerge`` call over that same column.
    """
    value_schema = [Column("value", AggregateFunction("sum", [Float(64)]))]
    merged_value = ColumnToFunction(
        None,
        "value",
        "sumMerge",
        (ColumnExpr(None, None, "value"),),
    )
    super().__init__(
        writable_storage_key=StorageKey.METRICS_COUNTERS_BUCKETS,
        readable_storage_key=StorageKey.METRICS_COUNTERS,
        value_schema=value_schema,
        mappers=TranslationMappers(columns=[merged_value]),
    )
def get_migration_args_for_counters(
    granularity: int = ORIGINAL_GRANULARITY,
) -> MigrationArgs:
    """
    Assemble the migration arguments for the counters tables.

    :param granularity: Passed to ``get_mv_name`` and also returned under
        the ``"granularity"`` key. Defaults to ``ORIGINAL_GRANULARITY``.
    :return: A mapping with the source and destination table names, the
        view name, the aggregation column schema, the aggregation state
        expression and the granularity.
    """
    mv_name = get_mv_name("counters", granularity)
    value_column = Column("value", AggregateFunction("sum", [Float(64)]))
    return {
        "source_table_name": "metrics_counters_buckets_local",
        "table_name": "metrics_counters_local",
        "mv_name": mv_name,
        "aggregation_col_schema": [value_column],
        "aggregation_states": "sumState(value) as value",
        "granularity": granularity,
    }
def forwards_local(self) -> Sequence[operations.SqlOperation]:
    """
    Build the operations applied to the local metrics tables.

    The result starts with the operations returned by
    ``__forward_migrations`` for ``metrics_distributions_local``, followed
    by one polymorphic view operation each for distributions, sets and
    counters. All three views read from ``self.raw_table_name`` and use
    ``materialization_version=4``.
    """
    column_operations = self.__forward_migrations("metrics_distributions_local")
    raw_table = self.raw_table_name

    distribution_states = (
        "quantilesState(0.5, 0.75, 0.9, 0.95, 0.99)((arrayJoin(distribution_values) AS values_rows)) as percentiles, "
        "minState(values_rows) as min, "
        "maxState(values_rows) as max, "
        "avgState(values_rows) as avg, "
        "sumState(values_rows) as sum, "
        "countState(values_rows) as count, "
        "histogramState(250)(values_rows) as histogram_buckets"
    )
    distributions_view = get_forward_view_migration_polymorphic_table_v2(
        source_table_name=raw_table,
        table_name="metrics_distributions_local",
        mv_name=get_polymorphic_mv_v3_name("distributions"),
        aggregation_col_schema=COL_SCHEMA_DISTRIBUTIONS_V2,
        aggregation_states=distribution_states,
        metric_type="distribution",
        materialization_version=4,
    )

    # The schema of the remaining two views is unchanged. They are the same
    # views as in 0023, recreated for the new materialization_version.
    sets_view = get_forward_view_migration_polymorphic_table_v2(
        source_table_name=raw_table,
        table_name="metrics_sets_local",
        mv_name=get_polymorphic_mv_v3_name("sets"),
        aggregation_col_schema=[
            Column("value", AggregateFunction("uniqCombined64", [UInt(64)])),
        ],
        aggregation_states="uniqCombined64State(arrayJoin(set_values)) as value",
        metric_type="set",
        materialization_version=4,
    )
    counters_view = get_forward_view_migration_polymorphic_table_v2(
        source_table_name=raw_table,
        table_name="metrics_counters_local",
        mv_name=get_polymorphic_mv_v3_name("counters"),
        aggregation_col_schema=[
            Column("value", AggregateFunction("sum", [Float(64)])),
        ],
        aggregation_states="sumState(count_value) as value",
        metric_type="counter",
        materialization_version=4,
    )

    return [*column_operations, distributions_view, sets_view, counters_view]
def __init__(self) -> None:
    """
    Initialize the parent with the distributions configuration.

    Writes go to ``StorageKey.METRICS_DISTRIBUTIONS_BUCKETS`` and reads come
    from ``StorageKey.METRICS_DISTRIBUTIONS``. The ``percentiles`` column is
    mapped to a curried ``quantilesMerge`` call parameterized by the five
    quantile levels; the other columns go through ``merge_mapper``.
    """
    value_schema = [
        Column(
            "percentiles",
            AggregateFunction(
                "quantiles(0.5, 0.75, 0.9, 0.95, 0.99)", [Float(64)]
            ),
        ),
        Column("min", AggregateFunction("min", [Float(64)])),
        Column("max", AggregateFunction("max", [Float(64)])),
        Column("avg", AggregateFunction("avg", [Float(64)])),
        Column("sum", AggregateFunction("sum", [Float(64)])),
        Column("count", AggregateFunction("count", [Float(64)])),
    ]

    quantile_levels = [0.5, 0.75, 0.9, 0.95, 0.99]
    percentiles_mapper = ColumnToCurriedFunction(
        None,
        "percentiles",
        FunctionCall(
            None,
            "quantilesMerge",
            tuple(Literal(None, level) for level in quantile_levels),
        ),
        (ColumnExpr(None, None, "percentiles"),),
    )
    simple_mappers = [
        merge_mapper(name) for name in ("min", "max", "avg", "sum", "count")
    ]

    super().__init__(
        writable_storage_key=StorageKey.METRICS_DISTRIBUTIONS_BUCKETS,
        readable_storage_key=StorageKey.METRICS_DISTRIBUTIONS,
        value_schema=value_schema,
        mappers=TranslationMappers(
            columns=[percentiles_mapper, *simple_mappers],
        ),
    )
("user_hash", UInt(64, Modifiers(readonly=True))), ("user_id", String(Modifiers(nullable=True))), ("user_name", String(Modifiers(nullable=True))), ("user_email", String(Modifiers(nullable=True))), ("sdk_name", String()), ("sdk_version", String()), ("http_method", String(Modifiers(nullable=True))), ("http_referer", String(Modifiers(nullable=True))), ("tags", Nested([("key", String()), ("value", String())])), ("_tags_flattened", String()), ("_tags_hash_map", Array(UInt(64), Modifiers(readonly=True))), ("contexts", Nested([("key", String()), ("value", String())])), ("_contexts_flattened", String()), ( "measurements", Nested([("key", String()), ("value", Float(64))]), ), ("partition", UInt(16)), ("offset", UInt(64)), ("message_timestamp", DateTime()), ("retention_days", UInt(16)), ("deleted", UInt(8)), ("type", String(Modifiers(readonly=True))), ("message", String(Modifiers(readonly=True))), ("title", String(Modifiers(readonly=True))), ("timestamp", DateTime(Modifiers(readonly=True))), ]) schema = WritableTableSchema( columns=columns, local_table_name="transactions_local",
def __init__(self) -> None:
    """
    Declare the three abstract column sets and initialize the parent.

    The common, events-only and transactions-only column sets are stored on
    the instance. The parent is initialized with the events and transactions
    storages, a query plan builder whose selector also receives the
    read-only events storage, the concatenation of the three column sets,
    and no writable storage.
    """
    key_value_strings = [("key", String()), ("value", String())]

    common_columns = ColumnSet(
        [
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", Nullable(String())),
            ("timestamp", DateTime()),
            ("platform", Nullable(String())),
            ("environment", Nullable(String())),
            ("release", Nullable(String())),
            ("dist", Nullable(String())),
            ("user", Nullable(String())),
            ("transaction", Nullable(String())),
            ("message", Nullable(String())),
            ("title", Nullable(String())),
            # User
            ("user_id", Nullable(String())),
            ("username", Nullable(String())),
            ("email", Nullable(String())),
            ("ip_address", Nullable(String())),
            # SDK
            ("sdk_name", Nullable(String())),
            ("sdk_version", Nullable(String())),
            # Geo location context
            ("geo_country_code", Nullable(String())),
            ("geo_region", Nullable(String())),
            ("geo_city", Nullable(String())),
            ("http_method", Nullable(String())),
            ("http_referer", Nullable(String())),
            # Other tags and context
            ("tags", Nested(list(key_value_strings))),
            ("contexts", Nested(list(key_value_strings))),
        ]
    )

    exception_stacks = Nested(
        [
            ("type", Nullable(String())),
            ("value", Nullable(String())),
            ("mechanism_type", Nullable(String())),
            ("mechanism_handled", Nullable(UInt(8))),
        ]
    )
    exception_frames = Nested(
        [
            ("abs_path", Nullable(String())),
            ("filename", Nullable(String())),
            ("package", Nullable(String())),
            ("module", Nullable(String())),
            ("function", Nullable(String())),
            ("in_app", Nullable(UInt(8))),
            ("colno", Nullable(UInt(32))),
            ("lineno", Nullable(UInt(32))),
            ("stack_level", UInt(16)),
        ]
    )
    events_columns = ColumnSet(
        [
            ("group_id", Nullable(UInt(64))),
            ("primary_hash", Nullable(FixedString(32))),
            # Promoted tags
            ("level", Nullable(String())),
            ("logger", Nullable(String())),
            ("server_name", Nullable(String())),
            ("site", Nullable(String())),
            ("url", Nullable(String())),
            ("search_message", Nullable(String())),
            ("location", Nullable(String())),
            ("culprit", Nullable(String())),
            ("received", Nullable(DateTime())),
            ("sdk_integrations", Nullable(Array(String()))),
            ("version", Nullable(String())),
            # Exception interface
            ("exception_stacks", exception_stacks),
            ("exception_frames", exception_frames),
            ("modules", Nested([("name", String()), ("version", String())])),
        ]
    )

    transactions_columns = ColumnSet(
        [
            ("trace_id", Nullable(UUID())),
            ("span_id", Nullable(UInt(64))),
            ("transaction_hash", Nullable(UInt(64))),
            ("transaction_op", Nullable(String())),
            ("transaction_status", Nullable(UInt(8))),
            ("duration", Nullable(UInt(32))),
            (
                "measurements",
                Nested(
                    [("key", LowCardinality(String())), ("value", Float(64))]
                ),
            ),
        ]
    )

    self.__common_columns = common_columns
    self.__events_columns = events_columns
    self.__transactions_columns = transactions_columns

    events_storage = get_storage(StorageKey.EVENTS)
    events_ro_storage = get_storage(StorageKey.EVENTS_RO)
    transactions_storage = get_storage(StorageKey.TRANSACTIONS)

    self.__time_group_columns: Mapping[str, str] = {}
    self.__time_parse_columns = ("timestamp",)

    storage_selector = DiscoverQueryStorageSelector(
        events_table=events_storage,
        events_ro_table=events_ro_storage,
        abstract_events_columns=events_columns,
        transactions_table=transactions_storage,
        abstract_transactions_columns=transactions_columns,
    )
    plan_builder = SelectedStorageQueryPlanBuilder(selector=storage_selector)

    super().__init__(
        storages=[events_storage, transactions_storage],
        query_plan_builder=plan_builder,
        abstract_column_set=(
            common_columns + events_columns + transactions_columns
        ),
        writable_storage=None,
    )
("ip_address_v6", IPv6(Modifiers(nullable=True))), ("user", String()), ("user_hash", UInt(64, Modifiers(readonly=True))), ("user_id", String(Modifiers(nullable=True))), ("user_name", String(Modifiers(nullable=True))), ("user_email", String(Modifiers(nullable=True))), ("sdk_name", String()), ("sdk_version", String()), ("http_method", String(Modifiers(nullable=True))), ("http_referer", String(Modifiers(nullable=True))), ("tags", Nested([("key", String()), ("value", String())])), ("_tags_flattened", String()), ("_tags_hash_map", Array(UInt(64), Modifiers(readonly=True))), ("contexts", Nested([("key", String()), ("value", String())])), ("_contexts_flattened", String()), ("measurements", Nested([("key", String()), ("value", Float(64))]),), ("span_op_breakdowns", Nested([("key", String()), ("value", Float(64))]),), ("partition", UInt(16)), ("offset", UInt(64)), ("message_timestamp", DateTime()), ("retention_days", UInt(16)), ("deleted", UInt(8)), ("type", String(Modifiers(readonly=True))), ("message", String(Modifiers(readonly=True))), ("title", String(Modifiers(readonly=True))), ("timestamp", DateTime(Modifiers(readonly=True))), ] ) schema = WritableTableSchema( columns=columns,
# This is expanded into arrays instead of being expressed as a # Nested column because, when adding new columns to a nested field # we need to provide a default for the entire array (each new column # is an array). # The same schema cannot be achieved with the Nested construct (where # we can only provide default for individual values), so, if we # use the Nested construct, this schema cannot match the one generated # by the migration framework (or by any ALTER statement). ("clickhouse_queries.sql", Array(String())), ("clickhouse_queries.status", Array(String())), ("clickhouse_queries.trace_id", Array(UUID(Modifiers(nullable=True)))), ("clickhouse_queries.duration_ms", Array(UInt(32))), ("clickhouse_queries.stats", Array(String())), ("clickhouse_queries.final", Array(UInt(8))), ("clickhouse_queries.cache_hit", Array(UInt(8))), ("clickhouse_queries.sample", Array(Float(32))), ("clickhouse_queries.max_threads", Array(UInt(8))), ("clickhouse_queries.num_days", Array(UInt(32))), ("clickhouse_queries.clickhouse_table", Array(String())), ("clickhouse_queries.query_id", Array(String())), # XXX: ``is_duplicate`` is currently not set when using the # ``Cache.get_readthrough`` query execution path. See GH-902. ("clickhouse_queries.is_duplicate", Array(UInt(8))), ("clickhouse_queries.consistent", Array(UInt(8))), ("clickhouse_queries.all_columns", Array(Array(String()))), ("clickhouse_queries.or_conditions", Array(UInt(8))), ("clickhouse_queries.where_columns", Array(Array(String()))), ("clickhouse_queries.where_mapping_columns", Array(Array(String()))), ("clickhouse_queries.groupby_columns", Array(Array(String()))), ("clickhouse_queries.array_join_columns", Array(Array(String()))), ])
class Migration(migration.ClickhouseNodeMigration):
    """
    Creates and drops the ``generic_metric_sets_raw`` local and distributed
    tables. Both tables share the column list declared in ``columns``.
    """

    blocking = False
    local_table_name = "generic_metric_sets_raw_local"
    dist_table_name = "generic_metric_sets_raw_dist"

    # Column list shared by the local and the distributed table.
    columns: Sequence[Column[Modifiers]] = [
        Column("use_case_id", String(Modifiers(low_cardinality=True))),
        Column("org_id", UInt(64)),
        Column("project_id", UInt(64)),
        Column("metric_id", UInt(64)),
        Column("timestamp", DateTime()),
        Column("retention_days", UInt(16)),
        Column(
            "tags",
            Nested(
                [
                    ("key", UInt(64)),
                    ("indexed_value", UInt(64)),
                    ("raw_value", String()),
                ]
            ),
        ),
        Column("set_values", Array(UInt(64))),
        Column("count_value", Float(64)),
        Column("distribution_values", Array(Float(64))),
        Column("metric_type", String(Modifiers(low_cardinality=True))),
        Column("materialization_version", UInt(8)),
        Column("timeseries_id", UInt(32)),
        Column("partition", UInt(16)),
        Column("offset", UInt(64)),
    ]

    def forwards_local(self) -> Sequence[operations.SqlOperation]:
        """Create the local table with a ``MergeTree`` engine."""
        engine = table_engines.MergeTree(
            storage_set=StorageSetKey.GENERIC_METRICS_SETS,
            order_by="(use_case_id, org_id, project_id, metric_id, timestamp)",
            partition_by="(toStartOfInterval(timestamp, toIntervalDay(3)))",
            ttl="timestamp + toIntervalDay(7)",
        )
        create_local = operations.CreateTable(
            storage_set=StorageSetKey.GENERIC_METRICS_SETS,
            table_name=self.local_table_name,
            engine=engine,
            columns=self.columns,
        )
        return [create_local]

    def backwards_local(self) -> Sequence[operations.SqlOperation]:
        """Drop the local table."""
        drop_local = operations.DropTable(
            storage_set=StorageSetKey.GENERIC_METRICS_SETS,
            table_name=self.local_table_name,
        )
        return [drop_local]

    def forwards_dist(self) -> Sequence[operations.SqlOperation]:
        """
        Create the distributed table over the local one, using
        ``cityHash64(timeseries_id)`` as the sharding key.
        """
        engine = table_engines.Distributed(
            local_table_name=self.local_table_name,
            sharding_key="cityHash64(timeseries_id)",
        )
        create_dist = operations.CreateTable(
            storage_set=StorageSetKey.GENERIC_METRICS_SETS,
            table_name=self.dist_table_name,
            engine=engine,
            columns=self.columns,
        )
        return [create_dist]

    def backwards_dist(self) -> Sequence[operations.SqlOperation]:
        """Drop the distributed table."""
        drop_dist = operations.DropTable(
            storage_set=StorageSetKey.GENERIC_METRICS_SETS,
            table_name=self.dist_table_name,
        )
        return [drop_dist]
String, UInt, UUID, WithCodecs, WithDefault, ) from snuba.migrations.parse_schema import _get_column test_data = [ # Basic types (("Date", "", "", ""), Date()), (("DateTime", "", "", ""), DateTime()), (("Enum8('success' = 0, 'error' = 1)", "", "", ""), Enum([("success", 0), ("error", 1)])), (("FixedString(32)", "", "", ""), FixedString(32)), (("Float32", "", "", ""), Float(32)), (("IPv4", "", "", ""), IPv4()), (("IPv6", "", "", ""), IPv6()), (("String", "", "", ""), String()), (("UInt32", "", "", ""), UInt(32)), (("UUID", "", "", ""), UUID()), # Aggregate functions (("AggregateFunction(uniq, UInt8)", "", "", ""), AggregateFunction("uniq", UInt(8))), (("AggregateFunction(countIf, UUID, UInt8)", "", "", ""), AggregateFunction("countIf", UUID(), UInt(8))), (("AggregateFunction(quantileIf(0.5, 0.9), UInt32, UInt8)", "", "", ""), AggregateFunction("quantileIf(0.5, 0.9)", UInt(32), UInt(8))), # Array (("Array(String)", "", "", ""), Array(String())), (("Array(DateTime)", "", "", ""), Array(DateTime())),
def visit_float(self, node: Node, visited_children: Iterable[Any]) -> ColumnType:
    """
    Build a ``Float`` column type from a parsed node.

    The size is the text of the node's second child, converted to ``int``.
    ``visited_children`` is not used.
    """
    size_node = node.children[1]
    return Float(int(size_node.text))
("os_name", String(Modifiers(nullable=True))), ("os_rooted", UInt(8, Modifiers(nullable=True))), ] ) promoted_context_columns = ColumnSet( [ ("os_build", String(Modifiers(nullable=True))), ("os_kernel_version", String(Modifiers(nullable=True))), ("device_name", String(Modifiers(nullable=True))), ("device_brand", String(Modifiers(nullable=True))), ("device_locale", String(Modifiers(nullable=True))), ("device_uuid", String(Modifiers(nullable=True))), ("device_model_id", String(Modifiers(nullable=True))), ("device_arch", String(Modifiers(nullable=True))), ("device_battery_level", Float(32, Modifiers(nullable=True))), ("device_orientation", String(Modifiers(nullable=True))), ("device_simulator", UInt(8, Modifiers(nullable=True))), ("device_online", UInt(8, Modifiers(nullable=True))), ("device_charging", UInt(8, Modifiers(nullable=True))), ] ) required_columns = ColumnSet( [ ("event_id", FixedString(32)), ("project_id", UInt(64)), ("group_id", UInt(64)), ("timestamp", DateTime()), ("deleted", UInt(8)), ("retention_days", UInt(16)),
Column("projects", Array(UInt(64))), Column("organization", Nullable(UInt(64))), Column("timestamp", DateTime()), Column("duration_ms", UInt(32)), Column("status", status_type), Column( "clickhouse_queries", Nested([ Column("sql", String()), Column("status", status_type), Column("trace_id", Nullable(UUID())), Column("duration_ms", UInt(32)), Column("stats", String()), Column("final", UInt(8)), Column("cache_hit", UInt(8)), Column("sample", Float(32)), Column("max_threads", UInt(8)), Column("num_days", UInt(32)), Column("clickhouse_table", LowCardinality(String())), Column("query_id", String()), Column("is_duplicate", UInt(8)), Column("consistent", UInt(8)), ]), ), ] class Migration(migration.MultiStepMigration): blocking = False def forwards_local(self) -> Sequence[operations.Operation]:
("ip_address_v6", Nullable(IPv6())), ("user", String()), ("user_hash", ReadOnly(UInt(64))), ("user_id", Nullable(String())), ("user_name", Nullable(String())), ("user_email", Nullable(String())), ("sdk_name", String()), ("sdk_version", String()), ("http_method", Nullable(String())), ("http_referer", Nullable(String())), ("tags", Nested([("key", String()), ("value", String())])), ("_tags_flattened", String()), ("_tags_hash_map", ReadOnly(Array(UInt(64)))), ("contexts", Nested([("key", String()), ("value", String())])), ("_contexts_flattened", String()), ("measurements", Nested([("key", String()), ("value", Float(64))]),), ("partition", UInt(16)), ("offset", UInt(64)), ("message_timestamp", DateTime()), ("retention_days", UInt(16)), ("deleted", UInt(8)), ] ) schema = WritableTableSchema( columns=columns, local_table_name="transactions_local", dist_table_name="transactions_dist", storage_set_key=StorageSetKey.TRANSACTIONS, mandatory_conditions=[], prewhere_candidates=["event_id", "transaction_name", "transaction", "title"],
("os_name", Nullable(String())), ("os_rooted", Nullable(UInt(8))), ] ) promoted_context_columns = ColumnSet( [ ("os_build", Nullable(String())), ("os_kernel_version", Nullable(String())), ("device_name", Nullable(String())), ("device_brand", Nullable(String())), ("device_locale", Nullable(String())), ("device_uuid", Nullable(String())), ("device_model_id", Nullable(String())), ("device_arch", Nullable(String())), ("device_battery_level", Nullable(Float(32))), ("device_orientation", Nullable(String())), ("device_simulator", Nullable(UInt(8))), ("device_online", Nullable(UInt(8))), ("device_charging", Nullable(UInt(8))), ] ) required_columns = ColumnSet( [ ("event_id", FixedString(32)), ("project_id", UInt(64)), ("group_id", UInt(64)), ("timestamp", DateTime()), ("deleted", UInt(8)), ("retention_days", UInt(16)),
("projects", Array(UInt(64))), ("organization", Nullable(UInt(64))), ("timestamp", DateTime()), ("duration_ms", UInt(32)), ("status", status_type), ( "clickhouse_queries", Nested([ ("sql", String()), ("status", status_type), ("trace_id", Nullable(UUID())), ("duration_ms", UInt(32)), ("stats", String()), ("final", UInt(8)), ("cache_hit", UInt(8)), ("sample", Float(32)), ("max_threads", UInt(8)), ("num_days", UInt(32)), ("clickhouse_table", LowCardinality(String())), ("query_id", String()), ("is_duplicate", UInt(8)), ("consistent", UInt(8)), ]), ), ]) schema = MergeTreeSchema( columns=columns, local_table_name="querylog_local", dist_table_name="querylog_dist", order_by="(toStartOfDay(timestamp), request_id)",
from typing import Sequence from snuba.clickhouse.columns import AggregateFunction, Column, Float from snuba.migrations import migration, operations from snuba.migrations.columns import MigrationModifiers from snuba.migrations.snuba_migrations.metrics.templates import ( get_forward_migrations_dist, get_forward_migrations_local, get_reverse_table_migration, ) COL_SCHEMA: Sequence[Column[MigrationModifiers]] = [ Column( "percentiles", AggregateFunction("quantiles(0.5, 0.75, 0.9, 0.95, 0.99)", [Float(64)]), ), Column("min", AggregateFunction("min", [Float(64)])), Column("max", AggregateFunction("max", [Float(64)])), Column("avg", AggregateFunction("avg", [Float(64)])), Column("sum", AggregateFunction("sum", [Float(64)])), Column("count", AggregateFunction("count", [Float(64)])), ] class Migration(migration.ClickhouseNodeMigration): blocking = False def forwards_local(self) -> Sequence[operations.SqlOperation]: return get_forward_migrations_local( source_table_name="metrics_distributions_buckets_local",
Column("tags", Nested([Column("key", UInt(64)), Column("value", UInt(64))])), ] POST_VALUES_BUCKETS_COLUMNS: Sequence[Column[Modifiers]] = [ Column("materialization_version", UInt(8)), Column("retention_days", UInt(16)), Column("partition", UInt(16)), Column("offset", UInt(64)), ] COL_SCHEMA_DISTRIBUTIONS: Sequence[Column[Modifiers]] = [ Column( "percentiles", AggregateFunction("quantiles(0.5, 0.75, 0.9, 0.95, 0.99)", [Float(64)]), ), Column("min", AggregateFunction("min", [Float(64)])), Column("max", AggregateFunction("max", [Float(64)])), Column("avg", AggregateFunction("avg", [Float(64)])), Column("sum", AggregateFunction("sum", [Float(64)])), Column("count", AggregateFunction("count", [Float(64)])), ] COL_SCHEMA_DISTRIBUTIONS_V2: Sequence[Column[Modifiers]] = [ *COL_SCHEMA_DISTRIBUTIONS, Column("histogram", AggregateFunction("histogram(250)", [Float(64)])), ] def get_forward_bucket_table_local(
("user_hash", Materialized(UInt(64), "cityHash64(user)"),), ("user_id", Nullable(String())), ("user_name", Nullable(String())), ("user_email", Nullable(String())), ("sdk_name", WithDefault(LowCardinality(String()), "''")), ("sdk_version", WithDefault(LowCardinality(String()), "''")), ("http_method", LowCardinality(Nullable(String()))), ("http_referer", Nullable(String())), ("tags", Nested([("key", String()), ("value", String())])), ("_tags_flattened", String()), ("_tags_hash_map", Materialized(Array(UInt(64)), TAGS_HASH_MAP_COLUMN)), ("contexts", Nested([("key", String()), ("value", String())])), ("_contexts_flattened", String()), ( "measurements", Nested([("key", LowCardinality(String())), ("value", Float(64))]), ), ("partition", UInt(16)), ("offset", UInt(64)), ("message_timestamp", DateTime()), ("retention_days", UInt(16)), ("deleted", UInt(8)), ] ) schema = ReplacingMergeTreeSchema( columns=columns, local_table_name="transactions_local", dist_table_name="transactions_dist", storage_set_key=StorageSetKey.TRANSACTIONS, mandatory_conditions=[],
def __init__(self):
    """
    Declare the events column sets, build the schema and table writer,
    initialize the parent dataset and set up the tag column processor.

    The same ``ReplacingMergeTreeSchema`` serves as both read and write
    schema. The private column-set attributes are assigned after the parent
    initializer runs and before the ``TagColumnProcessor`` is built.
    """
    # Optional stream related data.
    metadata_columns = ColumnSet([
        ('offset', Nullable(UInt(64))),
        ('partition', Nullable(UInt(16))),
    ])

    # The classic tags, saved in Snuba exactly as they appear in the
    # event body.
    promoted_tag_columns = ColumnSet([
        ('level', Nullable(String())),
        ('logger', Nullable(String())),
        ('server_name', Nullable(String())),  # future name: device_id?
        ('transaction', Nullable(String())),
        ('environment', Nullable(String())),
        ('sentry:release', Nullable(String())),
        ('sentry:dist', Nullable(String())),
        ('sentry:user', Nullable(String())),
        ('site', Nullable(String())),
        ('url', Nullable(String())),
    ])

    # Promoted tags that arrive in `tags` but are more closely related to
    # contexts. To avoid naming confusion with Clickhouse nested columns,
    # they are stored in the database with s/./_/
    promoted_context_tag_columns = ColumnSet([
        ('app_device', Nullable(String())),
        ('device', Nullable(String())),
        ('device_family', Nullable(String())),
        ('runtime', Nullable(String())),
        ('runtime_name', Nullable(String())),
        ('browser', Nullable(String())),
        ('browser_name', Nullable(String())),
        ('os', Nullable(String())),
        ('os_name', Nullable(String())),
        ('os_rooted', Nullable(UInt(8))),
    ])

    promoted_context_columns = ColumnSet([
        ('os_build', Nullable(String())),
        ('os_kernel_version', Nullable(String())),
        ('device_name', Nullable(String())),
        ('device_brand', Nullable(String())),
        ('device_locale', Nullable(String())),
        ('device_uuid', Nullable(String())),
        ('device_model_id', Nullable(String())),
        ('device_arch', Nullable(String())),
        ('device_battery_level', Nullable(Float(32))),
        ('device_orientation', Nullable(String())),
        ('device_simulator', Nullable(UInt(8))),
        ('device_online', Nullable(UInt(8))),
        ('device_charging', Nullable(UInt(8))),
    ])

    required_columns = ColumnSet([
        ('event_id', FixedString(32)),
        ('project_id', UInt(64)),
        ('group_id', UInt(64)),
        ('timestamp', DateTime()),
        ('deleted', UInt(8)),
        ('retention_days', UInt(16)),
    ])

    leading_columns = [
        # required for non-deleted
        ('platform', Nullable(String())),
        ('message', Nullable(String())),
        ('primary_hash', Nullable(FixedString(32))),
        ('received', Nullable(DateTime())),
        ('search_message', Nullable(String())),
        ('title', Nullable(String())),
        ('location', Nullable(String())),
        # optional user
        ('user_id', Nullable(String())),
        ('username', Nullable(String())),
        ('email', Nullable(String())),
        ('ip_address', Nullable(String())),
        # optional geo
        ('geo_country_code', Nullable(String())),
        ('geo_region', Nullable(String())),
        ('geo_city', Nullable(String())),
        ('sdk_name', Nullable(String())),
        ('sdk_version', Nullable(String())),
        ('type', Nullable(String())),
        ('version', Nullable(String())),
    ]

    exception_stacks = Nested([
        ('type', Nullable(String())),
        ('value', Nullable(String())),
        ('mechanism_type', Nullable(String())),
        ('mechanism_handled', Nullable(UInt(8))),
    ])
    exception_frames = Nested([
        ('abs_path', Nullable(String())),
        ('filename', Nullable(String())),
        ('package', Nullable(String())),
        ('module', Nullable(String())),
        ('function', Nullable(String())),
        ('in_app', Nullable(UInt(8))),
        ('colno', Nullable(UInt(32))),
        ('lineno', Nullable(UInt(32))),
        ('stack_level', UInt(16)),
    ])
    trailing_columns = [
        # other tags
        ('tags', Nested([
            ('key', String()),
            ('value', String()),
        ])),
        # other context
        ('contexts', Nested([
            ('key', String()),
            ('value', String()),
        ])),
        # http interface
        ('http_method', Nullable(String())),
        ('http_referer', Nullable(String())),
        # exception interface
        ('exception_stacks', exception_stacks),
        ('exception_frames', exception_frames),
        # These columns were added later in the life of the (current)
        # production database. They don't necessarily belong here in a
        # logical/readability sense, but they are here to match the order
        # of columns in production because `insert_distributed_sync` is
        # very sensitive to column existence and ordering.
        ('culprit', Nullable(String())),
        ('sdk_integrations', Array(String())),
        ('modules', Nested([
            ('name', String()),
            ('version', String()),
        ])),
    ]

    # Same left-to-right concatenation chain as before; only the list
    # literals have been given names.
    all_columns = (
        required_columns
        + leading_columns
        + metadata_columns
        + promoted_context_columns
        + promoted_tag_columns
        + promoted_context_tag_columns
        + trailing_columns
    )

    sample_expr = 'cityHash64(toString(event_id))'
    order_by = '(project_id, toStartOfDay(timestamp), %s)' % sample_expr
    schema = ReplacingMergeTreeSchema(
        columns=all_columns,
        local_table_name='sentry_local',
        dist_table_name='sentry_dist',
        mandatory_conditions=[('deleted', '=', 0)],
        order_by=order_by,
        partition_by='(toMonday(timestamp), if(equals(retention_days, 30), 30, 90))',
        version_column='deleted',
        sample_expr=sample_expr,
        migration_function=events_migrations,
    )

    dataset_schemas = DatasetSchemas(
        read_schema=schema,
        write_schema=schema,
    )

    stream_loader = KafkaStreamLoader(
        processor=EventsProcessor(promoted_tag_columns),
        default_topic="events",
        replacement_topic="event-replacements",
        commit_log_topic="snuba-commit-log",
    )
    table_writer = TableWriter(
        write_schema=schema,
        stream_loader=stream_loader,
    )

    super(EventsDataset, self).__init__(
        dataset_schemas=dataset_schemas,
        table_writer=table_writer,
        time_group_columns={
            'time': 'timestamp',
            'rtime': 'received',
        },
        time_parse_columns=('timestamp', 'received'),
    )

    self.__metadata_columns = metadata_columns
    self.__promoted_tag_columns = promoted_tag_columns
    self.__promoted_context_tag_columns = promoted_context_tag_columns
    self.__promoted_context_columns = promoted_context_columns
    self.__required_columns = required_columns

    # Both helper methods are called only after the private column-set
    # attributes above have been assigned.
    promoted_columns = self._get_promoted_columns()
    column_tag_map = self._get_column_tag_map()
    self.__tags_processor = TagColumnProcessor(
        columns=all_columns,
        promoted_columns=promoted_columns,
        column_tag_map=column_tag_map,
    )