def test_entity_data_model() -> None:
    """Exercise EntityColumnSet lookups, wildcard columns, and equality."""

    def build_model() -> EntityColumnSet:
        # Two concrete columns plus two wildcard (subscriptable) columns.
        return EntityColumnSet(
            columns=[
                Column("event_id", FixedString(32)),
                Column("project_id", UInt(64)),
                WildcardColumn("tags", String()),
                WildcardColumn("contexts", String()),
            ]
        )

    model = build_model()

    # A plain column lookup returns the declared column.
    event_id_col = model.get("event_id")
    assert event_id_col is not None
    assert event_id_col.name == "event_id"
    assert event_id_col.type == FixedString(32)

    # A wildcard lookup produces a flattened column for the subscripted key.
    assert model.get("tags[asdf]") == FlattenedColumn(None, "tags[asdf]", String())

    # Unknown names, malformed subscript keys, and unknown wildcard bases miss.
    assert model.get("asdf") is None
    assert model.get("tags[asd f]") is None
    assert model.get("asdf[gkrurrtsjhfkjgh]") is None

    # Equality is structural: an identically built set compares equal.
    assert model == build_model()
def forwards_dist(self) -> Sequence[operations.SqlOperation]:
    """Add the quantity and category columns to both distributed outcomes tables."""
    # (table, column) pairs; raw and hourly tables use different quantity widths.
    new_columns = (
        ("outcomes_raw_dist", Column("quantity", UInt(32))),
        ("outcomes_raw_dist", Column("category", UInt(8))),
        ("outcomes_hourly_dist", Column("quantity", UInt(64))),
        ("outcomes_hourly_dist", Column("category", UInt(8))),
    )
    return [
        operations.AddColumn(
            storage_set=StorageSetKey.OUTCOMES,
            table_name=table,
            column=column,
            after=None,
        )
        for table, column in new_columns
    ]
def __init__(
    self,
    writable_storage_key: StorageKey,
    readable_storage_key: StorageKey,
    value_schema: Sequence[Column[SchemaModifiers]],
    mappers: TranslationMappers,
) -> None:
    """Wire a metrics entity to its read/write storages.

    Args:
        writable_storage_key: key of the storage rows are written to.
        readable_storage_key: key of the storage queries are served from.
        value_schema: extra value columns appended after the common
            org/project/metric/timestamp/tags columns.
        mappers: extra translation mappers concatenated after the base
            tags subscript mapper.
    """
    writable_storage = get_writable_storage(writable_storage_key)
    readable_storage = get_storage(readable_storage_key)
    super().__init__(
        storages=[writable_storage, readable_storage],
        query_pipeline_builder=SimplePipelineBuilder(
            query_plan_builder=SingleStorageQueryPlanBuilder(
                readable_storage,
                # The base tags[...] subscript mapper always applies; the
                # caller-supplied mappers are concatenated after it.
                mappers=TranslationMappers(
                    subscriptables=[
                        SubscriptableMapper(None, "tags", None, "tags"),
                    ],
                ).concat(mappers),
            )
        ),
        # Columns common to all metrics entities, followed by the
        # entity-specific value columns.
        abstract_column_set=ColumnSet(
            [
                Column("org_id", UInt(64)),
                Column("project_id", UInt(64)),
                Column("metric_id", UInt(64)),
                Column("timestamp", DateTime()),
                Column("tags", Nested([("key", UInt(64)), ("value", UInt(64))])),
                *value_schema,
            ]
        ),
        join_relationships={},
        writable_storage=writable_storage,
        # Every query against this entity must filter on org and project.
        validators=[EntityRequiredColumnValidator({"org_id", "project_id"})],
        required_time_column="timestamp",
    )
def forwards_local(self) -> Sequence[operations.SqlOperation]: return [ operations.AddColumn( storage_set=StorageSetKey.OUTCOMES, table_name="outcomes_raw_local", column=Column("quantity", UInt(32)), after="reason", ), operations.AddColumn( storage_set=StorageSetKey.OUTCOMES, table_name="outcomes_raw_local", column=Column("category", UInt(8)), after="timestamp", ), operations.AddColumn( storage_set=StorageSetKey.OUTCOMES, table_name="outcomes_hourly_local", column=Column("quantity", UInt(64)), after="reason", ), operations.RunSql( storage_set=StorageSetKey.OUTCOMES, statement=""" ALTER TABLE outcomes_hourly_local ADD COLUMN IF NOT EXISTS category UInt8 AFTER timestamp, MODIFY ORDER BY (org_id, project_id, key_id, outcome, reason, timestamp, category); """, ), ]
def __backward_migrations(
    self, table_name: str
) -> Sequence[operations.SqlOperation]:
    """Restore title and message to low-cardinality columns materialized
    from transaction_name."""
    return [
        operations.ModifyColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name=table_name,
            column=Column(
                column_name,
                String(
                    Modifiers(
                        low_cardinality=True, materialized="transaction_name"
                    )
                ),
            ),
        )
        for column_name in ("title", "message")
    ]
def backwards_dist(self) -> Sequence[operations.SqlOperation]:
    """Re-add the legacy flattened context/tag string columns on the dist table."""
    return [
        operations.AddColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_dist",
            column=Column(column_name, String()),
        )
        for column_name in ("_contexts_flattened", "_tags_flattened")
    ]
def __forward_migrations(
    self, table_name: str
) -> Sequence[operations.SqlOperation]:
    """Switch title and message to plain (non-modified) String columns."""
    return [
        operations.ModifyColumn(
            storage_set=StorageSetKey.DISCOVER,
            table_name=table_name,
            column=Column(column_name, String()),
        )
        for column_name in ("title", "message")
    ]
def __backwards_migrations(
    self, table_name: str
) -> Sequence[operations.SqlOperation]:
    """Restore title and message to nullable String columns."""
    return [
        operations.ModifyColumn(
            storage_set=StorageSetKey.DISCOVER,
            table_name=table_name,
            column=Column(column_name, String(Modifiers(nullable=True))),
        )
        for column_name in ("title", "message")
    ]
def forwards_local(self) -> Sequence[operations.SqlOperation]:
    """Add spans.exclusive_time_32 to transactions_local and set its TTL.

    NOTE(review): the column is added first, then ModifyColumn re-issues
    the same column with ttl_month=("finish_ts", 1) — presumably because
    AddColumn cannot express the TTL directly; confirm against the
    operations API.
    """
    return [
        operations.AddColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column=Column("spans.exclusive_time_32", Array(Float(32))),
            after="spans.group",
        ),
        operations.ModifyColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column=Column("spans.exclusive_time_32", Array(Float(32))),
            ttl_month=("finish_ts", 1),
        ),
    ]
def forwards_dist(self) -> Sequence[operations.SqlOperation]:
    """Add hierarchical_hashes after primary_hash on both dist event tables."""
    migrations = []
    # The errors table stores hashes as native UUIDs.
    migrations.append(
        operations.AddColumn(
            storage_set=StorageSetKey.EVENTS,
            table_name="errors_dist",
            column=Column("hierarchical_hashes", Array(UUID())),
            after="primary_hash",
        )
    )
    # The legacy sentry table stores hashes as 32-char hex strings.
    migrations.append(
        operations.AddColumn(
            storage_set=StorageSetKey.EVENTS,
            table_name="sentry_dist",
            column=Column("hierarchical_hashes", Array(FixedString(32))),
            after="primary_hash",
        )
    )
    return migrations
def __forward_migrations(
    self, table_name: str
) -> Sequence[operations.Operation]:
    """Switch the querylog status columns to LowCardinality(String)."""
    new_columns = (
        Column("status", LowCardinality(String())),
        Column("clickhouse_queries.status", Array(LowCardinality(String()))),
    )
    return [
        operations.ModifyColumn(StorageSetKey.QUERYLOG, table_name, column)
        for column in new_columns
    ]
def __init__(self) -> None:
    """Configure the org-level metrics counters entity.

    NOTE(review): writable_storage_key is passed as None, i.e. this entity
    is read-only — presumably the superclass tolerates a missing writable
    storage; confirm against the parent __init__.
    """
    super().__init__(
        writable_storage_key=None,
        readable_storage_key=StorageKey.ORG_METRICS_COUNTERS,
        # No extra value columns beyond the abstract column set below.
        value_schema=[],
        mappers=TranslationMappers(),
        abstract_column_set=ColumnSet(
            [
                Column("org_id", UInt(64)),
                Column("project_id", UInt(64)),
                Column("metric_id", UInt(64)),
                Column("timestamp", DateTime()),
                Column("bucketed_time", DateTime()),
            ]
        ),
        # Only hourly-or-coarser granularities are allowed for org metrics.
        validators=[GranularityValidator(minimum=3600)],
    )
def __backwards_migrations(
    self, table_name: str
) -> Sequence[operations.Operation]:
    """Restore the Enum-typed querylog status columns."""
    status_type = Enum([("success", 0), ("error", 1), ("rate-limited", 2)])
    column_specs = (
        ("status", status_type),
        ("clickhouse_queries.status", Array(status_type)),
    )
    return [
        operations.ModifyColumn(
            StorageSetKey.QUERYLOG, table_name, Column(name, column_type)
        )
        for name, column_type in column_specs
    ]
def forwards_local(self) -> Sequence[operations.Operation]:
    """Add http_method and http_referer to errors_local, in that order."""
    # (name, type, predecessor) — http_referer is placed after the newly
    # added http_method column.
    column_specs = (
        ("http_method", LowCardinality(Nullable(String())), "sdk_version"),
        ("http_referer", Nullable(String()), "http_method"),
    )
    return [
        operations.AddColumn(
            storage_set=StorageSetKey.EVENTS,
            table_name="errors_local",
            column=Column(name, column_type),
            after=predecessor,
        )
        for name, column_type, predecessor in column_specs
    ]
def forwards_local(self) -> Sequence[operations.Operation]:
    """Create the local experimental spans table and its tags hash-map column.

    NOTE(review): `columns` and TAGS_HASH_MAP_COLUMN come from module scope
    (not visible here) — the table schema is defined elsewhere in the file.
    """
    return [
        operations.CreateTable(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="spans_experimental_local",
            columns=columns,
            engine=table_engines.ReplacingMergeTree(
                storage_set=StorageSetKey.TRANSACTIONS,
                version_column="deleted",
                # Hashes keep high-cardinality key parts compact in the
                # primary key.
                order_by=(
                    "(project_id, toStartOfDay(finish_ts), transaction_name, "
                    "cityHash64(transaction_span_id), op, cityHash64(trace_id), "
                    "cityHash64(span_id))"
                ),
                partition_by="(toMonday(finish_ts))",
                sample_by="cityHash64(span_id)",
                ttl="finish_ts + toIntervalDay(retention_days)",
                settings={"index_granularity": "8192"},
            ),
        ),
        # Materialized hash-map of tags for fast tag lookups.
        operations.AddColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="spans_experimental_local",
            column=Column(
                "_tags_hash_map",
                Materialized(Array(UInt(64)), TAGS_HASH_MAP_COLUMN),
            ),
            after="tags.value",
        ),
    ]
def backwards_local(self) -> Sequence[operations.SqlOperation]:
    """Recreate the legacy local events table and its tags hash-map column.

    NOTE(review): `columns` and TAGS_HASH_MAP_COLUMN come from module scope
    (not visible here).
    """
    sample_expr = "cityHash64(toString(event_id))"
    return [
        operations.CreateTable(
            storage_set=StorageSetKey.EVENTS,
            table_name="sentry_local",
            columns=columns,
            engine=table_engines.ReplacingMergeTree(
                storage_set=StorageSetKey.EVENTS,
                version_column="deleted",
                order_by="(project_id, toStartOfDay(timestamp), %s)" % sample_expr,
                # 30-day-retention rows get their own partitions; everything
                # else is bucketed as 90 days.
                partition_by="(toMonday(timestamp), if(equals(retention_days, 30), 30, 90))",
                sample_by=sample_expr,
            ),
        ),
        # Materialized hash-map of tags for fast tag lookups.
        operations.AddColumn(
            storage_set=StorageSetKey.EVENTS,
            table_name="sentry_local",
            column=Column(
                "_tags_hash_map",
                Array(UInt(64), Modifiers(materialized=TAGS_HASH_MAP_COLUMN)),
            ),
            after="_tags_flattened",
        ),
    ]
def forwards_dist(self) -> Sequence[operations.Operation]:
    """Add http_method and http_referer to transactions_dist, in that order."""
    # (name, type, predecessor) — http_referer is placed after the newly
    # added http_method column.
    column_specs = (
        ("http_method", LowCardinality(Nullable(String())), "sdk_version"),
        ("http_referer", Nullable(String()), "http_method"),
    )
    return [
        operations.AddColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_dist",
            column=Column(name, column_type),
            after=predecessor,
        )
        for name, column_type, predecessor in column_specs
    ]
def __forward_migrations(
    self, table_name: str
) -> Sequence[operations.SqlOperation]:
    """Add the tags hash-map and deleted columns to a discover table."""
    column_specs = (
        (Column("_tags_hash_map", Array(UInt(64))), "tags"),
        (Column("deleted", UInt(8)), "contexts"),
    )
    return [
        operations.AddColumn(
            storage_set=StorageSetKey.DISCOVER,
            table_name=table_name,
            column=column,
            after=predecessor,
        )
        for column, predecessor in column_specs
    ]
def test_modify_column() -> None:
    """MODIFY COLUMN should render the bare column name and its type."""
    op = ModifyColumn(
        StorageSetKey.EVENTS, "test_table", Column("test", String())
    )
    expected = "ALTER TABLE test_table MODIFY COLUMN test String;"
    assert op.format_sql() == expected
def get_forward_view_migration_polymorphic_table_v3(
    source_table_name: str,
    table_name: str,
    mv_name: str,
    aggregation_col_schema: Sequence[Column[Modifiers]],
    aggregation_states: str,
    metric_type: str,
    target_mat_version: int,
    appended_where_clause: str = "",
) -> operations.SqlOperation:
    """Build the CreateMaterializedView op for the polymorphic raw table (v3).

    Args:
        source_table_name: raw table the view reads from.
        table_name: aggregate table the view writes to.
        mv_name: name of the materialized view to create.
        aggregation_col_schema: metric-type-specific aggregate columns.
        aggregation_states: SQL fragment producing the aggregate states.
        metric_type: metric kind ("counter", "set", "distribution", ...).
        target_mat_version: materialization version; substituted for both
            the version filter and the emitted materialization_version.
        appended_where_clause: optional extra WHERE clause fragment.
    """
    # use_case_id leads the aggregate schema, followed by the shared
    # columns and the metric-type-specific ones.
    aggregated_cols = [
        Column("use_case_id", String(Modifiers(low_cardinality=True))),
        *COMMON_AGGR_COLUMNS,
        *aggregation_col_schema,
    ]
    return operations.CreateMaterializedView(
        storage_set=StorageSetKey.METRICS,
        view_name=mv_name,
        destination_table_name=table_name,
        columns=aggregated_cols,
        query=MATVIEW_STATEMENT_POLYMORPHIC_TABLE_V3
        % {
            "metric_type": metric_type,
            "raw_table_name": source_table_name,
            "aggregation_states": aggregation_states,
            "target_mat_version": target_mat_version,
            "appended_where_clause": appended_where_clause,
            # Intentionally the same value as target_mat_version.
            "materialization_version": target_mat_version,
        },
    )
def forwards_dist(self) -> Sequence[operations.SqlOperation]:
    """Build the dist migrations for the metrics counters table."""
    # Counters aggregate a single summed float value.
    value_column = Column("value", AggregateFunction("sum", [Float(64)]))
    return get_forward_migrations_dist(
        dist_table_name="metrics_counters_dist",
        local_table_name="metrics_counters_local",
        aggregation_col_schema=[value_column],
    )
def __forward_migrations(
    self, table_name: str
) -> Sequence[operations.SqlOperation]:
    """Add error-schema compatibility columns to a transactions table.

    Each column is materialized from existing data so no backfill is
    needed; the `after` chain keeps them adjacent, in declaration order.
    """
    return [
        operations.AddColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name=table_name,
            column=Column(
                "type",
                String(
                    # The literal string 'transaction' (note the inner
                    # quotes: a constant expression, not a column ref).
                    Modifiers(low_cardinality=True, materialized="'transaction'")
                ),
            ),
            after="deleted",
        ),
        operations.AddColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name=table_name,
            column=Column(
                "message",
                String(
                    Modifiers(low_cardinality=True, materialized="transaction_name")
                ),
            ),
            after="type",
        ),
        operations.AddColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name=table_name,
            column=Column(
                "title",
                String(
                    Modifiers(low_cardinality=True, materialized="transaction_name")
                ),
            ),
            after="message",
        ),
        operations.AddColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name=table_name,
            column=Column(
                "timestamp", DateTime(Modifiers(materialized="finish_ts"))
            ),
            after="title",
        ),
    ]
def forwards_local(self) -> Sequence[operations.SqlOperation]:
    """Recreate the local metrics materialized views at materialization_version=4.

    Distributions get an updated column schema; sets and counters are
    recreated unchanged except for the new materialization version.
    """
    return [
        *self.__forward_migrations("metrics_distributions_local"),
        get_forward_view_migration_polymorphic_table_v2(
            source_table_name=self.raw_table_name,
            table_name="metrics_distributions_local",
            mv_name=get_polymorphic_mv_v3_name("distributions"),
            aggregation_col_schema=COL_SCHEMA_DISTRIBUTIONS_V2,
            # arrayJoin expands distribution_values so each value feeds
            # every aggregate state below.
            aggregation_states=(
                "quantilesState(0.5, 0.75, 0.9, 0.95, 0.99)((arrayJoin(distribution_values) AS values_rows)) as percentiles, "
                "minState(values_rows) as min, "
                "maxState(values_rows) as max, "
                "avgState(values_rows) as avg, "
                "sumState(values_rows) as sum, "
                "countState(values_rows) as count, "
                "histogramState(250)(values_rows) as histogram_buckets"
            ),
            metric_type="distribution",
            materialization_version=4,
        ),
        # No changes in these MVs' schemas. We just need to recreate the
        # same exact MVs as in 0023 for the new materialization_version.
        get_forward_view_migration_polymorphic_table_v2(
            source_table_name=self.raw_table_name,
            table_name="metrics_sets_local",
            mv_name=get_polymorphic_mv_v3_name("sets"),
            aggregation_col_schema=[
                Column("value", AggregateFunction("uniqCombined64", [UInt(64)])),
            ],
            aggregation_states="uniqCombined64State(arrayJoin(set_values)) as value",
            metric_type="set",
            materialization_version=4,
        ),
        get_forward_view_migration_polymorphic_table_v2(
            source_table_name=self.raw_table_name,
            table_name="metrics_counters_local",
            mv_name=get_polymorphic_mv_v3_name("counters"),
            aggregation_col_schema=[
                Column("value", AggregateFunction("sum", [Float(64)])),
            ],
            aggregation_states="sumState(count_value) as value",
            metric_type="counter",
            materialization_version=4,
        ),
    ]
def get_forward_migrations_local(
    source_table_name: str,
    table_name: str,
    mv_name: str,
    aggregation_col_schema: Sequence[Column[Modifiers]],
    aggregation_states: str,
    granularity: int,
) -> Sequence[operations.SqlOperation]:
    """Build the full local migration sequence for one metrics aggregate table.

    Creates the aggregating table, adds the materialized tags hash column,
    adds bloom-filter indexes for tag lookups, then creates the
    materialized view feeding the table from the raw source.

    Args:
        source_table_name: raw table the materialized view reads from.
        table_name: aggregate table to create.
        mv_name: name for the materialized view.
        aggregation_col_schema: metric-type-specific aggregate columns.
        aggregation_states: SQL fragment producing the aggregate states.
        granularity: time bucket granularity for the view.
    """
    aggregated_cols = [*COMMON_AGGR_COLUMNS, *aggregation_col_schema]
    return [
        operations.CreateTable(
            storage_set=StorageSetKey.METRICS,
            table_name=table_name,
            columns=aggregated_cols,
            engine=table_engines.AggregatingMergeTree(
                storage_set=StorageSetKey.METRICS,
                order_by="(org_id, project_id, metric_id, granularity, timestamp, tags.key, tags.value)",
                partition_by="(retention_days, toMonday(timestamp))",
                settings={"index_granularity": "256"},
            ),
        ),
        # Materialized hash-map of int tags for fast tag-filtered queries.
        operations.AddColumn(
            storage_set=StorageSetKey.METRICS,
            table_name=table_name,
            column=Column(
                "_tags_hash",
                Array(UInt(64), Modifiers(materialized=INT_TAGS_HASH_MAP_COLUMN)),
            ),
            after="tags.value",
        ),
        operations.AddIndex(
            storage_set=StorageSetKey.METRICS,
            table_name=table_name,
            index_name="bf_tags_hash",
            index_expression="_tags_hash",
            index_type="bloom_filter()",
            granularity=1,
        ),
        operations.AddIndex(
            storage_set=StorageSetKey.METRICS,
            table_name=table_name,
            index_name="bf_tags_key_hash",
            index_expression="tags.key",
            index_type="bloom_filter()",
            granularity=1,
        ),
    ] + [
        # The MV must be created after the destination table exists.
        get_forward_view_migration_local(
            source_table_name,
            table_name,
            mv_name,
            aggregation_col_schema,
            aggregation_states,
            granularity,
        )
    ]
def forwards_dist(self) -> Sequence[operations.Operation]:
    """Add the tags hash-map column to errors_dist."""
    hash_map_column = Column("_tags_hash_map", Array(UInt(64)))
    add_hash_map = operations.AddColumn(
        storage_set=StorageSetKey.EVENTS,
        table_name="errors_dist",
        column=hash_map_column,
        after="_tags_flattened",
    )
    return [add_hash_map]
def __backward_migrations(
    self, table_name: str
) -> Sequence[operations.SqlOperation]:
    """Revert trace_id to a plain (non-modified) UUID column."""
    revert_trace_id = operations.ModifyColumn(
        storage_set=StorageSetKey.TRANSACTIONS,
        table_name=table_name,
        column=Column("trace_id", UUID()),
    )
    return [revert_trace_id]
def forwards_dist(self) -> Sequence[operations.SqlOperation]:
    """Build the dist migrations for the metrics sets table."""
    # Sets aggregate distinct values with uniqCombined64.
    value_column = Column(
        "value", AggregateFunction("uniqCombined64", [UInt(64)])
    )
    return get_forward_migrations_dist(
        dist_table_name="metrics_sets_dist",
        local_table_name="metrics_sets_local",
        aggregation_col_schema=[value_column],
    )
def forwards_local(self) -> Sequence[operations.SqlOperation]:
    """Add message_timestamp right after offset on errors_local."""
    add_message_timestamp = operations.AddColumn(
        storage_set=StorageSetKey.EVENTS,
        table_name="errors_local",
        column=Column("message_timestamp", DateTime()),
        after="offset",
    )
    return [add_message_timestamp]
def __backwards_migrations(
    self, table_name: str
) -> Sequence[operations.Operation]:
    """Restore the user column to a low-cardinality String."""
    revert_user = operations.ModifyColumn(
        storage_set=StorageSetKey.DISCOVER,
        table_name=table_name,
        column=Column("user", String(Modifiers(low_cardinality=True))),
    )
    return [revert_user]
def forwards_dist(self) -> Sequence[operations.SqlOperation]:
    """Add spans.exclusive_time_32 after spans.group on transactions_dist."""
    add_exclusive_time = operations.AddColumn(
        storage_set=StorageSetKey.TRANSACTIONS,
        table_name="transactions_dist",
        column=Column("spans.exclusive_time_32", Array(Float(32))),
        after="spans.group",
    )
    return [add_exclusive_time]