Column("timestamp", DateTime()), Column("tags", Nested([Column("key", UInt(64)), Column("value", UInt(64))])), ] POST_VALUES_BUCKETS_COLUMNS: Sequence[Column[Modifiers]] = [ Column("materialization_version", UInt(8)), Column("retention_days", UInt(16)), Column("partition", UInt(16)), Column("offset", UInt(64)), ] COL_SCHEMA_DISTRIBUTIONS: Sequence[Column[Modifiers]] = [ Column( "percentiles", AggregateFunction("quantiles(0.5, 0.75, 0.9, 0.95, 0.99)", [Float(64)]), ), Column("min", AggregateFunction("min", [Float(64)])), Column("max", AggregateFunction("max", [Float(64)])), Column("avg", AggregateFunction("avg", [Float(64)])), Column("sum", AggregateFunction("sum", [Float(64)])), Column("count", AggregateFunction("count", [Float(64)])), ] COL_SCHEMA_DISTRIBUTIONS_V2: Sequence[Column[Modifiers]] = [ *COL_SCHEMA_DISTRIBUTIONS, Column("histogram", AggregateFunction("histogram(250)", [Float(64)])), ] def get_forward_bucket_table_local(
raw_schema = WritableTableSchema( columns=all_columns, local_table_name=WRITE_LOCAL_TABLE_NAME, dist_table_name=WRITE_DIST_TABLE_NAME, storage_set_key=StorageSetKey.SESSIONS, ) read_columns = ColumnSet([ ("org_id", UInt(64)), ("project_id", UInt(64)), ("started", DateTime()), ("release", String()), ("environment", String()), ( "duration_quantiles", AggregateFunction("quantilesIf(0.5, 0.9)", UInt(32), UInt(8)), ), ("sessions", AggregateFunction("countIf", UUID(), UInt(8))), ("users", AggregateFunction("uniqIf", UUID(), UInt(8))), ( "sessions_crashed", AggregateFunction("countIf", UUID(), UInt(8)), ), ( "sessions_abnormal", AggregateFunction("countIf", UUID(), UInt(8)), ), ("sessions_errored", AggregateFunction("uniqIf", UUID(), UInt(8))), ("users_crashed", AggregateFunction("uniqIf", UUID(), UInt(8))), ("users_abnormal", AggregateFunction("uniqIf", UUID(), UInt(8))), ("users_errored", AggregateFunction("uniqIf", UUID(), UInt(8))),
test_data = [ # Basic types (("Date", "", "", ""), Date()), (("DateTime", "", "", ""), DateTime()), (("Enum8('success' = 0, 'error' = 1)", "", "", ""), Enum([("success", 0), ("error", 1)])), (("FixedString(32)", "", "", ""), FixedString(32)), (("Float32", "", "", ""), Float(32)), (("IPv4", "", "", ""), IPv4()), (("IPv6", "", "", ""), IPv6()), (("String", "", "", ""), String()), (("UInt32", "", "", ""), UInt(32)), (("UUID", "", "", ""), UUID()), # Aggregate functions (("AggregateFunction(uniq, UInt8)", "", "", ""), AggregateFunction("uniq", UInt(8))), (("AggregateFunction(countIf, UUID, UInt8)", "", "", ""), AggregateFunction("countIf", UUID(), UInt(8))), (("AggregateFunction(quantileIf(0.5, 0.9), UInt32, UInt8)", "", "", ""), AggregateFunction("quantileIf(0.5, 0.9)", UInt(32), UInt(8))), # Array (("Array(String)", "", "", ""), Array(String())), (("Array(DateTime)", "", "", ""), Array(DateTime())), (("Array(UInt64)", "", "", ""), Array(UInt(64))), (("Array(Nullable(UUID))", "", "", ""), Array(Nullable(UUID()))), # Nullable (("Nullable(String)", "", "", ""), Nullable(String())), (("Nullable(FixedString(8))", "", "", ""), Nullable(FixedString(8))), (("Nullable(Date)", "", "", ""), Nullable(Date())), # Low cardinality (("LowCardinality(String)", "", "", ""), LowCardinality(String())),
class Migration(migration.ClickhouseNodeMigration):
    """
    Recreate the ``generic_metric_sets_aggregation_mv`` materialized view.

    The forward migration drops the existing view and creates a new one that
    reads raw rows from ``generic_metric_sets_raw_local`` (filtered to
    ``materialization_version = 1`` and ``metric_type = 'set'``) and writes
    uniqCombined64 aggregate states into ``generic_metric_sets_local``.
    The backward migration simply drops the view.
    """

    # NOTE(review): marked non-blocking — presumably safe to run while the
    # consumer is live since only a view is swapped; confirm with ops runbook.
    blocking = False
    view_name = "generic_metric_sets_aggregation_mv"

    # Schema of the view's SELECT output; must line up with the destination
    # table ``generic_metric_sets_local``.
    dest_table_columns: Sequence[Column[Modifiers]] = [
        Column("org_id", UInt(64)),
        Column("project_id", UInt(64)),
        Column("metric_id", UInt(64)),
        Column("granularity", UInt(8)),
        # DoubleDelta codec: timestamps are monotonic per partition, so
        # delta-of-delta encoding compresses well.
        Column("timestamp", DateTime(modifiers=Modifiers(codecs=["DoubleDelta"]))),
        Column("retention_days", UInt(16)),
        Column(
            "tags",
            Nested(
                [
                    ("key", UInt(64)),
                    ("indexed_value", UInt(64)),
                    ("raw_value", String()),
                ]
            ),
        ),
        # Aggregate state column: merged at query time with uniqCombined64Merge.
        Column("value", AggregateFunction("uniqCombined64", [UInt(64)])),
        Column("use_case_id", String(Modifiers(low_cardinality=True))),
    ]

    def forwards_local(self) -> Sequence[operations.SqlOperation]:
        """Drop the old view and create the v1 set-aggregation view."""
        return [
            operations.DropTable(
                storage_set=StorageSetKey.GENERIC_METRICS_SETS,
                table_name=self.view_name,
            ),
            operations.CreateMaterializedView(
                storage_set=StorageSetKey.GENERIC_METRICS_SETS,
                view_name=self.view_name,
                columns=self.dest_table_columns,
                destination_table_name="generic_metric_sets_local",
                # arrayJoin(granularities) fans each raw row out to one row per
                # granularity; the multiIf maps granularity enum -> seconds
                # (0->10s, 1->60s, 2->1h, 3->1d, else -1) and the
                # intDiv/toUnixTimestamp pair floors the timestamp to that
                # bucket boundary.
                query="""
                SELECT
                    use_case_id,
                    org_id,
                    project_id,
                    metric_id,
                    arrayJoin(granularities) as granularity,
                    tags.key,
                    tags.indexed_value,
                    tags.raw_value,
                    toDateTime(multiIf(granularity=0,10,granularity=1,60,granularity=2,3600,granularity=3,86400,-1) * intDiv(toUnixTimestamp(timestamp), multiIf(granularity=0,10,granularity=1,60,granularity=2,3600,granularity=3,86400,-1))) as timestamp,
                    retention_days,
                    uniqCombined64State(arrayJoin(set_values)) as value
                FROM generic_metric_sets_raw_local
                WHERE materialization_version = 1
                  AND metric_type = 'set'
                GROUP BY
                    use_case_id,
                    org_id,
                    project_id,
                    metric_id,
                    tags.key,
                    tags.indexed_value,
                    tags.raw_value,
                    timestamp,
                    granularity,
                    retention_days
                """,
            ),
        ]

    def backwards_local(self) -> Sequence[operations.SqlOperation]:
        """Undo: drop the materialized view (destination table is untouched)."""
        return [
            operations.DropTable(
                storage_set=StorageSetKey.GENERIC_METRICS_SETS,
                table_name=self.view_name,
            )
        ]

    def forwards_dist(self) -> Sequence[operations.SqlOperation]:
        # Views are node-local; nothing to do on distributed nodes.
        return []

    def backwards_dist(self) -> Sequence[operations.SqlOperation]:
        # No distributed-side state was created, so nothing to undo.
        return []
from snuba.migrations.columns import MigrationModifiers as Modifiers from snuba.processor import MAX_UINT32, NIL_UUID from .matview import create_matview_v1 aggregate_columns: Sequence[Column[Modifiers]] = [ Column("org_id", UInt(64)), Column("project_id", UInt(64)), Column("started", DateTime()), Column("release", String(Modifiers(low_cardinality=True))), Column("environment", String(Modifiers(low_cardinality=True))), Column("user_agent", String(Modifiers(low_cardinality=True))), Column("os", String(Modifiers(low_cardinality=True))), # durations Column( "duration_quantiles", AggregateFunction("quantilesIf(0.5, 0.9)", [UInt(32), UInt(8)]), ), Column("duration_avg", AggregateFunction("avgIf", [UInt(32), UInt(8)])), # sessions: Column("sessions", AggregateFunction("countIf", [UUID(), UInt(8)])), Column("sessions_preaggr", AggregateFunction("sumIf", [UInt(32), UInt(8)])), Column("sessions_crashed", AggregateFunction("countIf", [UUID(), UInt(8)])), Column("sessions_crashed_preaggr", AggregateFunction("sumIf", [UInt(32), UInt(8)])), Column("sessions_abnormal", AggregateFunction("countIf", [UUID(), UInt(8)])), Column("sessions_abnormal_preaggr", AggregateFunction("sumIf", [UInt(32), UInt(8)])),
[("key", String()), ("val", String(Modifier(nullable=True)))], Modifier(nullable=True), ), Nested([("key", String()), ("val", String())]), cast( Column[Modifier], Nested([("key", String()), ("val", String())], Modifier(nullable=True)), ), "Nullable(Nested(key String, val Nullable(String)))", id="nested", ), pytest.param( cast( Column[Modifier], AggregateFunction("uniqIf", [UInt(8), UInt(32)], Modifier(nullable=True)), ), AggregateFunction("uniqIf", [UInt(8), UInt(32)]), cast( Column[Modifier], AggregateFunction("uniqIf", [UInt(8)], Modifier(nullable=True)), ), "Nullable(AggregateFunction(uniqIf, UInt8, UInt32))", id="aggregated", ), pytest.param( Enum([("a", 1), ("b", 2)], Modifier(nullable=True)), Enum([("a", 1), ("b", 2)]), Enum([("a", 1), ("b", 2)]), "Nullable(Enum('a' = 1, 'b' = 2))", id="enums",
new_raw_columns: Sequence[Tuple[Column[Modifiers], str]] = [ ( Column("quantity", UInt(32, Modifiers(default=str(DEFAULT_QUANTITY)))), "distinct_id", ), (Column("user_agent", String(Modifiers(low_cardinality=True))), "environment"), (Column("os", String(Modifiers(low_cardinality=True))), "user_agent"), ] new_dest_columns: Sequence[Tuple[Column[Modifiers], str]] = [ (Column("user_agent", String(Modifiers(low_cardinality=True))), "environment"), (Column("os", String(Modifiers(low_cardinality=True))), "user_agent"), ( Column("duration_avg", AggregateFunction("avgIf", [UInt(32), UInt(8)])), "duration_quantiles", ), ( Column("sessions_preaggr", AggregateFunction("sumIf", [UInt(32), UInt(8)])), "sessions", ), ( Column( "sessions_crashed_preaggr", AggregateFunction("sumIf", [UInt(32), UInt(8)]), ), "sessions_crashed", ), (
Column("timestamp", DateTime()), Column("retention_days", UInt(16)), Column("tags", Nested([("key", UInt(64)), ("value", UInt(64))])), Column("_tags_hash", Array(UInt(64), SchemaModifiers(readonly=True))), ] sets_storage = ReadableTableStorage( storage_key=StorageKey.METRICS_SETS, storage_set_key=StorageSetKey.METRICS, schema=TableSchema( local_table_name="metrics_sets_local", dist_table_name="metrics_sets_dist", storage_set_key=StorageSetKey.METRICS, columns=ColumnSet([ *aggregated_columns, Column("value", AggregateFunction("uniqCombined64", [UInt(64)])), ]), ), query_processors=[ArrayJoinKeyValueOptimizer("tags")], ) counters_storage = ReadableTableStorage( storage_key=StorageKey.METRICS_COUNTERS, storage_set_key=StorageSetKey.METRICS, schema=TableSchema( local_table_name="metrics_counters_local", dist_table_name="metrics_counters_dist", storage_set_key=StorageSetKey.METRICS, columns=ColumnSet([ *aggregated_columns, Column("value", AggregateFunction("sum", [Float(64)])),
def visit_agg(self, node: Node, visited_children: Iterable[Any]) -> AggregateFunction:
    """
    Construct an ``AggregateFunction`` column type from a parsed ``agg`` node.

    The visited children follow the grammar rule's shape:
    literal, "(", space, function name, space, ",", space, argument types,
    space, ")" — only the function name (index 3) and the argument type
    sequence (index 7) carry information; the rest are discarded.
    """
    # Repeated `_` bindings drop the punctuation/whitespace children.
    _, _, _, func_name, _, _, _, arg_types, _, _ = visited_children
    return AggregateFunction(func_name, *arg_types)