Example #1
    Column("timestamp", DateTime()),
    Column("tags", Nested([Column("key", UInt(64)),
                           Column("value", UInt(64))])),
]

POST_VALUES_BUCKETS_COLUMNS: Sequence[Column[Modifiers]] = [
    Column("materialization_version", UInt(8)),
    Column("retention_days", UInt(16)),
    Column("partition", UInt(16)),
    Column("offset", UInt(64)),
]

COL_SCHEMA_DISTRIBUTIONS: Sequence[Column[Modifiers]] = [
    Column(
        "percentiles",
        AggregateFunction("quantiles(0.5, 0.75, 0.9, 0.95, 0.99)",
                          [Float(64)]),
    ),
    Column("min", AggregateFunction("min", [Float(64)])),
    Column("max", AggregateFunction("max", [Float(64)])),
    Column("avg", AggregateFunction("avg", [Float(64)])),
    Column("sum", AggregateFunction("sum", [Float(64)])),
    Column("count", AggregateFunction("count", [Float(64)])),
]

COL_SCHEMA_DISTRIBUTIONS_V2: Sequence[Column[Modifiers]] = [
    *COL_SCHEMA_DISTRIBUTIONS,
    Column("histogram", AggregateFunction("histogram(250)", [Float(64)])),
]


def get_forward_bucket_table_local(
Example #2
raw_schema = WritableTableSchema(
    columns=all_columns,
    local_table_name=WRITE_LOCAL_TABLE_NAME,
    dist_table_name=WRITE_DIST_TABLE_NAME,
    storage_set_key=StorageSetKey.SESSIONS,
)

read_columns = ColumnSet([
    ("org_id", UInt(64)),
    ("project_id", UInt(64)),
    ("started", DateTime()),
    ("release", String()),
    ("environment", String()),
    (
        "duration_quantiles",
        AggregateFunction("quantilesIf(0.5, 0.9)", UInt(32), UInt(8)),
    ),
    ("sessions", AggregateFunction("countIf", UUID(), UInt(8))),
    ("users", AggregateFunction("uniqIf", UUID(), UInt(8))),
    (
        "sessions_crashed",
        AggregateFunction("countIf", UUID(), UInt(8)),
    ),
    (
        "sessions_abnormal",
        AggregateFunction("countIf", UUID(), UInt(8)),
    ),
    ("sessions_errored", AggregateFunction("uniqIf", UUID(), UInt(8))),
    ("users_crashed", AggregateFunction("uniqIf", UUID(), UInt(8))),
    ("users_abnormal", AggregateFunction("uniqIf", UUID(), UInt(8))),
    ("users_errored", AggregateFunction("uniqIf", UUID(), UInt(8))),
Example #3
test_data = [
    # Basic types
    (("Date", "", "", ""), Date()),
    (("DateTime", "", "", ""), DateTime()),
    (("Enum8('success' = 0, 'error' = 1)", "", "", ""),
     Enum([("success", 0), ("error", 1)])),
    (("FixedString(32)", "", "", ""), FixedString(32)),
    (("Float32", "", "", ""), Float(32)),
    (("IPv4", "", "", ""), IPv4()),
    (("IPv6", "", "", ""), IPv6()),
    (("String", "", "", ""), String()),
    (("UInt32", "", "", ""), UInt(32)),
    (("UUID", "", "", ""), UUID()),
    # Aggregate functions
    (("AggregateFunction(uniq, UInt8)", "", "", ""),
     AggregateFunction("uniq", UInt(8))),
    (("AggregateFunction(countIf, UUID, UInt8)", "", "", ""),
     AggregateFunction("countIf", UUID(), UInt(8))),
    (("AggregateFunction(quantileIf(0.5, 0.9), UInt32, UInt8)", "", "", ""),
     AggregateFunction("quantileIf(0.5, 0.9)", UInt(32), UInt(8))),
    # Array
    (("Array(String)", "", "", ""), Array(String())),
    (("Array(DateTime)", "", "", ""), Array(DateTime())),
    (("Array(UInt64)", "", "", ""), Array(UInt(64))),
    (("Array(Nullable(UUID))", "", "", ""), Array(Nullable(UUID()))),
    # Nullable
    (("Nullable(String)", "", "", ""), Nullable(String())),
    (("Nullable(FixedString(8))", "", "", ""), Nullable(FixedString(8))),
    (("Nullable(Date)", "", "", ""), Nullable(Date())),
    # Low cardinality
    (("LowCardinality(String)", "", "", ""), LowCardinality(String())),
Example #4
class Migration(migration.ClickhouseNodeMigration):
    blocking = False
    view_name = "generic_metric_sets_aggregation_mv"
    dest_table_columns: Sequence[Column[Modifiers]] = [
        Column("org_id", UInt(64)),
        Column("project_id", UInt(64)),
        Column("metric_id", UInt(64)),
        Column("granularity", UInt(8)),
        Column("timestamp",
               DateTime(modifiers=Modifiers(codecs=["DoubleDelta"]))),
        Column("retention_days", UInt(16)),
        Column(
            "tags",
            Nested([
                ("key", UInt(64)),
                ("indexed_value", UInt(64)),
                ("raw_value", String()),
            ]),
        ),
        Column("value", AggregateFunction("uniqCombined64", [UInt(64)])),
        Column("use_case_id", String(Modifiers(low_cardinality=True))),
    ]

    def forwards_local(self) -> Sequence[operations.SqlOperation]:
        return [
            operations.DropTable(
                storage_set=StorageSetKey.GENERIC_METRICS_SETS,
                table_name=self.view_name,
            ),
            operations.CreateMaterializedView(
                storage_set=StorageSetKey.GENERIC_METRICS_SETS,
                view_name=self.view_name,
                columns=self.dest_table_columns,
                destination_table_name="generic_metric_sets_local",
                query="""
                SELECT
                    use_case_id,
                    org_id,
                    project_id,
                    metric_id,
                    arrayJoin(granularities) as granularity,
                    tags.key,
                    tags.indexed_value,
                    tags.raw_value,
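                    -- multiIf maps the granularity enum (0/1/2/3) to its width in
                    -- seconds (10/60/3600/86400); the timestamp is rounded down to that bucket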
                    toDateTime(multiIf(granularity=0,10,granularity=1,60,granularity=2,3600,granularity=3,86400,-1) *
                      intDiv(toUnixTimestamp(timestamp),
                             multiIf(granularity=0,10,granularity=1,60,granularity=2,3600,granularity=3,86400,-1))) as timestamp,
                    retention_days,
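                    -- expand set_values and fold each element into the uniqCombined64 aggregate state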
                    uniqCombined64State(arrayJoin(set_values)) as value
                FROM generic_metric_sets_raw_local
                WHERE materialization_version = 1
                  AND metric_type = 'set'
                GROUP BY
                    use_case_id,
                    org_id,
                    project_id,
                    metric_id,
                    tags.key,
                    tags.indexed_value,
                    tags.raw_value,
                    timestamp,
                    granularity,
                    retention_days
                """,
            ),
        ]

    def backwards_local(self) -> Sequence[operations.SqlOperation]:
        return [
            operations.DropTable(
                storage_set=StorageSetKey.GENERIC_METRICS_SETS,
                table_name=self.view_name,
            )
        ]

    def forwards_dist(self) -> Sequence[operations.SqlOperation]:
        return []

    def backwards_dist(self) -> Sequence[operations.SqlOperation]:
        return []
Example #5
from snuba.migrations.columns import MigrationModifiers as Modifiers
from snuba.processor import MAX_UINT32, NIL_UUID
from .matview import create_matview_v1

aggregate_columns: Sequence[Column[Modifiers]] = [
    Column("org_id", UInt(64)),
    Column("project_id", UInt(64)),
    Column("started", DateTime()),
    Column("release", String(Modifiers(low_cardinality=True))),
    Column("environment", String(Modifiers(low_cardinality=True))),
    Column("user_agent", String(Modifiers(low_cardinality=True))),
    Column("os", String(Modifiers(low_cardinality=True))),
    # durations
    Column(
        "duration_quantiles",
        AggregateFunction("quantilesIf(0.5, 0.9)",
                          [UInt(32), UInt(8)]),
    ),
    Column("duration_avg", AggregateFunction("avgIf",
                                             [UInt(32), UInt(8)])),
    # sessions:
    Column("sessions", AggregateFunction("countIf", [UUID(), UInt(8)])),
    Column("sessions_preaggr", AggregateFunction("sumIf",
                                                 [UInt(32), UInt(8)])),
    Column("sessions_crashed", AggregateFunction("countIf",
                                                 [UUID(), UInt(8)])),
    Column("sessions_crashed_preaggr",
           AggregateFunction("sumIf", [UInt(32), UInt(8)])),
    Column("sessions_abnormal", AggregateFunction("countIf",
                                                  [UUID(), UInt(8)])),
    Column("sessions_abnormal_preaggr",
           AggregateFunction("sumIf", [UInt(32), UInt(8)])),
Example #6
         [("key", String()), ("val", String(Modifier(nullable=True)))],
         Modifier(nullable=True),
     ),
     Nested([("key", String()), ("val", String())]),
     cast(
         Column[Modifier],
         Nested([("key", String()), ("val", String())],
                Modifier(nullable=True)),
     ),
     "Nullable(Nested(key String, val Nullable(String)))",
     id="nested",
 ),
 pytest.param(
     cast(
         Column[Modifier],
         AggregateFunction("uniqIf", [UInt(8), UInt(32)],
                           Modifier(nullable=True)),
     ),
     AggregateFunction("uniqIf", [UInt(8), UInt(32)]),
     cast(
         Column[Modifier],
         AggregateFunction("uniqIf", [UInt(8)], Modifier(nullable=True)),
     ),
     "Nullable(AggregateFunction(uniqIf, UInt8, UInt32))",
     id="aggregated",
 ),
 pytest.param(
     Enum([("a", 1), ("b", 2)], Modifier(nullable=True)),
     Enum([("a", 1), ("b", 2)]),
     Enum([("a", 1), ("b", 2)]),
     "Nullable(Enum('a' = 1, 'b' = 2))",
     id="enums",
Example #7
new_raw_columns: Sequence[Tuple[Column[Modifiers], str]] = [
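    # Each pair is (new column, name of the existing column it is added after).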
    (
        Column("quantity", UInt(32, Modifiers(default=str(DEFAULT_QUANTITY)))),
        "distinct_id",
    ),
    (Column("user_agent",
            String(Modifiers(low_cardinality=True))), "environment"),
    (Column("os", String(Modifiers(low_cardinality=True))), "user_agent"),
]

new_dest_columns: Sequence[Tuple[Column[Modifiers], str]] = [
    (Column("user_agent",
            String(Modifiers(low_cardinality=True))), "environment"),
    (Column("os", String(Modifiers(low_cardinality=True))), "user_agent"),
    (
        Column("duration_avg", AggregateFunction("avgIf",
                                                 [UInt(32), UInt(8)])),
        "duration_quantiles",
    ),
    (
        Column("sessions_preaggr",
               AggregateFunction("sumIf", [UInt(32), UInt(8)])),
        "sessions",
    ),
    (
        Column(
            "sessions_crashed_preaggr",
            AggregateFunction("sumIf", [UInt(32), UInt(8)]),
        ),
        "sessions_crashed",
    ),
    (
Example #8
    Column("timestamp", DateTime()),
    Column("retention_days", UInt(16)),
    Column("tags", Nested([("key", UInt(64)), ("value", UInt(64))])),
    Column("_tags_hash", Array(UInt(64), SchemaModifiers(readonly=True))),
]

sets_storage = ReadableTableStorage(
    storage_key=StorageKey.METRICS_SETS,
    storage_set_key=StorageSetKey.METRICS,
    schema=TableSchema(
        local_table_name="metrics_sets_local",
        dist_table_name="metrics_sets_dist",
        storage_set_key=StorageSetKey.METRICS,
        columns=ColumnSet([
            *aggregated_columns,
            Column("value", AggregateFunction("uniqCombined64", [UInt(64)])),
        ]),
    ),
    query_processors=[ArrayJoinKeyValueOptimizer("tags")],
)

counters_storage = ReadableTableStorage(
    storage_key=StorageKey.METRICS_COUNTERS,
    storage_set_key=StorageSetKey.METRICS,
    schema=TableSchema(
        local_table_name="metrics_counters_local",
        dist_table_name="metrics_counters_dist",
        storage_set_key=StorageSetKey.METRICS,
        columns=ColumnSet([
            *aggregated_columns,
            Column("value", AggregateFunction("sum", [Float(64)])),
Example #9
 def visit_agg(self, node: Node,
               visited_children: Iterable[Any]) -> AggregateFunction:
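     # Unpack the grammar children (punctuation placeholders are discarded) and
     # rebuild the aggregate type from the function name and its argument types.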
     (_agg, _paren, _sp, agg_func, _sp, _comma, _sp, agg_types, _sp,
      _paren) = visited_children
     return AggregateFunction(agg_func, *agg_types)