Exemple #1
0
 def forwards_local(self) -> Sequence[operations.SqlOperation]:
     """Add ``spans.exclusive_time_32`` to ``transactions_local`` and set its TTL.

     Returns two operations, in order: an ``AddColumn`` that places the new
     ``Array(Float(32))`` column after ``spans.group``, followed by a
     ``ModifyColumn`` on the same column that passes
     ``ttl_month=("finish_ts", 1)``.
     """
     local_table = "transactions_local"

     add_exclusive_time = operations.AddColumn(
         storage_set=StorageSetKey.TRANSACTIONS,
         table_name=local_table,
         column=Column("spans.exclusive_time_32", Array(Float(32))),
         after="spans.group",
     )
     # The column definition is repeated (a fresh object) for the TTL change.
     set_exclusive_time_ttl = operations.ModifyColumn(
         storage_set=StorageSetKey.TRANSACTIONS,
         table_name=local_table,
         column=Column("spans.exclusive_time_32", Array(Float(32))),
         ttl_month=("finish_ts", 1),
     )
     return [add_exclusive_time, set_exclusive_time_ttl]
 def forwards_dist(self) -> Sequence[operations.SqlOperation]:
     """Return the distributed-table operations for the metrics counters tables.

     Delegates to ``get_forward_migrations_dist`` with the counters table
     names and a one-column aggregation schema (``value`` as a ``sum``
     aggregate over ``Float(64)``).
     """
     value_column = Column("value", AggregateFunction("sum", [Float(64)]))
     return get_forward_migrations_dist(
         dist_table_name="metrics_counters_dist",
         local_table_name="metrics_counters_local",
         aggregation_col_schema=[value_column],
     )
Exemple #3
0
 def forwards_dist(self) -> Sequence[operations.SqlOperation]:
     """Add ``spans.exclusive_time_32`` to the ``transactions_dist`` table.

     Returns a single ``AddColumn`` operation that places the new
     ``Array(Float(32))`` column after ``spans.group``.
     """
     exclusive_time = Column("spans.exclusive_time_32", Array(Float(32)))
     add_exclusive_time = operations.AddColumn(
         storage_set=StorageSetKey.TRANSACTIONS,
         table_name="transactions_dist",
         column=exclusive_time,
         after="spans.group",
     )
     return [add_exclusive_time]
Exemple #4
0
 def forwards_local(self) -> Sequence[operations.SqlOperation]:
     """Return the consolidated counters view migration for the local tables.

     The result is a one-element tuple wrapping whatever
     ``get_forward_view_migration_local_consolidated`` returns for the
     counters buckets source table and the counters destination table.
     """
     consolidated_mv_name = get_consolidated_mv_name("counters")
     value_column = Column("value", AggregateFunction("sum", [Float(64)]))
     consolidated_view = get_forward_view_migration_local_consolidated(
         source_table_name="metrics_counters_buckets_local",
         table_name="metrics_counters_local",
         mv_name=consolidated_mv_name,
         aggregation_col_schema=[value_column],
         aggregation_states="sumState(value) as value",
     )
     # Callers receive a tuple (not a list) containing exactly one entry.
     return (consolidated_view,)
Exemple #5
0
 def __init__(self) -> None:
     """Configure the parent with the raw/counters storages and ``sum`` mappers.

     Writes go to ``StorageKey.METRICS_RAW`` and reads come from
     ``StorageKey.METRICS_COUNTERS``. The value schema is a single ``value``
     column, and ``sum`` / ``sumIf`` are mapped to ``sumMerge`` /
     ``sumMergeIf``.
     """
     value_column = Column("value", AggregateFunction("sum", [Float(64)]))
     function_mappers = [
         FunctionNameMapper("sum", "sumMerge"),
         FunctionNameMapper("sumIf", "sumMergeIf"),
     ]
     super().__init__(
         writable_storage_key=StorageKey.METRICS_RAW,
         readable_storage_key=StorageKey.METRICS_COUNTERS,
         value_schema=[value_column],
         mappers=TranslationMappers(functions=function_mappers),
     )
Exemple #6
0
 def forwards_dist(self) -> Sequence[operations.Operation]:
     """Add the nested ``measurements`` column to ``transactions_dist``.

     Returns a single ``AddColumn`` operation. The column is a ``Nested``
     structure with a low-cardinality string ``key`` and a ``Float(64)``
     ``value``, positioned after ``_contexts_flattened``.
     """
     measurement_fields = [
         ("key", LowCardinality(String())),
         ("value", Float(64)),
     ]
     add_measurements = operations.AddColumn(
         storage_set=StorageSetKey.TRANSACTIONS,
         table_name="transactions_dist",
         column=Column("measurements", Nested(measurement_fields)),
         after="_contexts_flattened",
     )
     return [add_measurements]
Exemple #7
0
 def __init__(self) -> None:
     """Configure the parent with the distributions schema and merge mappers.

     Writes go to ``StorageKey.METRICS_RAW`` and reads come from
     ``StorageKey.METRICS_DISTRIBUTIONS``. Each plain aggregate and each
     curried aggregate on ``value`` is mapped, together with its ``If``
     variant, to the matching ``*Merge`` / ``*MergeIf`` function over the
     aggregate-state column named in the tables below.
     """
     # percentiles first, then the simple aggregates, then the histogram.
     value_schema = [
         Column(
             "percentiles",
             AggregateFunction(
                 "quantiles(0.5, 0.75, 0.9, 0.95, 0.99)", [Float(64)]
             ),
         ),
         *[
             Column(aggregate, AggregateFunction(aggregate, [Float(64)]))
             for aggregate in ("min", "max", "avg", "sum", "count")
         ],
         Column(
             "histogram_buckets",
             AggregateFunction("histogram(250)", [Float(64)]),
         ),
     ]

     # (function name, merge function name, aggregate-state column)
     function_rows = (
         ("min", "minMerge", "min"),
         ("minIf", "minMergeIf", "min"),
         ("max", "maxMerge", "max"),
         ("maxIf", "maxMergeIf", "max"),
         ("avg", "avgMerge", "avg"),
         ("avgIf", "avgMergeIf", "avg"),
         ("sum", "sumMerge", "sum"),
         ("sumIf", "sumMergeIf", "sum"),
         ("count", "countMerge", "count"),
         ("countIf", "countMergeIf", "count"),
     )
     curried_rows = (
         ("quantiles", "quantilesMerge", "percentiles"),
         ("quantilesIf", "quantilesMergeIf", "percentiles"),
         ("histogram", "histogramMerge", "histogram_buckets"),
         ("histogramIf", "histogramMergeIf", "histogram_buckets"),
     )

     function_mappers = [
         AggregateFunctionMapper("value", from_name, to_name, state_column)
         for from_name, to_name, state_column in function_rows
     ]
     curried_mappers = [
         AggregateCurriedFunctionMapper(
             "value", from_name, to_name, state_column
         )
         for from_name, to_name, state_column in curried_rows
     ]

     super().__init__(
         writable_storage_key=StorageKey.METRICS_RAW,
         readable_storage_key=StorageKey.METRICS_DISTRIBUTIONS,
         value_schema=value_schema,
         mappers=TranslationMappers(
             functions=function_mappers,
             curried_functions=curried_mappers,
         ),
     )
Exemple #8
0
 def __forward_migrations(
         self, table_name: str) -> Sequence[operations.SqlOperation]:
     """Build the operation that adds ``histogram_buckets`` to ``table_name``.

     Args:
         table_name: Table that receives the new column.

     Returns:
         A one-element list with an ``AddColumn`` operation in the
         ``METRICS`` storage set; the ``histogram(250)`` aggregate column
         is placed after ``count``.
     """
     histogram_column = Column(
         "histogram_buckets",
         AggregateFunction("histogram(250)", [Float(64)]),
     )
     add_histogram = operations.AddColumn(
         storage_set=StorageSetKey.METRICS,
         table_name=table_name,
         column=histogram_column,
         after="count",
     )
     return [add_histogram]
 def forwards_local(self) -> Sequence[operations.SqlOperation]:
     """Add the nested ``measurements`` column to ``transactions_local``.

     Returns a single ``AddColumn`` operation. The column is a ``Nested``
     structure with a low-cardinality string ``key`` and a ``Float(64)``
     ``value``, positioned after ``_contexts_flattened``.
     """
     measurement_fields = [
         ("key", String(Modifiers(low_cardinality=True))),
         ("value", Float(64)),
     ]
     add_measurements = operations.AddColumn(
         storage_set=StorageSetKey.TRANSACTIONS,
         table_name="transactions_local",
         column=Column("measurements", Nested(measurement_fields)),
         after="_contexts_flattened",
     )
     return [add_measurements]
Exemple #10
0
 def forwards_dist(self) -> Sequence[operations.SqlOperation]:
     """Add the nested ``span_op_breakdowns`` column to ``transactions_dist``.

     Returns a single ``AddColumn`` operation. The column is a ``Nested``
     structure with a low-cardinality string ``key`` and a ``Float(64)``
     ``value``, positioned after ``measurements.value``.
     """
     breakdown_fields = [
         ("key", String(Modifiers(low_cardinality=True))),
         ("value", Float(64)),
     ]
     add_breakdowns = operations.AddColumn(
         storage_set=StorageSetKey.TRANSACTIONS,
         table_name="transactions_dist",
         column=Column("span_op_breakdowns", Nested(breakdown_fields)),
         after="measurements.value",
     )
     return [add_breakdowns]
Exemple #11
0
 def __init__(self) -> None:
     """Configure the parent with the counters bucket storage and mapper.

     Writes go to ``StorageKey.METRICS_COUNTERS_BUCKETS`` and reads come
     from ``StorageKey.METRICS_COUNTERS``. The single ``value`` column is
     translated into a ``sumMerge`` call over that same column.
     """
     value_column = Column("value", AggregateFunction("sum", [Float(64)]))
     value_to_sum_merge = ColumnToFunction(
         None,
         "value",
         "sumMerge",
         (ColumnExpr(None, None, "value"),),
     )
     super().__init__(
         writable_storage_key=StorageKey.METRICS_COUNTERS_BUCKETS,
         readable_storage_key=StorageKey.METRICS_COUNTERS,
         value_schema=[value_column],
         mappers=TranslationMappers(columns=[value_to_sum_merge]),
     )
Exemple #12
0
def get_migration_args_for_counters(
    granularity: int = ORIGINAL_GRANULARITY,
) -> MigrationArgs:
    """Build the migration arguments for the counters materialized view.

    Args:
        granularity: Passed to ``get_mv_name`` and echoed back under the
            ``"granularity"`` key. Defaults to ``ORIGINAL_GRANULARITY``.

    Returns:
        A mapping with the source/destination table names, the view name,
        the one-column aggregation schema, the aggregation state
        expression and the granularity.
    """
    mv_name = get_mv_name("counters", granularity)
    value_column = Column("value", AggregateFunction("sum", [Float(64)]))
    return {
        "source_table_name": "metrics_counters_buckets_local",
        "table_name": "metrics_counters_local",
        "mv_name": mv_name,
        "aggregation_col_schema": [value_column],
        "aggregation_states": "sumState(value) as value",
        "granularity": granularity,
    }
Exemple #13
0
 def forwards_local(self) -> Sequence[operations.SqlOperation]:
     """Add the histogram column and build the version-4 polymorphic views.

     The returned list contains, in order: the operations from
     ``__forward_migrations`` for ``metrics_distributions_local``, then one
     view migration each for distributions, sets and counters. All three
     views read from ``self.raw_table_name`` and use
     ``materialization_version=4``.
     """
     histogram_column_ops = self.__forward_migrations(
         "metrics_distributions_local")

     distribution_states = (
         "quantilesState(0.5, 0.75, 0.9, 0.95, 0.99)((arrayJoin(distribution_values) AS values_rows)) as percentiles, "
         "minState(values_rows) as min, "
         "maxState(values_rows) as max, "
         "avgState(values_rows) as avg, "
         "sumState(values_rows) as sum, "
         "countState(values_rows) as count, "
         "histogramState(250)(values_rows) as histogram_buckets"
     )
     distributions_view = get_forward_view_migration_polymorphic_table_v2(
         source_table_name=self.raw_table_name,
         table_name="metrics_distributions_local",
         mv_name=get_polymorphic_mv_v3_name("distributions"),
         aggregation_col_schema=COL_SCHEMA_DISTRIBUTIONS_V2,
         aggregation_states=distribution_states,
         metric_type="distribution",
         materialization_version=4,
     )

     # No changes in those MV's schema. We just need to recreate the
     # same exact MV as in 0023 for the new materialization_version
     sets_view = get_forward_view_migration_polymorphic_table_v2(
         source_table_name=self.raw_table_name,
         table_name="metrics_sets_local",
         mv_name=get_polymorphic_mv_v3_name("sets"),
         aggregation_col_schema=[
             Column(
                 "value", AggregateFunction("uniqCombined64", [UInt(64)])
             ),
         ],
         aggregation_states=(
             "uniqCombined64State(arrayJoin(set_values)) as value"
         ),
         metric_type="set",
         materialization_version=4,
     )
     counters_view = get_forward_view_migration_polymorphic_table_v2(
         source_table_name=self.raw_table_name,
         table_name="metrics_counters_local",
         mv_name=get_polymorphic_mv_v3_name("counters"),
         aggregation_col_schema=[
             Column("value", AggregateFunction("sum", [Float(64)])),
         ],
         aggregation_states="sumState(count_value) as value",
         metric_type="counter",
         materialization_version=4,
     )

     return [
         *histogram_column_ops,
         distributions_view,
         sets_view,
         counters_view,
     ]
Exemple #14
0
 def __init__(self) -> None:
     """Configure the parent with the distributions bucket storage and mappers.

     Writes go to ``StorageKey.METRICS_DISTRIBUTIONS_BUCKETS`` and reads
     come from ``StorageKey.METRICS_DISTRIBUTIONS``. ``percentiles`` is
     translated into a curried ``quantilesMerge`` call carrying the five
     quantile levels; the remaining columns go through ``merge_mapper``.
     """
     simple_aggregates = ("min", "max", "avg", "sum", "count")

     value_schema = [
         Column(
             "percentiles",
             AggregateFunction(
                 "quantiles(0.5, 0.75, 0.9, 0.95, 0.99)", [Float(64)]
             ),
         ),
         *[
             Column(aggregate, AggregateFunction(aggregate, [Float(64)]))
             for aggregate in simple_aggregates
         ],
     ]

     # Quantile levels become the parameters of the curried merge function.
     quantile_levels = tuple(
         [Literal(None, level) for level in (0.5, 0.75, 0.9, 0.95, 0.99)]
     )
     percentiles_mapper = ColumnToCurriedFunction(
         None,
         "percentiles",
         FunctionCall(None, "quantilesMerge", quantile_levels),
         (ColumnExpr(None, None, "percentiles"),),
     )
     column_mappers = [
         percentiles_mapper,
         *[merge_mapper(aggregate) for aggregate in simple_aggregates],
     ]

     super().__init__(
         writable_storage_key=StorageKey.METRICS_DISTRIBUTIONS_BUCKETS,
         readable_storage_key=StorageKey.METRICS_DISTRIBUTIONS,
         value_schema=value_schema,
         mappers=TranslationMappers(columns=column_mappers),
     )
Exemple #15
0
    ("user_hash", UInt(64, Modifiers(readonly=True))),
    ("user_id", String(Modifiers(nullable=True))),
    ("user_name", String(Modifiers(nullable=True))),
    ("user_email", String(Modifiers(nullable=True))),
    ("sdk_name", String()),
    ("sdk_version", String()),
    ("http_method", String(Modifiers(nullable=True))),
    ("http_referer", String(Modifiers(nullable=True))),
    ("tags", Nested([("key", String()), ("value", String())])),
    ("_tags_flattened", String()),
    ("_tags_hash_map", Array(UInt(64), Modifiers(readonly=True))),
    ("contexts", Nested([("key", String()), ("value", String())])),
    ("_contexts_flattened", String()),
    (
        "measurements",
        Nested([("key", String()), ("value", Float(64))]),
    ),
    ("partition", UInt(16)),
    ("offset", UInt(64)),
    ("message_timestamp", DateTime()),
    ("retention_days", UInt(16)),
    ("deleted", UInt(8)),
    ("type", String(Modifiers(readonly=True))),
    ("message", String(Modifiers(readonly=True))),
    ("title", String(Modifiers(readonly=True))),
    ("timestamp", DateTime(Modifiers(readonly=True))),
])

schema = WritableTableSchema(
    columns=columns,
    local_table_name="transactions_local",
Exemple #16
0
    def __init__(self) -> None:
        """Declare the abstract column sets and register the storages.

        Three ``ColumnSet`` attributes are populated: the columns shared by
        both data sets, the columns declared only for events, and the
        columns declared only for transactions. Their concatenation is
        handed to the parent as the abstract column set. The parent is also
        given the events and transactions storages and a query plan builder
        whose selector additionally receives the read-only events storage.
        No writable storage is passed.
        """
        self.__common_columns = ColumnSet(
            [
                ("event_id", FixedString(32)),
                ("project_id", UInt(64)),
                ("type", Nullable(String())),
                ("timestamp", DateTime()),
                *[
                    (column_name, Nullable(String()))
                    for column_name in (
                        "platform",
                        "environment",
                        "release",
                        "dist",
                        "user",
                        "transaction",
                        "message",
                        "title",
                        # User
                        "user_id",
                        "username",
                        "email",
                        "ip_address",
                        # SDK
                        "sdk_name",
                        "sdk_version",
                        # geo location context
                        "geo_country_code",
                        "geo_region",
                        "geo_city",
                        "http_method",
                        "http_referer",
                    )
                ],
                # Other tags and context
                ("tags", Nested([("key", String()), ("value", String())])),
                ("contexts", Nested([("key", String()), ("value", String())])),
            ]
        )

        # exception interface: sub-columns of the two nested structures
        exception_stack_fields = [
            ("type", Nullable(String())),
            ("value", Nullable(String())),
            ("mechanism_type", Nullable(String())),
            ("mechanism_handled", Nullable(UInt(8))),
        ]
        exception_frame_fields = [
            ("abs_path", Nullable(String())),
            ("filename", Nullable(String())),
            ("package", Nullable(String())),
            ("module", Nullable(String())),
            ("function", Nullable(String())),
            ("in_app", Nullable(UInt(8))),
            ("colno", Nullable(UInt(32))),
            ("lineno", Nullable(UInt(32))),
            ("stack_level", UInt(16)),
        ]

        self.__events_columns = ColumnSet(
            [
                ("group_id", Nullable(UInt(64))),
                ("primary_hash", Nullable(FixedString(32))),
                # Promoted tags
                *[
                    (column_name, Nullable(String()))
                    for column_name in (
                        "level",
                        "logger",
                        "server_name",
                        "site",
                        "url",
                        "search_message",
                        "location",
                        "culprit",
                    )
                ],
                ("received", Nullable(DateTime())),
                ("sdk_integrations", Nullable(Array(String()))),
                ("version", Nullable(String())),
                ("exception_stacks", Nested(exception_stack_fields)),
                ("exception_frames", Nested(exception_frame_fields)),
                ("modules", Nested([("name", String()), ("version", String())])),
            ]
        )

        measurement_fields = [
            ("key", LowCardinality(String())),
            ("value", Float(64)),
        ]
        self.__transactions_columns = ColumnSet(
            [
                ("trace_id", Nullable(UUID())),
                ("span_id", Nullable(UInt(64))),
                ("transaction_hash", Nullable(UInt(64))),
                ("transaction_op", Nullable(String())),
                ("transaction_status", Nullable(UInt(8))),
                ("duration", Nullable(UInt(32))),
                ("measurements", Nested(measurement_fields)),
            ]
        )

        events_storage = get_storage(StorageKey.EVENTS)
        events_ro_storage = get_storage(StorageKey.EVENTS_RO)
        transactions_storage = get_storage(StorageKey.TRANSACTIONS)

        self.__time_group_columns: Mapping[str, str] = {}
        self.__time_parse_columns = ("timestamp",)

        storage_selector = DiscoverQueryStorageSelector(
            events_table=events_storage,
            events_ro_table=events_ro_storage,
            abstract_events_columns=self.__events_columns,
            transactions_table=transactions_storage,
            abstract_transactions_columns=self.__transactions_columns,
        )
        plan_builder = SelectedStorageQueryPlanBuilder(selector=storage_selector)
        all_columns = (
            self.__common_columns
            + self.__events_columns
            + self.__transactions_columns
        )

        super().__init__(
            storages=[events_storage, transactions_storage],
            query_plan_builder=plan_builder,
            abstract_column_set=all_columns,
            writable_storage=None,
        )
Exemple #17
0
        ("ip_address_v6", IPv6(Modifiers(nullable=True))),
        ("user", String()),
        ("user_hash", UInt(64, Modifiers(readonly=True))),
        ("user_id", String(Modifiers(nullable=True))),
        ("user_name", String(Modifiers(nullable=True))),
        ("user_email", String(Modifiers(nullable=True))),
        ("sdk_name", String()),
        ("sdk_version", String()),
        ("http_method", String(Modifiers(nullable=True))),
        ("http_referer", String(Modifiers(nullable=True))),
        ("tags", Nested([("key", String()), ("value", String())])),
        ("_tags_flattened", String()),
        ("_tags_hash_map", Array(UInt(64), Modifiers(readonly=True))),
        ("contexts", Nested([("key", String()), ("value", String())])),
        ("_contexts_flattened", String()),
        ("measurements", Nested([("key", String()), ("value", Float(64))]),),
        ("span_op_breakdowns", Nested([("key", String()), ("value", Float(64))]),),
        ("partition", UInt(16)),
        ("offset", UInt(64)),
        ("message_timestamp", DateTime()),
        ("retention_days", UInt(16)),
        ("deleted", UInt(8)),
        ("type", String(Modifiers(readonly=True))),
        ("message", String(Modifiers(readonly=True))),
        ("title", String(Modifiers(readonly=True))),
        ("timestamp", DateTime(Modifiers(readonly=True))),
    ]
)

schema = WritableTableSchema(
    columns=columns,
Exemple #18
0
    # This is expanded into arrays instead of being expressed as a
    # Nested column because, when adding new columns to a nested field
    # we need to provide a default for the entire array (each new column
    # is an array).
    # The same schema cannot be achieved with the Nested construct (where
    # we can only provide default for individual values), so, if we
    # use the Nested construct, this schema cannot match the one generated
    # by the migration framework (or by any ALTER statement).
    ("clickhouse_queries.sql", Array(String())),
    ("clickhouse_queries.status", Array(String())),
    ("clickhouse_queries.trace_id", Array(UUID(Modifiers(nullable=True)))),
    ("clickhouse_queries.duration_ms", Array(UInt(32))),
    ("clickhouse_queries.stats", Array(String())),
    ("clickhouse_queries.final", Array(UInt(8))),
    ("clickhouse_queries.cache_hit", Array(UInt(8))),
    ("clickhouse_queries.sample", Array(Float(32))),
    ("clickhouse_queries.max_threads", Array(UInt(8))),
    ("clickhouse_queries.num_days", Array(UInt(32))),
    ("clickhouse_queries.clickhouse_table", Array(String())),
    ("clickhouse_queries.query_id", Array(String())),
    # XXX: ``is_duplicate`` is currently not set when using the
    # ``Cache.get_readthrough`` query execution path. See GH-902.
    ("clickhouse_queries.is_duplicate", Array(UInt(8))),
    ("clickhouse_queries.consistent", Array(UInt(8))),
    ("clickhouse_queries.all_columns", Array(Array(String()))),
    ("clickhouse_queries.or_conditions", Array(UInt(8))),
    ("clickhouse_queries.where_columns", Array(Array(String()))),
    ("clickhouse_queries.where_mapping_columns", Array(Array(String()))),
    ("clickhouse_queries.groupby_columns", Array(Array(String()))),
    ("clickhouse_queries.array_join_columns", Array(Array(String()))),
])
class Migration(migration.ClickhouseNodeMigration):
    """Create (and drop) the raw generic metric sets tables.

    The forward steps create a ``MergeTree`` local table and a
    ``Distributed`` table on top of it, both with the same column list.
    The backward steps drop the corresponding table.
    """

    blocking = False
    local_table_name = "generic_metric_sets_raw_local"
    dist_table_name = "generic_metric_sets_raw_dist"

    # Column list shared by the local and the distributed table.
    columns: Sequence[Column[Modifiers]] = [
        Column("use_case_id", String(Modifiers(low_cardinality=True))),
        Column("org_id", UInt(64)),
        Column("project_id", UInt(64)),
        Column("metric_id", UInt(64)),
        Column("timestamp", DateTime()),
        Column("retention_days", UInt(16)),
        Column(
            "tags",
            Nested(
                [
                    ("key", UInt(64)),
                    ("indexed_value", UInt(64)),
                    ("raw_value", String()),
                ]
            ),
        ),
        Column("set_values", Array(UInt(64))),
        Column("count_value", Float(64)),
        Column("distribution_values", Array(Float(64))),
        Column("metric_type", String(Modifiers(low_cardinality=True))),
        Column("materialization_version", UInt(8)),
        Column("timeseries_id", UInt(32)),
        Column("partition", UInt(16)),
        Column("offset", UInt(64)),
    ]

    def forwards_local(self) -> Sequence[operations.SqlOperation]:
        """Return the operation that creates the local ``MergeTree`` table."""
        merge_tree = table_engines.MergeTree(
            storage_set=StorageSetKey.GENERIC_METRICS_SETS,
            order_by="(use_case_id, org_id, project_id, metric_id, timestamp)",
            partition_by="(toStartOfInterval(timestamp, toIntervalDay(3)))",
            ttl="timestamp + toIntervalDay(7)",
        )
        create_local = operations.CreateTable(
            storage_set=StorageSetKey.GENERIC_METRICS_SETS,
            table_name=self.local_table_name,
            engine=merge_tree,
            columns=self.columns,
        )
        return [create_local]

    def backwards_local(self) -> Sequence[operations.SqlOperation]:
        """Return the operation that drops the local table."""
        drop_local = operations.DropTable(
            storage_set=StorageSetKey.GENERIC_METRICS_SETS,
            table_name=self.local_table_name,
        )
        return [drop_local]

    def forwards_dist(self) -> Sequence[operations.SqlOperation]:
        """Return the operation that creates the ``Distributed`` table.

        The engine points at the local table and uses
        ``cityHash64(timeseries_id)`` as its sharding key.
        """
        distributed = table_engines.Distributed(
            local_table_name=self.local_table_name,
            sharding_key="cityHash64(timeseries_id)",
        )
        create_dist = operations.CreateTable(
            storage_set=StorageSetKey.GENERIC_METRICS_SETS,
            table_name=self.dist_table_name,
            engine=distributed,
            columns=self.columns,
        )
        return [create_dist]

    def backwards_dist(self) -> Sequence[operations.SqlOperation]:
        """Return the operation that drops the distributed table."""
        drop_dist = operations.DropTable(
            storage_set=StorageSetKey.GENERIC_METRICS_SETS,
            table_name=self.dist_table_name,
        )
        return [drop_dist]
Exemple #20
0
    String,
    UInt,
    UUID,
    WithCodecs,
    WithDefault,
)
from snuba.migrations.parse_schema import _get_column

test_data = [
    # Basic types
    (("Date", "", "", ""), Date()),
    (("DateTime", "", "", ""), DateTime()),
    (("Enum8('success' = 0, 'error' = 1)", "", "", ""),
     Enum([("success", 0), ("error", 1)])),
    (("FixedString(32)", "", "", ""), FixedString(32)),
    (("Float32", "", "", ""), Float(32)),
    (("IPv4", "", "", ""), IPv4()),
    (("IPv6", "", "", ""), IPv6()),
    (("String", "", "", ""), String()),
    (("UInt32", "", "", ""), UInt(32)),
    (("UUID", "", "", ""), UUID()),
    # Aggregate functions
    (("AggregateFunction(uniq, UInt8)", "", "", ""),
     AggregateFunction("uniq", UInt(8))),
    (("AggregateFunction(countIf, UUID, UInt8)", "", "", ""),
     AggregateFunction("countIf", UUID(), UInt(8))),
    (("AggregateFunction(quantileIf(0.5, 0.9), UInt32, UInt8)", "", "", ""),
     AggregateFunction("quantileIf(0.5, 0.9)", UInt(32), UInt(8))),
    # Array
    (("Array(String)", "", "", ""), Array(String())),
    (("Array(DateTime)", "", "", ""), Array(DateTime())),
Exemple #21
0
 def visit_float(self, node: Node, visited_children: Iterable[Any]) -> ColumnType:
     """Build a ``Float`` column type from a parsed float node.

     The bit width is read from the text of the node's second child and
     converted to ``int``. ``visited_children`` is not used.
     """
     width_text = node.children[1].text
     return Float(int(width_text))
Exemple #22
0
        ("os_name", String(Modifiers(nullable=True))),
        ("os_rooted", UInt(8, Modifiers(nullable=True))),
    ]
)

# Promoted context columns; every entry carries the ``nullable`` modifier.
# Declaration order is preserved: eight string columns, the battery level,
# the orientation, then three ``UInt(8)`` columns.
promoted_context_columns = ColumnSet(
    [
        *[
            (column_name, String(Modifiers(nullable=True)))
            for column_name in (
                "os_build",
                "os_kernel_version",
                "device_name",
                "device_brand",
                "device_locale",
                "device_uuid",
                "device_model_id",
                "device_arch",
            )
        ],
        ("device_battery_level", Float(32, Modifiers(nullable=True))),
        ("device_orientation", String(Modifiers(nullable=True))),
        *[
            (column_name, UInt(8, Modifiers(nullable=True)))
            for column_name in (
                "device_simulator",
                "device_online",
                "device_charging",
            )
        ],
    ]
)

required_columns = ColumnSet(
    [
        ("event_id", FixedString(32)),
        ("project_id", UInt(64)),
        ("group_id", UInt(64)),
        ("timestamp", DateTime()),
        ("deleted", UInt(8)),
        ("retention_days", UInt(16)),
Exemple #23
0
    Column("projects", Array(UInt(64))),
    Column("organization", Nullable(UInt(64))),
    Column("timestamp", DateTime()),
    Column("duration_ms", UInt(32)),
    Column("status", status_type),
    Column(
        "clickhouse_queries",
        Nested([
            Column("sql", String()),
            Column("status", status_type),
            Column("trace_id", Nullable(UUID())),
            Column("duration_ms", UInt(32)),
            Column("stats", String()),
            Column("final", UInt(8)),
            Column("cache_hit", UInt(8)),
            Column("sample", Float(32)),
            Column("max_threads", UInt(8)),
            Column("num_days", UInt(32)),
            Column("clickhouse_table", LowCardinality(String())),
            Column("query_id", String()),
            Column("is_duplicate", UInt(8)),
            Column("consistent", UInt(8)),
        ]),
    ),
]


class Migration(migration.MultiStepMigration):
    blocking = False

    def forwards_local(self) -> Sequence[operations.Operation]:
Exemple #24
0
        ("ip_address_v6", Nullable(IPv6())),
        ("user", String()),
        ("user_hash", ReadOnly(UInt(64))),
        ("user_id", Nullable(String())),
        ("user_name", Nullable(String())),
        ("user_email", Nullable(String())),
        ("sdk_name", String()),
        ("sdk_version", String()),
        ("http_method", Nullable(String())),
        ("http_referer", Nullable(String())),
        ("tags", Nested([("key", String()), ("value", String())])),
        ("_tags_flattened", String()),
        ("_tags_hash_map", ReadOnly(Array(UInt(64)))),
        ("contexts", Nested([("key", String()), ("value", String())])),
        ("_contexts_flattened", String()),
        ("measurements", Nested([("key", String()), ("value", Float(64))]),),
        ("partition", UInt(16)),
        ("offset", UInt(64)),
        ("message_timestamp", DateTime()),
        ("retention_days", UInt(16)),
        ("deleted", UInt(8)),
    ]
)

schema = WritableTableSchema(
    columns=columns,
    local_table_name="transactions_local",
    dist_table_name="transactions_dist",
    storage_set_key=StorageSetKey.TRANSACTIONS,
    mandatory_conditions=[],
    prewhere_candidates=["event_id", "transaction_name", "transaction", "title"],
Exemple #25
0
        ("os_name", Nullable(String())),
        ("os_rooted", Nullable(UInt(8))),
    ]
)

# Promoted context columns; every entry is wrapped in ``Nullable``.
# Declaration order is preserved: eight string columns, the battery level,
# the orientation, then three ``UInt(8)`` columns.
promoted_context_columns = ColumnSet(
    [
        *[
            (column_name, Nullable(String()))
            for column_name in (
                "os_build",
                "os_kernel_version",
                "device_name",
                "device_brand",
                "device_locale",
                "device_uuid",
                "device_model_id",
                "device_arch",
            )
        ],
        ("device_battery_level", Nullable(Float(32))),
        ("device_orientation", Nullable(String())),
        *[
            (column_name, Nullable(UInt(8)))
            for column_name in (
                "device_simulator",
                "device_online",
                "device_charging",
            )
        ],
    ]
)

required_columns = ColumnSet(
    [
        ("event_id", FixedString(32)),
        ("project_id", UInt(64)),
        ("group_id", UInt(64)),
        ("timestamp", DateTime()),
        ("deleted", UInt(8)),
        ("retention_days", UInt(16)),
Exemple #26
0
    ("projects", Array(UInt(64))),
    ("organization", Nullable(UInt(64))),
    ("timestamp", DateTime()),
    ("duration_ms", UInt(32)),
    ("status", status_type),
    (
        "clickhouse_queries",
        Nested([
            ("sql", String()),
            ("status", status_type),
            ("trace_id", Nullable(UUID())),
            ("duration_ms", UInt(32)),
            ("stats", String()),
            ("final", UInt(8)),
            ("cache_hit", UInt(8)),
            ("sample", Float(32)),
            ("max_threads", UInt(8)),
            ("num_days", UInt(32)),
            ("clickhouse_table", LowCardinality(String())),
            ("query_id", String()),
            ("is_duplicate", UInt(8)),
            ("consistent", UInt(8)),
        ]),
    ),
])

schema = MergeTreeSchema(
    columns=columns,
    local_table_name="querylog_local",
    dist_table_name="querylog_dist",
    order_by="(toStartOfDay(timestamp), request_id)",
from typing import Sequence

from snuba.clickhouse.columns import AggregateFunction, Column, Float
from snuba.migrations import migration, operations
from snuba.migrations.columns import MigrationModifiers
from snuba.migrations.snuba_migrations.metrics.templates import (
    get_forward_migrations_dist,
    get_forward_migrations_local,
    get_reverse_table_migration,
)

# Aggregate-state columns: the percentiles column followed by one column per
# simple aggregate, each named after the aggregate function it stores.
COL_SCHEMA: Sequence[Column[MigrationModifiers]] = [
    Column(
        "percentiles",
        AggregateFunction("quantiles(0.5, 0.75, 0.9, 0.95, 0.99)", [Float(64)]),
    ),
    *[
        Column(aggregate, AggregateFunction(aggregate, [Float(64)]))
        for aggregate in ("min", "max", "avg", "sum", "count")
    ],
]


class Migration(migration.ClickhouseNodeMigration):
    blocking = False

    def forwards_local(self) -> Sequence[operations.SqlOperation]:
        return get_forward_migrations_local(
            source_table_name="metrics_distributions_buckets_local",
Exemple #28
0
    Column("tags", Nested([Column("key", UInt(64)),
                           Column("value", UInt(64))])),
]

# Fixed (name, type) pairs turned into Column objects, in declaration order.
# NOTE(review): judging by the name these are appended after the value
# column(s) of the buckets tables — confirm against the callers.
POST_VALUES_BUCKETS_COLUMNS: Sequence[Column[Modifiers]] = [
    Column(col_name, col_type)
    for col_name, col_type in (
        ("materialization_version", UInt(8)),
        ("retention_days", UInt(16)),
        ("partition", UInt(16)),
        ("offset", UInt(64)),
    )
]

# Aggregate-state columns for distributions: one fixed set of quantiles,
# followed by one column per simple aggregate function in the tuple below.
COL_SCHEMA_DISTRIBUTIONS: Sequence[Column[Modifiers]] = [
    Column(
        "percentiles",
        AggregateFunction("quantiles(0.5, 0.75, 0.9, 0.95, 0.99)", [Float(64)]),
    ),
    # Each of these columns is named after the aggregate function it stores,
    # so the name doubles as the function name. Order is preserved.
    *(
        Column(func_name, AggregateFunction(func_name, [Float(64)]))
        for func_name in ("min", "max", "avg", "sum", "count")
    ),
]

# Second revision of the distributions schema: a copy of every column from
# COL_SCHEMA_DISTRIBUTIONS, followed by a 250-bin histogram aggregate column.
COL_SCHEMA_DISTRIBUTIONS_V2: Sequence[Column[Modifiers]] = list(
    COL_SCHEMA_DISTRIBUTIONS
) + [
    Column("histogram", AggregateFunction("histogram(250)", [Float(64)])),
]


def get_forward_bucket_table_local(
Exemple #29
0
        ("user_hash", Materialized(UInt(64), "cityHash64(user)"),),
        ("user_id", Nullable(String())),
        ("user_name", Nullable(String())),
        ("user_email", Nullable(String())),
        ("sdk_name", WithDefault(LowCardinality(String()), "''")),
        ("sdk_version", WithDefault(LowCardinality(String()), "''")),
        ("http_method", LowCardinality(Nullable(String()))),
        ("http_referer", Nullable(String())),
        ("tags", Nested([("key", String()), ("value", String())])),
        ("_tags_flattened", String()),
        ("_tags_hash_map", Materialized(Array(UInt(64)), TAGS_HASH_MAP_COLUMN)),
        ("contexts", Nested([("key", String()), ("value", String())])),
        ("_contexts_flattened", String()),
        (
            "measurements",
            Nested([("key", LowCardinality(String())), ("value", Float(64))]),
        ),
        ("partition", UInt(16)),
        ("offset", UInt(64)),
        ("message_timestamp", DateTime()),
        ("retention_days", UInt(16)),
        ("deleted", UInt(8)),
    ]
)

schema = ReplacingMergeTreeSchema(
    columns=columns,
    local_table_name="transactions_local",
    dist_table_name="transactions_dist",
    storage_set_key=StorageSetKey.TRANSACTIONS,
    mandatory_conditions=[],
Exemple #30
0
    def __init__(self):
        """Assemble the events dataset: column sets, schema and table writer.

        Builds several ``ColumnSet`` groups, concatenates them (in a fixed
        order) into the full column list, wraps that list in a
        ``ReplacingMergeTreeSchema`` used for both reads and writes, wires a
        Kafka-backed ``TableWriter`` to it, and passes everything to the
        parent constructor. The individual column groups are then kept on the
        instance and a ``TagColumnProcessor`` is created from them.
        """
        metadata_columns = ColumnSet([
            # optional stream related data
            ('offset', Nullable(UInt(64))),
            ('partition', Nullable(UInt(16))),
        ])

        promoted_tag_columns = ColumnSet([
            # These are the classic tags, they are saved in Snuba exactly as they
            # appear in the event body.
            ('level', Nullable(String())),
            ('logger', Nullable(String())),
            ('server_name', Nullable(String())),  # future name: device_id?
            ('transaction', Nullable(String())),
            ('environment', Nullable(String())),
            ('sentry:release', Nullable(String())),
            ('sentry:dist', Nullable(String())),
            ('sentry:user', Nullable(String())),
            ('site', Nullable(String())),
            ('url', Nullable(String())),
        ])

        promoted_context_tag_columns = ColumnSet([
            # These are promoted tags that come in in `tags`, but are more closely
            # related to contexts.  To avoid naming confusion with Clickhouse nested
            # columns, they are stored in the database with s/./_/
            # promoted tags
            ('app_device', Nullable(String())),
            ('device', Nullable(String())),
            ('device_family', Nullable(String())),
            ('runtime', Nullable(String())),
            ('runtime_name', Nullable(String())),
            ('browser', Nullable(String())),
            ('browser_name', Nullable(String())),
            ('os', Nullable(String())),
            ('os_name', Nullable(String())),
            ('os_rooted', Nullable(UInt(8))),
        ])

        promoted_context_columns = ColumnSet([
            ('os_build', Nullable(String())),
            ('os_kernel_version', Nullable(String())),
            ('device_name', Nullable(String())),
            ('device_brand', Nullable(String())),
            ('device_locale', Nullable(String())),
            ('device_uuid', Nullable(String())),
            ('device_model_id', Nullable(String())),
            ('device_arch', Nullable(String())),
            ('device_battery_level', Nullable(Float(32))),
            ('device_orientation', Nullable(String())),
            ('device_simulator', Nullable(UInt(8))),
            ('device_online', Nullable(UInt(8))),
            ('device_charging', Nullable(UInt(8))),
        ])

        # The only column group declared without Nullable wrappers.
        required_columns = ColumnSet([
            ('event_id', FixedString(32)),
            ('project_id', UInt(64)),
            ('group_id', UInt(64)),
            ('timestamp', DateTime()),
            ('deleted', UInt(8)),
            ('retention_days', UInt(16)),
        ])

        # Full column list. The concatenation order below is deliberate: see
        # the note near the end of this expression about matching the column
        # order of the production table.
        all_columns = required_columns + [
            # required for non-deleted
            ('platform', Nullable(String())),
            ('message', Nullable(String())),
            ('primary_hash', Nullable(FixedString(32))),
            ('received', Nullable(DateTime())),

            ('search_message', Nullable(String())),
            ('title', Nullable(String())),
            ('location', Nullable(String())),

            # optional user
            ('user_id', Nullable(String())),
            ('username', Nullable(String())),
            ('email', Nullable(String())),
            ('ip_address', Nullable(String())),

            # optional geo
            ('geo_country_code', Nullable(String())),
            ('geo_region', Nullable(String())),
            ('geo_city', Nullable(String())),

            ('sdk_name', Nullable(String())),
            ('sdk_version', Nullable(String())),
            ('type', Nullable(String())),
            ('version', Nullable(String())),
        ] + metadata_columns \
            + promoted_context_columns \
            + promoted_tag_columns \
            + promoted_context_tag_columns \
            + [
                # other tags
                ('tags', Nested([
                    ('key', String()),
                    ('value', String()),
                ])),

                # other context
                ('contexts', Nested([
                    ('key', String()),
                    ('value', String()),
                ])),

                # http interface
                ('http_method', Nullable(String())),
                ('http_referer', Nullable(String())),

                # exception interface
                ('exception_stacks', Nested([
                    ('type', Nullable(String())),
                    ('value', Nullable(String())),
                    ('mechanism_type', Nullable(String())),
                    ('mechanism_handled', Nullable(UInt(8))),
                ])),
                ('exception_frames', Nested([
                    ('abs_path', Nullable(String())),
                    ('filename', Nullable(String())),
                    ('package', Nullable(String())),
                    ('module', Nullable(String())),
                    ('function', Nullable(String())),
                    ('in_app', Nullable(UInt(8))),
                    ('colno', Nullable(UInt(32))),
                    ('lineno', Nullable(UInt(32))),
                    ('stack_level', UInt(16)),
                ])),

                # These are columns we added later in the life of the (current) production
                # database. They don't necessarily belong here in a logical/readability sense
                # but they are here to match the order of columns in production because
                # `insert_distributed_sync` is very sensitive to column existence and ordering.
                ('culprit', Nullable(String())),
                ('sdk_integrations', Array(String())),
                ('modules', Nested([
                    ('name', String()),
                    ('version', String()),
                ])),
        ]

        # Shared expression: it is interpolated as the last element of the
        # ORDER BY tuple and also passed as the schema's sampling expression.
        sample_expr = 'cityHash64(toString(event_id))'
        schema = ReplacingMergeTreeSchema(
            columns=all_columns,
            local_table_name='sentry_local',
            dist_table_name='sentry_dist',
            mandatory_conditions=[('deleted', '=', 0)],
            order_by='(project_id, toStartOfDay(timestamp), %s)' % sample_expr,
            partition_by='(toMonday(timestamp), if(equals(retention_days, 30), 30, 90))',
            version_column='deleted',
            sample_expr=sample_expr,
            migration_function=events_migrations)

        # The same schema object is used for both reading and writing.
        dataset_schemas = DatasetSchemas(
            read_schema=schema,
            write_schema=schema,
        )

        # The events processor is handed the promoted tag columns only.
        table_writer = TableWriter(
            write_schema=schema,
            stream_loader=KafkaStreamLoader(
                processor=EventsProcessor(promoted_tag_columns),
                default_topic="events",
                replacement_topic="event-replacements",
                commit_log_topic="snuba-commit-log",
            )
        )

        super(EventsDataset, self).__init__(
            dataset_schemas=dataset_schemas,
            table_writer=table_writer,
            time_group_columns={
                'time': 'timestamp',
                'rtime': 'received'
            },
            time_parse_columns=('timestamp', 'received')
        )

        # Keep the individual column groups on the instance.
        # NOTE(review): these look like they must be assigned before the
        # TagColumnProcessor below is built, since its arguments come from
        # self._get_promoted_columns() / self._get_column_tag_map() — confirm
        # against those helpers before reordering.
        self.__metadata_columns = metadata_columns
        self.__promoted_tag_columns = promoted_tag_columns
        self.__promoted_context_tag_columns = promoted_context_tag_columns
        self.__promoted_context_columns = promoted_context_columns
        self.__required_columns = required_columns

        self.__tags_processor = TagColumnProcessor(
            columns=all_columns,
            promoted_columns=self._get_promoted_columns(),
            column_tag_map=self._get_column_tag_map(),
        )