Ejemplo n.º 1
0
def test_joined_columns():
    """Verify the columns a JoinedSchema exposes for complex_join_structure.

    The expectation is checked twice: once over the flattened columns and
    once over the structured (non-flattened) column definitions.
    """
    actual = JoinedSchema(complex_join_structure).get_columns()

    expected = ColumnSet(
        [
            ("t1.t1c1", UInt(64)),
            ("t1.t1c2", String()),
            ("t1.t1c3", Nested([("t11c4", UInt(64))])),
            ("t2.t2c1", UInt(64)),
            ("t2.t2c2", String()),
            ("t2.t2c3", Nested([("t21c4", UInt(64))])),
            ("t3.t3c1", UInt(64)),
            ("t3.t3c2", String()),
            ("t3.t3c3", Nested([("t31c4", UInt(64))])),
        ]
    )

    # Flattened comparison: nested columns are exploded at this level.
    actual_flattened = {col.flattened for col in actual}
    expected_flattened = {col.flattened for col in expected}
    assert actual_flattened == expected_flattened

    # Structured comparison: nested columns are kept as-is, not exploded.
    actual_reprs = {repr(col) for col in actual.columns}
    expected_reprs = {repr(col) for col in expected.columns}
    assert actual_reprs == expected_reprs
Ejemplo n.º 2
0
    def __init__(
        self,
        writable_storage_key: StorageKey,
        readable_storage_key: StorageKey,
        value_schema: Sequence[Column[SchemaModifiers]],
        mappers: TranslationMappers,
    ) -> None:
        """Build the entity on top of one writable and one readable storage.

        Args:
            writable_storage_key: Resolved through ``get_writable_storage``;
                the result is also passed on as ``writable_storage``.
            readable_storage_key: Resolved through ``get_storage``; the query
                plan builder is given this storage.
            value_schema: Columns appended after the fixed columns of the
                abstract column set.
            mappers: Translation mappers concatenated after the built-in
                ``tags`` subscriptable mapper.
        """
        write_target = get_writable_storage(writable_storage_key)
        read_source = get_storage(readable_storage_key)

        # The "tags" subscriptable mapping comes first; the caller-provided
        # mappers are chained behind it.
        tags_mapper = SubscriptableMapper(None, "tags", None, "tags")
        combined_mappers = TranslationMappers(
            subscriptables=[tags_mapper]
        ).concat(mappers)

        plan_builder = SingleStorageQueryPlanBuilder(
            read_source, mappers=combined_mappers
        )

        tags_column = Column(
            "tags", Nested([("key", UInt(64)), ("value", UInt(64))])
        )
        fixed_columns = [
            Column("org_id", UInt(64)),
            Column("project_id", UInt(64)),
            Column("metric_id", UInt(64)),
            Column("timestamp", DateTime()),
            tags_column,
        ]

        super().__init__(
            storages=[write_target, read_source],
            query_pipeline_builder=SimplePipelineBuilder(
                query_plan_builder=plan_builder
            ),
            abstract_column_set=ColumnSet([*fixed_columns, *value_schema]),
            join_relationships={},
            writable_storage=write_target,
            validators=[
                EntityRequiredColumnValidator({"org_id", "project_id"})
            ],
            required_time_column="timestamp",
        )
Ejemplo n.º 3
0
    def test_schema(self):
        """Exercise schema rendering and column lookup on a small ColumnSet."""
        nested_bar = Nested([("qux:mux", String())])
        column_set = ColumnSet([("foo", UInt(8)), ("bar", nested_bar)])

        # The nested sub-column name is expected inside backticks.
        rendered = column_set.for_schema()
        assert rendered == "foo UInt8, bar Nested(`qux:mux` String)"

        # A plain column is looked up by name; a nested sub-column by its
        # dotted path, where its type is expected to be an Array of the
        # declared element type.
        foo_column = column_set["foo"]
        assert foo_column.type == UInt(8)
        nested_column = column_set["bar.qux:mux"]
        assert nested_column.type == Array(String())
Ejemplo n.º 4
0
    def test_schema(self):
        """Check for_schema() output and item lookup for a two-column set."""
        definitions = [
            ('foo', UInt(8)),
            ('bar', Nested([('qux:mux', String())])),
        ]
        cols = ColumnSet(definitions)

        # Schema string: the nested sub-column name is expected in backticks.
        expected_schema = 'foo UInt8, bar Nested(`qux:mux` String)'
        assert cols.for_schema() == expected_schema

        # Lookup by plain name, then by the dotted path of the nested
        # sub-column (expected to resolve to an Array type).
        foo_type = cols['foo'].type
        assert foo_type == UInt(8)
        nested_type = cols['bar.qux:mux'].type
        assert nested_type == Array(String())
Ejemplo n.º 5
0
    def __init__(self) -> None:
        """Configure the entity on top of the writable SPANS storage.

        The same storage object is used both as the only entry in
        ``storages`` and as ``writable_storage``. A single join relationship,
        ``contained``, points at the TRANSACTIONS entity.
        """
        spans_storage = get_writable_storage(StorageKey.SPANS)

        tag_mappers = TranslationMappers(
            subscriptables=[SubscriptableMapper(None, "tags", None, "tags")],
        )
        pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=SingleStorageQueryPlanBuilder(
                storage=spans_storage, mappers=tag_mappers,
            ),
        )

        entity_columns = ColumnSet([
            ("project_id", UInt(64)),
            ("transaction_id", UUID()),
            ("trace_id", UUID()),
            ("transaction_span_id", UInt(64)),
            ("span_id", UInt(64)),
            ("parent_span_id", UInt(64, Modifiers(nullable=True))),
            ("transaction_name", String()),
            ("op", String()),
            ("status", UInt(8)),
            ("start_ts", DateTime()),
            ("start_ns", UInt(32)),
            ("finish_ts", DateTime()),
            ("finish_ns", UInt(32)),
            ("duration_ms", UInt(32)),
            ("tags", Nested([("key", String()), ("value", String())])),
        ])

        # Inner join to TRANSACTIONS on the column pairs listed in `columns`;
        # `equivalences` lists further column pairs declared as equivalent.
        contained_relationship = JoinRelationship(
            rhs_entity=EntityKey.TRANSACTIONS,
            columns=[
                ("project_id", "project_id"),
                ("transaction_span_id", "span_id"),
            ],
            join_type=JoinType.INNER,
            equivalences=[
                ColumnEquivalence("transaction_id", "event_id"),
                ColumnEquivalence("transaction_name", "transaction_name"),
                ColumnEquivalence("trace_id", "trace_id"),
            ],
        )

        super().__init__(
            storages=[spans_storage],
            query_pipeline_builder=pipeline_builder,
            abstract_column_set=entity_columns,
            join_relationships={"contained": contained_relationship},
            writable_storage=spans_storage,
            validators=[EntityRequiredColumnValidator({"project_id"})],
            required_time_column=None,
        )
Ejemplo n.º 6
0
 def forwards_dist(self) -> Sequence[operations.Operation]:
     """Return the operation adding ``measurements`` to ``transactions_dist``.

     The column is a Nested structure with a ``key`` and a ``value``
     sub-column and is positioned after ``_contexts_flattened``.
     """
     measurements_column = Column(
         "measurements",
         Nested([("key", LowCardinality(String())), ("value", Float(64))]),
     )
     add_measurements = operations.AddColumn(
         storage_set=StorageSetKey.TRANSACTIONS,
         table_name="transactions_dist",
         column=measurements_column,
         after="_contexts_flattened",
     )
     return [add_measurements]
Ejemplo n.º 7
0
    def __init__(
        self,
        writable_storage_key: Optional[StorageKey],
        readable_storage_key: StorageKey,
        value_schema: Sequence[Column[SchemaModifiers]],
        mappers: TranslationMappers,
        abstract_column_set: Optional[ColumnSet] = None,
        validators: Optional[Sequence[QueryValidator]] = None,
    ) -> None:
        """Build the entity from a readable and an optional writable storage.

        Args:
            writable_storage_key: When truthy, resolved through
                ``get_writable_storage``; otherwise the entity gets no
                writable storage.
            readable_storage_key: Resolved through ``get_storage``; the query
                plan builder is given this storage.
            value_schema: Columns appended to the default abstract column
                set. Only used when ``abstract_column_set`` is None.
            mappers: Translation mappers concatenated after the built-in
                ``tags`` subscriptable mapper.
            abstract_column_set: Full replacement for the default column set.
            validators: Replacement for the default validators (required
                ``org_id``/``project_id`` columns and a granularity validator
                with ``minimum=10``).
        """
        writable_storage = None
        if writable_storage_key:
            writable_storage = get_writable_storage(writable_storage_key)
        readable_storage = get_storage(readable_storage_key)

        # The readable storage always comes first; the writable one is listed
        # only when it is present.
        storages = [readable_storage]
        if writable_storage:
            storages += [writable_storage]

        if abstract_column_set is None:
            default_columns = [
                Column("org_id", UInt(64)),
                Column("project_id", UInt(64)),
                Column("metric_id", UInt(64)),
                Column("timestamp", DateTime()),
                Column("bucketed_time", DateTime()),
                Column(
                    "tags", Nested([("key", UInt(64)), ("value", UInt(64))])
                ),
            ]
            abstract_column_set = ColumnSet([*default_columns, *value_schema])

        if validators is None:
            required_columns = EntityRequiredColumnValidator(
                {"org_id", "project_id"}
            )
            validators = [required_columns, GranularityValidator(minimum=10)]

        # The "tags" subscriptable mapping comes first; the caller-provided
        # mappers are chained behind it.
        tags_mapper = SubscriptableMapper(None, "tags", None, "tags")
        combined_mappers = TranslationMappers(
            subscriptables=[tags_mapper]
        ).concat(mappers)
        plan_builder = SingleStorageQueryPlanBuilder(
            readable_storage, mappers=combined_mappers
        )

        super().__init__(
            storages=storages,
            query_pipeline_builder=SimplePipelineBuilder(
                query_plan_builder=plan_builder
            ),
            abstract_column_set=abstract_column_set,
            join_relationships={},
            writable_storage=writable_storage,
            validators=validators,
            required_time_column="timestamp",
        )
Ejemplo n.º 8
0
 def forwards_dist(self) -> Sequence[operations.SqlOperation]:
     """Return the operation adding ``span_op_breakdowns`` to ``transactions_dist``.

     The column is a Nested structure with a ``key`` and a ``value``
     sub-column and is positioned after ``measurements.value``.
     """
     breakdown_fields = [
         ("key", String(Modifiers(low_cardinality=True))),
         ("value", Float(64)),
     ]
     add_breakdowns = operations.AddColumn(
         storage_set=StorageSetKey.TRANSACTIONS,
         table_name="transactions_dist",
         column=Column("span_op_breakdowns", Nested(breakdown_fields)),
         after="measurements.value",
     )
     return [add_breakdowns]
 def forwards_local(self) -> Sequence[operations.SqlOperation]:
     """Return the operation adding ``measurements`` to ``transactions_local``.

     The column is a Nested structure with a ``key`` and a ``value``
     sub-column and is positioned after ``_contexts_flattened``.
     """
     measurement_fields = [
         ("key", String(Modifiers(low_cardinality=True))),
         ("value", Float(64)),
     ]
     add_measurements = operations.AddColumn(
         storage_set=StorageSetKey.TRANSACTIONS,
         table_name="transactions_local",
         column=Column("measurements", Nested(measurement_fields)),
         after="_contexts_flattened",
     )
     return [add_measurements]
Ejemplo n.º 10
0
from snuba.datasets.storages import StorageKey
from snuba.query.processors.arrayjoin_keyvalue_optimizer import (
    ArrayJoinKeyValueOptimizer, )
from snuba.query.processors.table_rate_limit import TableRateLimit

# Column definitions spread into the storage schema(s) declared in this
# module via ``*aggregated_columns``.
aggregated_columns = [
    # Identification and bucketing columns.
    Column("org_id", UInt(64)),
    Column("use_case_id", String()),
    Column("project_id", UInt(64)),
    Column("metric_id", UInt(64)),
    Column("granularity", UInt(8)),
    Column("timestamp", DateTime()),
    Column("retention_days", UInt(16)),
    # Tags: each key carries both an indexed and a raw value.
    Column(
        "tags",
        Nested(
            [
                ("key", UInt(64)),
                ("indexed_value", UInt(64)),
                ("raw_value", String()),
            ]
        ),
    ),
    # Hash arrays flagged readonly through their schema modifiers.
    Column(
        "_raw_tags_hash",
        Array(UInt(64), SchemaModifiers(readonly=True)),
    ),
    Column(
        "_indexed_tags_hash",
        Array(UInt(64), SchemaModifiers(readonly=True)),
    ),
]

sets_storage = ReadableTableStorage(
    storage_key=StorageKey.GENERIC_METRICS_SETS,
    storage_set_key=StorageSetKey.GENERIC_METRICS_SETS,
    schema=TableSchema(
        local_table_name="generic_metrics_sets_local",
        dist_table_name="generic_metrics_sets_dist",
        storage_set_key=StorageSetKey.GENERIC_METRICS_SETS,
        columns=ColumnSet([
            *aggregated_columns,
Ejemplo n.º 11
0
    def __init__(self) -> None:
        """Declare the transactions table schema and wire up the dataset.

        One ReplacingMergeTree schema serves as both read and write schema.
        A tag column processor is created from the same column set before the
        parent initializer runs.
        """
        column_set = ColumnSet([
            ("project_id", UInt(64)),
            ("event_id", UUID()),
            ("trace_id", UUID()),
            ("span_id", UInt(64)),
            ("transaction_name", LowCardinality(String())),
            (
                "transaction_hash",
                Materialized(UInt(64), "cityHash64(transaction_name)"),
            ),
            ("transaction_op", LowCardinality(String())),
            (
                "transaction_status",
                WithDefault(UInt(8), UNKNOWN_SPAN_STATUS),
            ),
            ("start_ts", DateTime()),
            ("start_ms", UInt(16)),
            ("_start_date", Materialized(Date(), "toDate(start_ts)")),
            ("finish_ts", DateTime()),
            ("finish_ms", UInt(16)),
            ("_finish_date", Materialized(Date(), "toDate(finish_ts)")),
            ("duration", UInt(32)),
            ("platform", LowCardinality(String())),
            ("environment", LowCardinality(Nullable(String()))),
            ("release", LowCardinality(Nullable(String()))),
            ("dist", LowCardinality(Nullable(String()))),
            ("ip_address_v4", Nullable(IPv4())),
            ("ip_address_v6", Nullable(IPv6())),
            ("user", WithDefault(String(), "''")),
            ("user_hash", Materialized(UInt(64), "cityHash64(user)")),
            ("user_id", Nullable(String())),
            ("user_name", Nullable(String())),
            ("user_email", Nullable(String())),
            ("sdk_name", WithDefault(LowCardinality(String()), "''")),
            ("sdk_version", WithDefault(LowCardinality(String()), "''")),
            ("tags", Nested([("key", String()), ("value", String())])),
            ("_tags_flattened", String()),
            ("contexts", Nested([("key", String()), ("value", String())])),
            ("_contexts_flattened", String()),
            ("partition", UInt(16)),
            ("offset", UInt(64)),
            ("retention_days", UInt(16)),
            ("deleted", UInt(8)),
        ])

        sort_key = (
            "(project_id, _finish_date, transaction_name, cityHash64(span_id))"
        )
        table_schema = ReplacingMergeTreeSchema(
            columns=column_set,
            local_table_name="transactions_local",
            dist_table_name="transactions_dist",
            mandatory_conditions=[],
            prewhere_candidates=["event_id", "project_id"],
            order_by=sort_key,
            partition_by="(retention_days, toMonday(_finish_date))",
            version_column="deleted",
            sample_expr=None,
            migration_function=transactions_migrations,
        )

        # Reads and writes share the one schema.
        schemas = DatasetSchemas(
            read_schema=table_schema, write_schema=table_schema
        )

        self.__tags_processor = TagColumnProcessor(
            columns=column_set,
            promoted_columns=self._get_promoted_columns(),
            column_tag_map=self._get_column_tag_map(),
        )

        stream_loader = KafkaStreamLoader(
            processor=TransactionsMessageProcessor(), default_topic="events"
        )
        writer = TransactionsTableWriter(
            write_schema=table_schema, stream_loader=stream_loader
        )

        super().__init__(
            dataset_schemas=schemas,
            table_writer=writer,
            time_group_columns={
                "bucketed_start": "start_ts",
                "bucketed_end": "finish_ts",
            },
            time_parse_columns=("start_ts", "finish_ts"),
        )
Ejemplo n.º 12
0
    def __init__(self) -> None:
        """Declare the abstract columns and pick storages for this dataset.

        Three column sets are kept on the instance: columns common to events
        and transactions, events-only columns and transactions-only columns.
        Their concatenation is the abstract column set. Queries go through a
        ``DiscoverQueryStorageSelector`` that is given the events, read-only
        events and transactions storages; there is no writable storage.
        """
        common = ColumnSet([
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", Nullable(String())),
            ("timestamp", DateTime()),
            ("platform", Nullable(String())),
            ("environment", Nullable(String())),
            ("release", Nullable(String())),
            ("dist", Nullable(String())),
            ("user", Nullable(String())),
            ("transaction", Nullable(String())),
            ("message", Nullable(String())),
            ("title", Nullable(String())),
            # User
            ("user_id", Nullable(String())),
            ("username", Nullable(String())),
            ("email", Nullable(String())),
            ("ip_address", Nullable(String())),
            # SDK
            ("sdk_name", Nullable(String())),
            ("sdk_version", Nullable(String())),
            # Geo location context
            ("geo_country_code", Nullable(String())),
            ("geo_region", Nullable(String())),
            ("geo_city", Nullable(String())),
            ("http_method", Nullable(String())),
            ("http_referer", Nullable(String())),
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
        ])
        self.__common_columns = common

        # Exception interface: one nested structure for the stacks and one
        # for the frames.
        exception_stacks = Nested([
            ("type", Nullable(String())),
            ("value", Nullable(String())),
            ("mechanism_type", Nullable(String())),
            ("mechanism_handled", Nullable(UInt(8))),
        ])
        exception_frames = Nested([
            ("abs_path", Nullable(String())),
            ("filename", Nullable(String())),
            ("package", Nullable(String())),
            ("module", Nullable(String())),
            ("function", Nullable(String())),
            ("in_app", Nullable(UInt(8))),
            ("colno", Nullable(UInt(32))),
            ("lineno", Nullable(UInt(32))),
            ("stack_level", UInt(16)),
        ])
        events_only = ColumnSet([
            ("group_id", Nullable(UInt(64))),
            ("primary_hash", Nullable(FixedString(32))),
            # Promoted tags
            ("level", Nullable(String())),
            ("logger", Nullable(String())),
            ("server_name", Nullable(String())),
            ("site", Nullable(String())),
            ("url", Nullable(String())),
            ("search_message", Nullable(String())),
            ("location", Nullable(String())),
            ("culprit", Nullable(String())),
            ("received", Nullable(DateTime())),
            ("sdk_integrations", Nullable(Array(String()))),
            ("version", Nullable(String())),
            ("exception_stacks", exception_stacks),
            ("exception_frames", exception_frames),
            ("modules", Nested([("name", String()), ("version", String())])),
        ])
        self.__events_columns = events_only

        measurements = Nested(
            [("key", LowCardinality(String())), ("value", Float(64))]
        )
        transactions_only = ColumnSet([
            ("trace_id", Nullable(UUID())),
            ("span_id", Nullable(UInt(64))),
            ("transaction_hash", Nullable(UInt(64))),
            ("transaction_op", Nullable(String())),
            ("transaction_status", Nullable(UInt(8))),
            ("duration", Nullable(UInt(32))),
            ("measurements", measurements),
        ])
        self.__transactions_columns = transactions_only

        events_storage = get_storage(StorageKey.EVENTS)
        events_ro_storage = get_storage(StorageKey.EVENTS_RO)
        transactions_storage = get_storage(StorageKey.TRANSACTIONS)

        self.__time_group_columns: Mapping[str, str] = {}
        self.__time_parse_columns = ("timestamp",)

        storage_selector = DiscoverQueryStorageSelector(
            events_table=events_storage,
            events_ro_table=events_ro_storage,
            abstract_events_columns=self.__events_columns,
            transactions_table=transactions_storage,
            abstract_transactions_columns=self.__transactions_columns,
        )
        all_columns = (
            self.__common_columns
            + self.__events_columns
            + self.__transactions_columns
        )

        super().__init__(
            storages=[events_storage, transactions_storage],
            query_plan_builder=SelectedStorageQueryPlanBuilder(
                selector=storage_selector,
            ),
            abstract_column_set=all_columns,
            writable_storage=None,
        )
Ejemplo n.º 13
0
    def __init__(self):
        """Declare the transactions table schema and initialize the dataset.

        One ReplacingMergeTree schema is used for both reading and writing.
        ``transaction_hash`` and ``duration`` are materialized columns
        computed from other columns of the same table.
        """
        hash_expression = 'cityHash64(transaction_name)'
        duration_expression = (
            '((finish_ts - start_ts) * 1000) + (finish_ms - start_ms)'
        )

        table_columns = ColumnSet(
            [
                ('project_id', UInt(64)),
                ('event_id', UUID()),
                ('trace_id', UUID()),
                ('span_id', UInt(64)),
                ('transaction_name', String()),
                ('transaction_hash', Materialized(UInt(64), hash_expression)),
                ('transaction_op', LowCardinality(String())),
                ('start_ts', DateTime()),
                ('start_ms', UInt(16)),
                ('finish_ts', DateTime()),
                ('finish_ms', UInt(16)),
                ('duration', Materialized(UInt(32), duration_expression)),
                ('platform', LowCardinality(String())),
                ('environment', Nullable(String())),
                ('release', Nullable(String())),
                ('dist', Nullable(String())),
                ('ip_address_v4', Nullable(IPv4())),
                ('ip_address_v6', Nullable(IPv6())),
                ('user', WithDefault(String(), "''")),
                ('user_id', Nullable(String())),
                ('user_name', Nullable(String())),
                ('user_email', Nullable(String())),
                ('tags', Nested([('key', String()), ('value', String())])),
                ('contexts', Nested([('key', String()), ('value', String())])),
                ('partition', UInt(16)),
                ('offset', UInt(64)),
                ('retention_days', UInt(16)),
                ('deleted', UInt(8)),
            ]
        )

        sort_key = '(project_id, toStartOfDay(start_ts), transaction_hash, start_ts, start_ms, trace_id, span_id)'
        table_schema = ReplacingMergeTreeSchema(
            columns=table_columns,
            local_table_name='transactions_local',
            dist_table_name='transactions_dist',
            order_by=sort_key,
            partition_by='(retention_days, toMonday(start_ts))',
            version_column='deleted',
            sample_expr=None,
        )

        # Reads and writes share the one schema.
        schemas = DatasetSchemas(
            read_schema=table_schema, write_schema=table_schema
        )

        bucket_columns = {
            'bucketed_start': 'start_ts',
            'bucketed_end': 'finish_ts',
        }
        super().__init__(
            dataset_schemas=schemas,
            processor=TransactionsMessageProcessor(),
            default_topic="events",
            time_group_columns=bucket_columns,
        )
Ejemplo n.º 14
0
 Column("platform", String(Modifiers(low_cardinality=True))),
 Column("environment", String(Modifiers(nullable=True,
                                        low_cardinality=True))),
 Column("release", String(Modifiers(nullable=True, low_cardinality=True))),
 Column("dist", String(Modifiers(nullable=True, low_cardinality=True))),
 Column("ip_address_v4", IPv4(Modifiers(nullable=True))),
 Column("ip_address_v6", IPv6(Modifiers(nullable=True))),
 Column("user", (String(Modifiers(default="''")))),
 Column("user_hash", UInt(64, Modifiers(materialized="cityHash64(user)"))),
 Column("user_id", String(Modifiers(nullable=True))),
 Column("user_name", String(Modifiers(nullable=True))),
 Column("user_email", String(Modifiers(nullable=True))),
 Column("sdk_name", String(Modifiers(nullable=True, low_cardinality=True))),
 Column("sdk_version", String(Modifiers(nullable=True,
                                        low_cardinality=True))),
 Column("tags", Nested([("key", String()), ("value", String())])),
 Column("_tags_flattened", String()),
 Column("contexts", Nested([("key", String()), ("value", String())])),
 Column("_contexts_flattened", String()),
 Column("transaction_name",
        String(Modifiers(low_cardinality=True, default="''"))),
 Column(
     "transaction_hash",
     UInt(64, Modifiers(materialized="cityHash64(transaction_name)")),
 ),
 Column("span_id", UInt(64, Modifiers(nullable=True))),
 Column("trace_id", UUID(Modifiers(nullable=True))),
 Column("partition", UInt(16)),
 Column("offset", UInt(64, Modifiers(codecs=["DoubleDelta", "LZ4"]))),
 Column("message_timestamp", DateTime()),
 Column("retention_days", UInt(16)),
Ejemplo n.º 15
0
def test_events_promoted_boolean_context() -> None:
    """A promoted boolean context lookup becomes a "True"/"False" expression.

    The query selects ``contexts[device.charging]`` as an ``arrayElement``
    lookup. After ``MappingColumnPromoter`` (mapping that key to the
    ``device_charging`` column, with ``cast_to_string=True``) and
    ``EventsPromotedBooleanContextsProcessor`` have run, the selected
    expression is expected to be an ``if`` over ``toString(device_charging)``.
    """
    alias = "contexts[device.charging]"

    columns = ColumnSet(
        [
            ("device_charging", UInt(8, Modifier(nullable=True))),
            ("contexts", Nested([("key", String()), ("value", String())])),
        ]
    )

    # Input: contexts.value[indexOf(contexts.key, 'device.charging')]
    key_position = FunctionCall(
        None,
        "indexOf",
        (Column(None, None, "contexts.key"), Literal(None, "device.charging")),
    )
    context_lookup = FunctionCall(
        alias,
        "arrayElement",
        (Column(None, None, "contexts.value"), key_position),
    )
    query = ClickhouseQuery(
        Table("events", columns),
        selected_columns=[SelectedExpression(alias, context_lookup)],
    )

    # Expected: if(toString(device_charging) IN ('1', 'True'), 'True', 'False')
    is_truthy = binary_condition(
        ConditionFunctions.IN,
        FunctionCall(None, "toString", (Column(None, None, "device_charging"),)),
        literals_tuple(None, [Literal(None, "1"), Literal(None, "True")]),
    )
    boolean_as_string = FunctionCall(
        alias,
        "if",
        (is_truthy, Literal(None, "True"), Literal(None, "False")),
    )
    expected = ClickhouseQuery(
        Table("events", columns),
        selected_columns=[SelectedExpression(alias, boolean_as_string)],
    )

    settings = HTTPQuerySettings()
    promoter = MappingColumnPromoter(
        {"contexts": {"device.charging": "device_charging"}}, cast_to_string=True
    )
    promoter.process_query(query, settings)
    EventsPromotedBooleanContextsProcessor().process_query(query, settings)

    assert query.get_selected_columns() == expected.get_selected_columns()
Ejemplo n.º 16
0
        ("dist", String(Modifiers(nullable=True))),
        ("transaction_name", String()),
        ("message", String()),
        ("title", String()),
        ("user", String()),
        ("user_hash", UInt(64)),
        ("user_id", String(Modifiers(nullable=True))),
        ("user_name", String(Modifiers(nullable=True))),
        ("user_email", String(Modifiers(nullable=True))),
        ("ip_address_v4", IPv4(Modifiers(nullable=True))),
        ("ip_address_v6", IPv6(Modifiers(nullable=True))),
        ("sdk_name", String(Modifiers(nullable=True))),
        ("sdk_version", String(Modifiers(nullable=True))),
        ("http_method", String(Modifiers(nullable=True))),
        ("http_referer", String(Modifiers(nullable=True))),
        ("tags", Nested([("key", String()), ("value", String())])),
        ("_tags_hash_map", Array(UInt(64))),
        ("contexts", Nested([("key", String()), ("value", String())])),
        ("trace_id", UUID(Modifiers(nullable=True))),
        ("deleted", UInt(8)),
    ]
)

# Table schema binding `columns` and `mandatory_conditions` to the local and
# distributed discover tables in the DISCOVER storage set.
schema = TableSchema(
    storage_set_key=StorageSetKey.DISCOVER,
    local_table_name="discover_local",
    dist_table_name="discover_dist",
    columns=columns,
    mandatory_conditions=mandatory_conditions,
)
Ejemplo n.º 17
0
    def __init__(self) -> None:
        """Set up the entity over the single DISCOVER storage.

        The abstract column set is the concatenation of the common columns
        declared here with ``EVENTS_COLUMNS`` and ``TRANSACTIONS_COLUMNS``.
        Translation mappers are chained in this order: events, transactions,
        null-function, the column mappers declared here, and finally the
        ``tags``/``contexts`` subscriptable mappers.
        """
        common = ColumnSet(
            [
                ("event_id", FixedString(32)),
                ("project_id", UInt(64)),
                ("type", String(Modifiers(nullable=True))),
                ("timestamp", DateTime()),
                ("platform", String(Modifiers(nullable=True))),
                ("environment", String(Modifiers(nullable=True))),
                ("release", String(Modifiers(nullable=True))),
                ("dist", String(Modifiers(nullable=True))),
                ("user", String(Modifiers(nullable=True))),
                ("transaction", String(Modifiers(nullable=True))),
                ("message", String(Modifiers(nullable=True))),
                ("title", String(Modifiers(nullable=True))),
                # User
                ("user_id", String(Modifiers(nullable=True))),
                ("username", String(Modifiers(nullable=True))),
                ("email", String(Modifiers(nullable=True))),
                ("ip_address", String(Modifiers(nullable=True))),
                # SDK
                ("sdk_name", String(Modifiers(nullable=True))),
                ("sdk_version", String(Modifiers(nullable=True))),
                # Geo location context
                ("geo_country_code", String(Modifiers(nullable=True))),
                ("geo_region", String(Modifiers(nullable=True))),
                ("geo_city", String(Modifiers(nullable=True))),
                ("http_method", String(Modifiers(nullable=True))),
                ("http_referer", String(Modifiers(nullable=True))),
                # Other tags and context
                ("tags", Nested([("key", String()), ("value", String())])),
                ("contexts", Nested([("key", String()), ("value", String())])),
                ("trace_id", String(Modifiers(nullable=True))),
                ("span_id", UInt(64, Modifiers(nullable=True))),
            ]
        )
        self.__common_columns = common
        self.__events_columns = EVENTS_COLUMNS
        self.__transactions_columns = TRANSACTIONS_COLUMNS

        discover_storage = get_storage(StorageKey.DISCOVER)

        # ip_address -> coalesce(IPv4NumToString(ip_address_v4),
        #                        IPv6NumToString(ip_address_v6))
        ip_address_mapper = ColumnToFunction(
            None,
            "ip_address",
            "coalesce",
            (
                FunctionCall(
                    None,
                    "IPv4NumToString",
                    (Column(None, None, "ip_address_v4"),),
                ),
                FunctionCall(
                    None,
                    "IPv6NumToString",
                    (Column(None, None, "ip_address_v6"),),
                ),
            ),
        )
        # Each geo_* column maps to a key of the "contexts" mapping.
        geo_mappers = [
            ColumnToMapping(
                None, column_name, None, "contexts", context_key, nullable=True
            )
            for column_name, context_key in (
                ("geo_country_code", "geo.country_code"),
                ("geo_region", "geo.region"),
                ("geo_city", "geo.city"),
            )
        ]
        # user -> nullIf(user, '')
        user_mapper = ColumnToFunction(
            None,
            "user",
            "nullIf",
            (Column(None, None, "user"), Literal(None, "")),
        )
        column_mappers = TranslationMappers(
            columns=[
                ip_address_mapper,
                ColumnToColumn(None, "transaction", None, "transaction_name"),
                ColumnToColumn(None, "username", None, "user_name"),
                ColumnToColumn(None, "email", None, "user_email"),
                *geo_mappers,
                user_mapper,
            ]
        )
        subscriptable_mappers = TranslationMappers(
            subscriptables=[
                SubscriptableMapper(None, "tags", None, "tags"),
                SubscriptableMapper(None, "contexts", None, "contexts"),
            ],
        )

        all_mappers = (
            events_translation_mappers
            .concat(transaction_translation_mappers)
            .concat(null_function_translation_mappers)
            .concat(column_mappers)
            .concat(subscriptable_mappers)
        )
        discover_storage_plan_builder = SingleStorageQueryPlanBuilder(
            storage=discover_storage, mappers=all_mappers,
        )
        discover_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=discover_storage_plan_builder
        )

        entity_columns = (
            self.__common_columns
            + self.__events_columns
            + self.__transactions_columns
        )
        super().__init__(
            storages=[discover_storage],
            query_pipeline_builder=discover_pipeline_builder,
            abstract_column_set=entity_columns,
            join_relationships={},
            writable_storage=None,
            validators=[EntityRequiredColumnValidator({"project_id"})],
            required_time_column="timestamp",
        )
Ejemplo n.º 18
0
    Column("dataset", LowCardinality(String())),
    Column("projects", Array(UInt(64))),
    Column("organization", Nullable(UInt(64))),
    Column("timestamp", DateTime()),
    Column("duration_ms", UInt(32)),
    Column("status", status_type),
    Column(
        "clickhouse_queries",
        Nested([
            Column("sql", String()),
            Column("status", status_type),
            Column("trace_id", Nullable(UUID())),
            Column("duration_ms", UInt(32)),
            Column("stats", String()),
            Column("final", UInt(8)),
            Column("cache_hit", UInt(8)),
            Column("sample", Float(32)),
            Column("max_threads", UInt(8)),
            Column("num_days", UInt(32)),
            Column("clickhouse_table", LowCardinality(String())),
            Column("query_id", String()),
            Column("is_duplicate", UInt(8)),
            Column("consistent", UInt(8)),
        ]),
    ),
]


class Migration(migration.MultiStepMigration):
    blocking = False

    def forwards_local(self) -> Sequence[operations.Operation]:
Ejemplo n.º 19
0
class Migration(migration.ClickhouseNodeMigration):
    """Recreate the ``generic_metric_sets_aggregation_mv`` materialized view.

    On local nodes the forward step issues a ``DropTable`` for the view name
    followed by a ``CreateMaterializedView`` that reads from
    ``generic_metric_sets_raw_local`` and writes to
    ``generic_metric_sets_local``. The backward step only drops the view.
    No operations are issued for distributed nodes.
    """

    blocking = False
    view_name = "generic_metric_sets_aggregation_mv"

    # Column layout handed to CreateMaterializedView.
    dest_table_columns: Sequence[Column[Modifiers]] = [
        Column("org_id", UInt(64)),
        Column("project_id", UInt(64)),
        Column("metric_id", UInt(64)),
        Column("granularity", UInt(8)),
        Column("timestamp", DateTime(modifiers=Modifiers(codecs=["DoubleDelta"]))),
        Column("retention_days", UInt(16)),
        Column(
            "tags",
            Nested(
                [
                    ("key", UInt(64)),
                    ("indexed_value", UInt(64)),
                    ("raw_value", String()),
                ]
            ),
        ),
        Column("value", AggregateFunction("uniqCombined64", [UInt(64)])),
        Column("use_case_id", String(Modifiers(low_cardinality=True))),
    ]

    def forwards_local(self) -> Sequence[operations.SqlOperation]:
        """Return the drop-then-create operations for the view on local nodes."""
        storage_set = StorageSetKey.GENERIC_METRICS_SETS

        drop_existing = operations.DropTable(
            storage_set=storage_set,
            table_name=self.view_name,
        )
        # The query fans each raw row out over its granularities and rounds
        # the timestamp down to the bucket size selected by the granularity.
        create_view = operations.CreateMaterializedView(
            storage_set=storage_set,
            view_name=self.view_name,
            columns=self.dest_table_columns,
            destination_table_name="generic_metric_sets_local",
            query="""
                SELECT
                    use_case_id,
                    org_id,
                    project_id,
                    metric_id,
                    arrayJoin(granularities) as granularity,
                    tags.key,
                    tags.indexed_value,
                    tags.raw_value,
                    toDateTime(multiIf(granularity=0,10,granularity=1,60,granularity=2,3600,granularity=3,86400,-1) *
                      intDiv(toUnixTimestamp(timestamp),
                             multiIf(granularity=0,10,granularity=1,60,granularity=2,3600,granularity=3,86400,-1))) as timestamp,
                    retention_days,
                    uniqCombined64State(arrayJoin(set_values)) as value
                FROM generic_metric_sets_raw_local
                WHERE materialization_version = 1
                  AND metric_type = 'set'
                GROUP BY
                    use_case_id,
                    org_id,
                    project_id,
                    metric_id,
                    tags.key,
                    tags.indexed_value,
                    tags.raw_value,
                    timestamp,
                    granularity,
                    retention_days
                """,
        )
        return [drop_existing, create_view]

    def backwards_local(self) -> Sequence[operations.SqlOperation]:
        """Return the operation that drops the view on local nodes."""
        drop_view = operations.DropTable(
            storage_set=StorageSetKey.GENERIC_METRICS_SETS,
            table_name=self.view_name,
        )
        return [drop_view]

    def forwards_dist(self) -> Sequence[operations.SqlOperation]:
        """Return no operations: nothing is applied on distributed nodes."""
        return []

    def backwards_dist(self) -> Sequence[operations.SqlOperation]:
        """Return no operations: nothing is reverted on distributed nodes."""
        return []
Ejemplo n.º 20
0
    Array,
    Column,
    DateTime,
    Nested,
    Nullable,
    String,
    UInt,
)
from snuba.clusters.storage_sets import StorageSetKey
from snuba.datasets.storages.tags_hash_map import TAGS_HASH_MAP_COLUMN
from snuba.migrations import migration, operations, table_engines
from snuba.migrations.columns import LowCardinality, Materialized, WithDefault

# Code that SPAN_STATUS_NAME_TO_CODE maps the "unknown" status name to.
UNKNOWN_SPAN_STATUS = SPAN_STATUS_NAME_TO_CODE["unknown"]

# Nested "tags" column whose entries are (key, value) pairs of strings.
tags_col = Column("tags", Nested([("key", String()), ("value", String())]))

columns = [
    Column("project_id", UInt(64)),
    Column("transaction_id", UUID()),
    Column("trace_id", UUID()),
    Column("transaction_span_id", UInt(64)),
    Column("span_id", UInt(64)),
    Column("parent_span_id", Nullable(UInt(64))),
    Column("transaction_name", LowCardinality(String())),
    Column("description", String()),  # description in span
    Column("op", LowCardinality(String())),
    Column(
        "status",
        WithDefault(UInt(8), str(UNKNOWN_SPAN_STATUS)),
    ),
Ejemplo n.º 21
0
def test_events_boolean_context() -> None:
    """Check how EventsBooleanContextsProcessor rewrites a boolean context.

    A selected ``contexts[device.charging]`` lookup is expected to end up
    wrapped in an ``if`` that yields "True" when the looked-up value is in
    ("1", "True") and "False" otherwise, keeping the original alias.
    """
    alias = "contexts[device.charging]"

    def context_lookup(lookup_alias):
        # arrayElement(contexts.value, indexOf(contexts.key, 'device.charging'));
        # a fresh expression tree is built on every call.
        key_position = FunctionCall(
            None,
            "indexOf",
            (
                Column(None, None, "contexts.key"),
                Literal(None, "device.charging"),
            ),
        )
        return FunctionCall(
            lookup_alias,
            "arrayElement",
            (Column(None, None, "contexts.value"), key_position),
        )

    columns = ColumnSet(
        [("contexts", Nested([("key", String()), ("value", String())]))]
    )

    # Input query: the plain, aliased lookup.
    query = ClickhouseQuery(
        Table("errors", columns),
        selected_columns=[SelectedExpression(alias, context_lookup(alias))],
    )

    # Expected query: the un-aliased lookup tested against the truthy values.
    is_truthy = binary_condition(
        ConditionFunctions.IN,
        context_lookup(None),
        literals_tuple(None, [Literal(None, "1"), Literal(None, "True")]),
    )
    wrapped = FunctionCall(
        alias,
        "if",
        (is_truthy, Literal(None, "True"), Literal(None, "False")),
    )
    expected = ClickhouseQuery(
        Table("errors", columns),
        selected_columns=[SelectedExpression(alias, wrapped)],
    )

    settings = HTTPQuerySettings()
    EventsBooleanContextsProcessor().process_query(query, settings)

    assert query.get_selected_columns() == expected.get_selected_columns()
Ejemplo n.º 22
0
    Nested,
)
from snuba.datasets.schemas.tables import MergeTreeSchema
from snuba.datasets.schemas.join import (
    JoinConditionExpression,
    JoinCondition,
    JoinClause,
    JoinType,
    TableJoinNode,
)

# Data source obtained from a MergeTree schema for "table1" (same local and
# distributed name). It has three columns: a UInt(64), a String and a Nested
# column with a single UInt(64) field. order_by/partition_by are left empty.
table1 = MergeTreeSchema(
    columns=ColumnSet([
        ("t1c1", UInt(64)),
        ("t1c2", String()),
        ("t1c3", Nested([("t11c4", UInt(64))])),
    ]),
    local_table_name="table1",
    dist_table_name="table1",
    order_by="",
    partition_by="",
).get_data_source()

table2 = MergeTreeSchema(
    columns=ColumnSet([
        ("t2c1", UInt(64)),
        ("t2c2", String()),
        ("t2c3", Nested([("t21c4", UInt(64))])),
    ]),
    local_table_name="table2",
    dist_table_name="table2",
Ejemplo n.º 23
0
class Migration(migration.ClickhouseNodeMigration):
    """Create the aggregated generic-metrics sets tables.

    Local nodes get ``generic_metric_sets_local`` (AggregatingMergeTree),
    followed by two materialized tag-hash columns and three bloom-filter
    indexes. Distributed nodes get ``generic_metric_sets_aggregated_dist``
    pointing at the local table. Each backward step drops the table created
    by the matching forward step.
    """

    blocking = False
    granularity = "2048"
    local_table_name = "generic_metric_sets_local"

    # Columns shared by the local and the distributed table definitions.
    columns: Sequence[Column[Modifiers]] = [
        Column("org_id", UInt(64)),
        Column("project_id", UInt(64)),
        Column("metric_id", UInt(64)),
        Column("granularity", UInt(8)),
        Column(
            "timestamp",
            DateTime(modifiers=Modifiers(codecs=["DoubleDelta"])),
        ),
        Column("retention_days", UInt(16)),
        Column(
            "tags",
            Nested([
                ("key", UInt(64)),
                ("indexed_value", UInt(64)),
                ("raw_value", String()),
            ]),
        ),
        Column("value", AggregateFunction("uniqCombined64", [UInt(64)])),
        Column("use_case_id", String(Modifiers(low_cardinality=True))),
    ]

    def forwards_local(self) -> Sequence[operations.SqlOperation]:
        """Return the create-table, add-column and add-index operations."""
        storage_set = StorageSetKey.GENERIC_METRICS_SETS
        table = self.local_table_name

        def hash_column(name, definition):
            # Array(UInt(64)) column materialized from the given expression.
            return operations.AddColumn(
                storage_set=storage_set,
                table_name=table,
                column=Column(
                    name,
                    Array(UInt(64), Modifiers(materialized=definition)),
                ),
            )

        def bloom_filter(index_name, expression):
            # Bloom-filter index with granularity 1 over the given expression.
            return operations.AddIndex(
                storage_set=storage_set,
                table_name=table,
                index_name=index_name,
                index_expression=expression,
                index_type="bloom_filter()",
                granularity=1,
            )

        engine = table_engines.AggregatingMergeTree(
            storage_set=storage_set,
            order_by="(org_id, project_id, metric_id, granularity, timestamp, tags.key, tags.indexed_value, tags.raw_value, retention_days, use_case_id)",
            primary_key="(org_id, project_id, metric_id, granularity, timestamp)",
            partition_by="(retention_days, toMonday(timestamp))",
            settings={"index_granularity": self.granularity},
            ttl="timestamp + toIntervalDay(retention_days)",
        )
        create_table = operations.CreateTable(
            storage_set=storage_set,
            table_name=table,
            engine=engine,
            columns=self.columns,
        )

        return [
            create_table,
            hash_column(
                "_indexed_tags_hash",
                hash_map_int_column_definition("tags.key", "tags.indexed_value"),
            ),
            hash_column(
                "_raw_tags_hash",
                hash_map_int_key_str_value_column_definition(
                    "tags.key", "tags.raw_value"
                ),
            ),
            bloom_filter("bf_indexed_tags_hash", "_indexed_tags_hash"),
            bloom_filter("bf_raw_tags_hash", "_raw_tags_hash"),
            bloom_filter("bf_tags_key_hash", "tags.key"),
        ]

    def backwards_local(self) -> Sequence[operations.SqlOperation]:
        """Return the operation that drops the local table."""
        drop_local = operations.DropTable(
            storage_set=StorageSetKey.GENERIC_METRICS_SETS,
            table_name=self.local_table_name,
        )
        return [drop_local]

    def forwards_dist(self) -> Sequence[operations.SqlOperation]:
        """Return the operation that creates the distributed table."""
        engine = table_engines.Distributed(
            local_table_name=self.local_table_name,
            sharding_key=None,
        )
        create_dist = operations.CreateTable(
            storage_set=StorageSetKey.GENERIC_METRICS_SETS,
            table_name="generic_metric_sets_aggregated_dist",
            engine=engine,
            columns=self.columns,
        )
        return [create_dist]

    def backwards_dist(self) -> Sequence[operations.SqlOperation]:
        """Return the operation that drops the distributed table."""
        drop_dist = operations.DropTable(
            storage_set=StorageSetKey.GENERIC_METRICS_SETS,
            table_name="generic_metric_sets_aggregated_dist",
        )
        return [drop_dist]
Ejemplo n.º 24
0
 ("platform", String()),
 ("environment", String(Modifiers(nullable=True))),
 ("release", String(Modifiers(nullable=True))),
 ("dist", String(Modifiers(nullable=True))),
 ("ip_address_v4", IPv4(Modifiers(nullable=True))),
 ("ip_address_v6", IPv6(Modifiers(nullable=True))),
 ("user", String()),
 ("user_hash", UInt(64, Modifiers(readonly=True))),
 ("user_id", String(Modifiers(nullable=True))),
 ("user_name", String(Modifiers(nullable=True))),
 ("user_email", String(Modifiers(nullable=True))),
 ("sdk_name", String()),
 ("sdk_version", String()),
 ("http_method", String(Modifiers(nullable=True))),
 ("http_referer", String(Modifiers(nullable=True))),
 ("tags", Nested([("key", String()), ("value", String())])),
 ("_tags_flattened", String()),
 ("_tags_hash_map", Array(UInt(64), Modifiers(readonly=True))),
 ("contexts", Nested([("key", String()), ("value", String())])),
 ("_contexts_flattened", String()),
 ("measurements", Nested([("key", String()), ("value", Float(64))]),),
 ("span_op_breakdowns", Nested([("key", String()), ("value", Float(64))]),),
 ("partition", UInt(16)),
 ("offset", UInt(64)),
 ("message_timestamp", DateTime()),
 ("retention_days", UInt(16)),
 ("deleted", UInt(8)),
 ("type", String(Modifiers(readonly=True))),
 ("message", String(Modifiers(readonly=True))),
 ("title", String(Modifiers(readonly=True))),
 ("timestamp", DateTime(Modifiers(readonly=True))),
 def forwards_local(self) -> Sequence[operations.Operation]:
     """Return the ordered column changes for the ``sentry_local`` table.

     The list holds nine column additions and one column drop, all against
     the EVENTS storage set. Each addition names the existing column it is
     placed after, so the order of the operations is significant.
     """
     events = StorageSetKey.EVENTS
     table = "sentry_local"

     def add_column(column, after):
         # AddColumn on sentry_local, positioned after the named column.
         return operations.AddColumn(
             storage_set=events,
             table_name=table,
             column=column,
             after=after,
         )

     return [
         add_column(Column("group_id", UInt(64)), "project_id"),
         operations.DropColumn(
             storage_set=events,
             table_name=table,
             column_name="device_model",
         ),
         add_column(
             Column("sdk_integrations", Array(String())), "exception_frames"
         ),
         add_column(
             Column("modules.name", Nested([("name", String())])),
             "sdk_integrations",
         ),
         add_column(Column("culprit", Nullable(String())), "sdk_integrations"),
         add_column(Column("search_message", Nullable(String())), "received"),
         add_column(Column("title", Nullable(String())), "search_message"),
         add_column(Column("location", Nullable(String())), "title"),
         add_column(Column("_tags_flattened", String()), "tags"),
         add_column(Column("message_timestamp", DateTime()), "partition"),
     ]
Ejemplo n.º 26
0
    def __init__(self):
        """Set up the events dataset: columns, schema, writer, tag processor.

        Builds the individual column groups, concatenates them into the full
        column list, creates one ReplacingMergeTree schema (``sentry_local`` /
        ``sentry_dist``) used for both reads and writes, wires a Kafka-backed
        table writer, initialises the parent class, and finally stores the
        column groups and a TagColumnProcessor on the instance.
        """
        metadata_columns = ColumnSet([
            # optional stream related data
            ('offset', Nullable(UInt(64))),
            ('partition', Nullable(UInt(16))),
        ])

        promoted_tag_columns = ColumnSet([
            # These are the classic tags, they are saved in Snuba exactly as they
            # appear in the event body.
            ('level', Nullable(String())),
            ('logger', Nullable(String())),
            ('server_name', Nullable(String())),  # future name: device_id?
            ('transaction', Nullable(String())),
            ('environment', Nullable(String())),
            ('sentry:release', Nullable(String())),
            ('sentry:dist', Nullable(String())),
            ('sentry:user', Nullable(String())),
            ('site', Nullable(String())),
            ('url', Nullable(String())),
        ])

        promoted_context_tag_columns = ColumnSet([
            # These are promoted tags that come in in `tags`, but are more closely
            # related to contexts.  To avoid naming confusion with Clickhouse nested
            # columns, they are stored in the database with s/./_/
            # promoted tags
            ('app_device', Nullable(String())),
            ('device', Nullable(String())),
            ('device_family', Nullable(String())),
            ('runtime', Nullable(String())),
            ('runtime_name', Nullable(String())),
            ('browser', Nullable(String())),
            ('browser_name', Nullable(String())),
            ('os', Nullable(String())),
            ('os_name', Nullable(String())),
            ('os_rooted', Nullable(UInt(8))),
        ])

        # Context values stored in dedicated columns.
        promoted_context_columns = ColumnSet([
            ('os_build', Nullable(String())),
            ('os_kernel_version', Nullable(String())),
            ('device_name', Nullable(String())),
            ('device_brand', Nullable(String())),
            ('device_locale', Nullable(String())),
            ('device_uuid', Nullable(String())),
            ('device_model_id', Nullable(String())),
            ('device_arch', Nullable(String())),
            ('device_battery_level', Nullable(Float(32))),
            ('device_orientation', Nullable(String())),
            ('device_simulator', Nullable(UInt(8))),
            ('device_online', Nullable(UInt(8))),
            ('device_charging', Nullable(UInt(8))),
        ])

        # The only non-nullable scalar columns of the table.
        required_columns = ColumnSet([
            ('event_id', FixedString(32)),
            ('project_id', UInt(64)),
            ('group_id', UInt(64)),
            ('timestamp', DateTime()),
            ('deleted', UInt(8)),
            ('retention_days', UInt(16)),
        ])

        # Full column list. The concatenation order is deliberate: it has to
        # match the column order of the production table (see the note on the
        # trailing columns below).
        all_columns = required_columns + [
            # required for non-deleted
            ('platform', Nullable(String())),
            ('message', Nullable(String())),
            ('primary_hash', Nullable(FixedString(32))),
            ('received', Nullable(DateTime())),

            ('search_message', Nullable(String())),
            ('title', Nullable(String())),
            ('location', Nullable(String())),

            # optional user
            ('user_id', Nullable(String())),
            ('username', Nullable(String())),
            ('email', Nullable(String())),
            ('ip_address', Nullable(String())),

            # optional geo
            ('geo_country_code', Nullable(String())),
            ('geo_region', Nullable(String())),
            ('geo_city', Nullable(String())),

            ('sdk_name', Nullable(String())),
            ('sdk_version', Nullable(String())),
            ('type', Nullable(String())),
            ('version', Nullable(String())),
        ] + metadata_columns \
            + promoted_context_columns \
            + promoted_tag_columns \
            + promoted_context_tag_columns \
            + [
                # other tags
                ('tags', Nested([
                    ('key', String()),
                    ('value', String()),
                ])),

                # other context
                ('contexts', Nested([
                    ('key', String()),
                    ('value', String()),
                ])),

                # http interface
                ('http_method', Nullable(String())),
                ('http_referer', Nullable(String())),

                # exception interface
                ('exception_stacks', Nested([
                    ('type', Nullable(String())),
                    ('value', Nullable(String())),
                    ('mechanism_type', Nullable(String())),
                    ('mechanism_handled', Nullable(UInt(8))),
                ])),
                ('exception_frames', Nested([
                    ('abs_path', Nullable(String())),
                    ('filename', Nullable(String())),
                    ('package', Nullable(String())),
                    ('module', Nullable(String())),
                    ('function', Nullable(String())),
                    ('in_app', Nullable(UInt(8))),
                    ('colno', Nullable(UInt(32))),
                    ('lineno', Nullable(UInt(32))),
                    ('stack_level', UInt(16)),
                ])),

                # These are columns we added later in the life of the (current) production
                # database. They don't necessarily belong here in a logical/readability sense
                # but they are here to match the order of columns in production because
                # `insert_distributed_sync` is very sensitive to column existence and ordering.
                ('culprit', Nullable(String())),
                ('sdk_integrations', Array(String())),
                ('modules', Nested([
                    ('name', String()),
                    ('version', String()),
                ])),
        ]

        # Used both as the sampling expression and as the last ORDER BY key.
        sample_expr = 'cityHash64(toString(event_id))'
        schema = ReplacingMergeTreeSchema(
            columns=all_columns,
            local_table_name='sentry_local',
            dist_table_name='sentry_dist',
            mandatory_conditions=[('deleted', '=', 0)],
            order_by='(project_id, toStartOfDay(timestamp), %s)' % sample_expr,
            partition_by='(toMonday(timestamp), if(equals(retention_days, 30), 30, 90))',
            version_column='deleted',
            sample_expr=sample_expr,
            migration_function=events_migrations)

        # The same schema object serves reads and writes.
        dataset_schemas = DatasetSchemas(
            read_schema=schema,
            write_schema=schema,
        )

        table_writer = TableWriter(
            write_schema=schema,
            stream_loader=KafkaStreamLoader(
                processor=EventsProcessor(promoted_tag_columns),
                default_topic="events",
                replacement_topic="event-replacements",
                commit_log_topic="snuba-commit-log",
            )
        )

        super(EventsDataset, self).__init__(
            dataset_schemas=dataset_schemas,
            table_writer=table_writer,
            time_group_columns={
                'time': 'timestamp',
                'rtime': 'received'
            },
            time_parse_columns=('timestamp', 'received')
        )

        # Keep the column groups on the instance. They are assigned before the
        # tags processor is built; presumably _get_promoted_columns() and
        # _get_column_tag_map() read them (defined outside this view; confirm).
        self.__metadata_columns = metadata_columns
        self.__promoted_tag_columns = promoted_tag_columns
        self.__promoted_context_tag_columns = promoted_context_tag_columns
        self.__promoted_context_columns = promoted_context_columns
        self.__required_columns = required_columns

        self.__tags_processor = TagColumnProcessor(
            columns=all_columns,
            promoted_columns=self._get_promoted_columns(),
            column_tag_map=self._get_column_tag_map(),
        )
Ejemplo n.º 27
0
 ("platform", LowCardinality(String())),
 ("environment", LowCardinality(Nullable(String()))),
 ("release", LowCardinality(Nullable(String()))),
 ("dist", LowCardinality(Nullable(String()))),
 ("ip_address_v4", Nullable(IPv4())),
 ("ip_address_v6", Nullable(IPv6())),
 ("user", WithDefault(String(), "''")),
 ("user_hash", Materialized(UInt(64), "cityHash64(user)"),),
 ("user_id", Nullable(String())),
 ("user_name", Nullable(String())),
 ("user_email", Nullable(String())),
 ("sdk_name", LowCardinality(Nullable(String()))),
 ("sdk_version", LowCardinality(Nullable(String()))),
 ("http_method", LowCardinality(Nullable(String()))),
 ("http_referer", Nullable(String())),
 ("tags", Nested([("key", String()), ("value", String())])),
 ("_tags_flattened", String()),
 ("_tags_hash_map", Materialized(Array(UInt(64)), TAGS_HASH_MAP_COLUMN)),
 ("contexts", Nested([("key", String()), ("value", String())])),
 ("_contexts_flattened", String()),
 ("transaction_name", WithDefault(LowCardinality(String()), "''")),
 ("transaction_hash", Materialized(UInt(64), "cityHash64(transaction_name)"),),
 ("span_id", Nullable(UInt(64))),
 ("trace_id", Nullable(UUID())),
 ("partition", UInt(16)),
 ("offset", WithCodecs(UInt(64), ["DoubleDelta", "LZ4"])),
 ("message_timestamp", DateTime()),
 ("retention_days", UInt(16)),
 ("deleted", UInt(8)),
 ("group_id", UInt(64)),
 ("primary_hash", FixedString(32)),
Ejemplo n.º 28
0
from snuba.datasets.storages.tags_hash_map import INT_TAGS_HASH_MAP_COLUMN
from snuba.migrations import operations, table_engines
from snuba.migrations.columns import MigrationModifiers as Modifiers
from snuba.utils.schemas import String

#: The granularity used for the initial materialized views.
#: This might differ from snuba.datasets.metrics.DEFAULT_GRANULARITY at
#: a later point.
ORIGINAL_GRANULARITY = 60

#: Identification columns: org, project and metric ids, the timestamp, and
#: a nested ``tags`` column of UInt(64) key/value pairs.
#: NOTE(review): going by the name, these presumably precede the
#: metric-specific value columns in a table definition; confirm at usage.
PRE_VALUE_BUCKETS_COLUMNS: Sequence[Column[Modifiers]] = [
    Column("org_id", UInt(64)),
    Column("project_id", UInt(64)),
    Column("metric_id", UInt(64)),
    Column("timestamp", DateTime()),
    Column("tags", Nested([Column("key", UInt(64)),
                           Column("value", UInt(64))])),
]

#: Bookkeeping columns: materialization version, retention, and the
#: partition/offset pair.
#: NOTE(review): going by the name, these presumably follow the
#: metric-specific value columns in a table definition; confirm at usage.
POST_VALUES_BUCKETS_COLUMNS: Sequence[Column[Modifiers]] = [
    Column("materialization_version", UInt(8)),
    Column("retention_days", UInt(16)),
    Column("partition", UInt(16)),
    Column("offset", UInt(64)),
]

COL_SCHEMA_DISTRIBUTIONS: Sequence[Column[Modifiers]] = [
    Column(
        "percentiles",
        AggregateFunction("quantiles(0.5, 0.75, 0.9, 0.95, 0.99)",
                          [Float(64)]),
    ),
Ejemplo n.º 29
0
        ("platform", LowCardinality(String())),
        ("environment", LowCardinality(Nullable(String()))),
        ("release", LowCardinality(Nullable(String()))),
        ("dist", LowCardinality(Nullable(String()))),
        ("ip_address_v4", Nullable(IPv4())),
        ("ip_address_v6", Nullable(IPv6())),
        ("user", WithDefault(String(), "''",)),
        ("user_hash", Materialized(UInt(64), "cityHash64(user)"),),
        ("user_id", Nullable(String())),
        ("user_name", Nullable(String())),
        ("user_email", Nullable(String())),
        ("sdk_name", WithDefault(LowCardinality(String()), "''")),
        ("sdk_version", WithDefault(LowCardinality(String()), "''")),
        ("http_method", LowCardinality(Nullable(String()))),
        ("http_referer", Nullable(String())),
        ("tags", Nested([("key", String()), ("value", String())])),
        ("_tags_flattened", String()),
        ("_tags_hash_map", Materialized(Array(UInt(64)), TAGS_HASH_MAP_COLUMN)),
        ("contexts", Nested([("key", String()), ("value", String())])),
        ("_contexts_flattened", String()),
        (
            "measurements",
            Nested([("key", LowCardinality(String())), ("value", Float(64))]),
        ),
        ("partition", UInt(16)),
        ("offset", UInt(64)),
        ("message_timestamp", DateTime()),
        ("retention_days", UInt(16)),
        ("deleted", UInt(8)),
    ]
)
Ejemplo n.º 30
0
class Migration(migration.ClickhouseNodeMigration):
    """Create the raw generic-metrics sets tables.

    Local nodes get the MergeTree table ``generic_metric_sets_raw_local``;
    distributed nodes get ``generic_metric_sets_raw_dist`` on top of it,
    sharded by ``cityHash64(timeseries_id)``. Each backward step drops the
    table created by the matching forward step.
    """

    blocking = False
    local_table_name = "generic_metric_sets_raw_local"
    dist_table_name = "generic_metric_sets_raw_dist"

    # Columns shared by the local and the distributed table definitions.
    columns: Sequence[Column[Modifiers]] = [
        Column("use_case_id", String(Modifiers(low_cardinality=True))),
        Column("org_id", UInt(64)),
        Column("project_id", UInt(64)),
        Column("metric_id", UInt(64)),
        Column("timestamp", DateTime()),
        Column("retention_days", UInt(16)),
        Column(
            "tags",
            Nested(
                [
                    ("key", UInt(64)),
                    ("indexed_value", UInt(64)),
                    ("raw_value", String()),
                ]
            ),
        ),
        Column("set_values", Array(UInt(64))),
        Column("count_value", Float(64)),
        Column("distribution_values", Array(Float(64))),
        Column("metric_type", String(Modifiers(low_cardinality=True))),
        Column("materialization_version", UInt(8)),
        Column("timeseries_id", UInt(32)),
        Column("partition", UInt(16)),
        Column("offset", UInt(64)),
    ]

    def forwards_local(self) -> Sequence[operations.SqlOperation]:
        """Return the operation that creates the local MergeTree table."""
        storage_set = StorageSetKey.GENERIC_METRICS_SETS
        engine = table_engines.MergeTree(
            storage_set=storage_set,
            order_by="(use_case_id, org_id, project_id, metric_id, timestamp)",
            partition_by="(toStartOfInterval(timestamp, toIntervalDay(3)))",
            ttl="timestamp + toIntervalDay(7)",
        )
        create_local = operations.CreateTable(
            storage_set=storage_set,
            table_name=self.local_table_name,
            engine=engine,
            columns=self.columns,
        )
        return [create_local]

    def backwards_local(self) -> Sequence[operations.SqlOperation]:
        """Return the operation that drops the local table."""
        drop_local = operations.DropTable(
            storage_set=StorageSetKey.GENERIC_METRICS_SETS,
            table_name=self.local_table_name,
        )
        return [drop_local]

    def forwards_dist(self) -> Sequence[operations.SqlOperation]:
        """Return the operation that creates the distributed table."""
        engine = table_engines.Distributed(
            local_table_name=self.local_table_name,
            sharding_key="cityHash64(timeseries_id)",
        )
        create_dist = operations.CreateTable(
            storage_set=StorageSetKey.GENERIC_METRICS_SETS,
            table_name=self.dist_table_name,
            engine=engine,
            columns=self.columns,
        )
        return [create_dist]

    def backwards_dist(self) -> Sequence[operations.SqlOperation]:
        """Return the operation that drops the distributed table."""
        drop_dist = operations.DropTable(
            storage_set=StorageSetKey.GENERIC_METRICS_SETS,
            table_name=self.dist_table_name,
        )
        return [drop_dist]