Example #1
def _get_column(column_type: str, default_type: str, default_expr: str,
                codec_expr: str) -> ColumnType:
    column: ColumnType = Visitor().visit(grammar.parse(column_type))

    if default_type == "MATERIALIZED":
        column = Materialized(column, _strip_cast(default_expr))
    elif default_type == "DEFAULT":
        column = WithDefault(column, _strip_cast(default_expr))

    if codec_expr:
        column = WithCodecs(column, codec_expr.split(", "))

    return column
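
A quick usage sketch (hypothetical calls; the expected values mirror the parametrized cases in Example #8 below). Each modifier wraps the parsed base type:

# Illustrative only: assumes _get_column and the column classes are in scope.
assert _get_column("LowCardinality(String)", "DEFAULT", "a", "") == WithDefault(
    LowCardinality(String()), "a"
)
assert _get_column("DateTime", "", "", "DoubleDelta, LZ4") == WithCodecs(
    DateTime(), ["DoubleDelta", "LZ4"]
)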
Example #2
def test_modifiers() -> None:
    cols = ColumnSet(
        [
            ("col1", WithDefault(String(), "")),
            ("col2", Nullable(Array(String()))),
            ("col3", WithCodecs(Materialized(String(), "something"), ["c"]),),
            (
                "col4",
                WithCodecs(Nullable(Materialized(String(), "something")), ["c"],),
            ),
        ]
    )

    assert [WithDefault] == cols["col1"].type.get_all_modifiers()
    assert [Nullable] == cols["col2"].type.get_all_modifiers()
    assert [Materialized, WithCodecs] == cols["col3"].type.get_all_modifiers()
    assert [Materialized, Nullable, WithCodecs] == cols["col4"].type.get_all_modifiers()
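
The assertions above imply that get_all_modifiers reports modifier classes innermost-first. A minimal sketch of how such a traversal could work (hypothetical, not Snuba's actual implementation; the inner_type attribute is an assumption):

# Hypothetical sketch: assumes each modifier wrapper exposes the wrapped
# type as `inner_type`.
def get_all_modifiers_sketch(column_type):
    found = []
    current = column_type
    while isinstance(current, (Materialized, WithDefault, WithCodecs, Nullable)):
        found.append(type(current))
        current = current.inner_type
    return list(reversed(found))  # innermost modifier first, as asserted above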
Example #3
            f"ALTER TABLE {clickhouse_table} ADD COLUMN http_referer Nullable(String) AFTER http_method"
        )

    return ret


columns = ColumnSet(
    [
        ("project_id", UInt(64)),
        ("event_id", UUID()),
        ("trace_id", UUID()),
        ("span_id", UInt(64)),
        ("transaction_name", LowCardinality(String())),
        ("transaction_hash", Materialized(UInt(64), "cityHash64(transaction_name)",),),
        ("transaction_op", LowCardinality(String())),
        ("transaction_status", WithDefault(UInt(8), str(UNKNOWN_SPAN_STATUS))),
        ("start_ts", DateTime()),
        ("start_ms", UInt(16)),
        ("finish_ts", DateTime()),
        ("finish_ms", UInt(16)),
        ("duration", UInt(32)),
        ("platform", LowCardinality(String())),
        ("environment", LowCardinality(Nullable(String()))),
        ("release", LowCardinality(Nullable(String()))),
        ("dist", LowCardinality(Nullable(String()))),
        ("ip_address_v4", Nullable(IPv4())),
        ("ip_address_v6", Nullable(IPv6())),
        ("user", WithDefault(String(), "''",)),
        ("user_hash", Materialized(UInt(64), "cityHash64(user)"),),
        ("user_id", Nullable(String())),
        ("user_name", Nullable(String())),
Example #4
 ("project_id", UInt(64)),
 ("timestamp", DateTime()),
 ("event_id", WithCodecs(UUID(), ["NONE"])),
 (
     "event_hash",
     WithCodecs(
         Materialized(UInt(64), "cityHash64(toString(event_id))",), ["NONE"],
     ),
 ),
 ("platform", LowCardinality(String())),
 ("environment", LowCardinality(Nullable(String()))),
 ("release", LowCardinality(Nullable(String()))),
 ("dist", LowCardinality(Nullable(String()))),
 ("ip_address_v4", Nullable(IPv4())),
 ("ip_address_v6", Nullable(IPv6())),
 ("user", WithDefault(String(), "''")),
 ("user_hash", Materialized(UInt(64), "cityHash64(user)"),),
 ("user_id", Nullable(String())),
 ("user_name", Nullable(String())),
 ("user_email", Nullable(String())),
 ("sdk_name", LowCardinality(Nullable(String()))),
 ("sdk_version", LowCardinality(Nullable(String()))),
 ("http_method", LowCardinality(Nullable(String()))),
 ("http_referer", Nullable(String())),
 ("tags", Nested([("key", String()), ("value", String())])),
 ("_tags_flattened", String()),
 ("_tags_hash_map", Materialized(Array(UInt(64)), TAGS_HASH_MAP_COLUMN)),
 ("contexts", Nested([("key", String()), ("value", String())])),
 ("_contexts_flattened", String()),
 ("transaction_name", WithDefault(LowCardinality(String()), "''")),
 ("transaction_hash", Materialized(UInt(64), "cityHash64(transaction_name)"),),
Example #5
from snuba.query.processors.prewhere import PrewhereProcessor
from snuba.web.split import TimeSplitQueryStrategy

columns = ColumnSet([
    ("project_id", UInt(64)),
    ("transaction_id", UUID()),
    ("trace_id", UUID()),
    ("transaction_span_id", UInt(64)),
    ("span_id", UInt(64)),
    ("parent_span_id", Nullable(UInt(64))),
    ("transaction_name", LowCardinality(String())),
    ("description", String()),  # description in span
    ("op", LowCardinality(String())),
    (
        "status",
        WithDefault(UInt(8), str(UNKNOWN_SPAN_STATUS)),
    ),
    ("start_ts", DateTime()),
    ("start_ns", UInt(32)),
    ("finish_ts", DateTime()),
    ("finish_ns", UInt(32)),
    ("duration_ms", UInt(32)),
    ("tags", Nested([("key", String()), ("value", String())])),
    ("_tags_hash_map", Materialized(Array(UInt(64)), TAGS_HASH_MAP_COLUMN)),
    ("retention_days", UInt(16)),
    ("deleted", UInt(8)),
])

schema = WritableTableSchema(
    columns=columns,
    local_table_name="spans_experimental_local",
Example #6
    def __init__(self) -> None:
        columns = ColumnSet(
            [
                ("project_id", UInt(64)),
                ("event_id", UUID()),
                ("trace_id", UUID()),
                ("span_id", UInt(64)),
                ("transaction_name", LowCardinality(String())),
                (
                    "transaction_hash",
                    Materialized(UInt(64), "cityHash64(transaction_name)",),
                ),
                ("transaction_op", LowCardinality(String())),
                ("transaction_status", WithDefault(UInt(8), UNKNOWN_SPAN_STATUS)),
                ("start_ts", DateTime()),
                ("start_ms", UInt(16)),
                ("_start_date", Materialized(Date(), "toDate(start_ts)"),),
                ("finish_ts", DateTime()),
                ("finish_ms", UInt(16)),
                ("_finish_date", Materialized(Date(), "toDate(finish_ts)"),),
                ("duration", UInt(32)),
                ("platform", LowCardinality(String())),
                ("environment", LowCardinality(Nullable(String()))),
                ("release", LowCardinality(Nullable(String()))),
                ("dist", LowCardinality(Nullable(String()))),
                ("ip_address_v4", Nullable(IPv4())),
                ("ip_address_v6", Nullable(IPv6())),
                ("user", WithDefault(String(), "''",)),
                ("user_hash", Materialized(UInt(64), "cityHash64(user)"),),
                ("user_id", Nullable(String())),
                ("user_name", Nullable(String())),
                ("user_email", Nullable(String())),
                ("sdk_name", WithDefault(LowCardinality(String()), "''")),
                ("sdk_version", WithDefault(LowCardinality(String()), "''")),
                ("tags", Nested([("key", String()), ("value", String())])),
                ("_tags_flattened", String()),
                ("contexts", Nested([("key", String()), ("value", String())])),
                ("_contexts_flattened", String()),
                ("partition", UInt(16)),
                ("offset", UInt(64)),
                ("retention_days", UInt(16)),
                ("deleted", UInt(8)),
            ]
        )

        schema = ReplacingMergeTreeSchema(
            columns=columns,
            local_table_name="transactions_local",
            dist_table_name="transactions_dist",
            mandatory_conditions=[],
            prewhere_candidates=["event_id", "project_id"],
            order_by="(project_id, _finish_date, transaction_name, cityHash64(span_id))",
            partition_by="(retention_days, toMonday(_finish_date))",
            version_column="deleted",
            sample_expr=None,
            migration_function=transactions_migrations,
        )

        dataset_schemas = DatasetSchemas(read_schema=schema, write_schema=schema,)

        self.__tags_processor = TagColumnProcessor(
            columns=columns,
            promoted_columns=self._get_promoted_columns(),
            column_tag_map=self._get_column_tag_map(),
        )

        super().__init__(
            dataset_schemas=dataset_schemas,
            table_writer=TransactionsTableWriter(
                write_schema=schema,
                stream_loader=KafkaStreamLoader(
                    processor=TransactionsMessageProcessor(), default_topic="events",
                ),
            ),
            time_group_columns={
                "bucketed_start": "start_ts",
                "bucketed_end": "finish_ts",
            },
            time_parse_columns=("start_ts", "finish_ts"),
        )
Example #7
    def __init__(self):
        columns = ColumnSet([
            ('project_id', UInt(64)),
            ('event_id', UUID()),
            ('trace_id', UUID()),
            ('span_id', UInt(64)),
            ('transaction_name', String()),
            ('transaction_hash',
             Materialized(
                 UInt(64),
                 'cityHash64(transaction_name)',
             )),
            ('transaction_op', LowCardinality(String())),
            ('start_ts', DateTime()),
            ('start_ms', UInt(16)),
            ('finish_ts', DateTime()),
            ('finish_ms', UInt(16)),
            ('duration',
             Materialized(
                 UInt(32),
                 '((finish_ts - start_ts) * 1000) + (finish_ms - start_ms)',
             )),
            ('platform', LowCardinality(String())),
            ('environment', Nullable(String())),
            ('release', Nullable(String())),
            ('dist', Nullable(String())),
            ('ip_address_v4', Nullable(IPv4())),
            ('ip_address_v6', Nullable(IPv6())),
            ('user', WithDefault(
                String(),
                "''",
            )),
            ('user_id', Nullable(String())),
            ('user_name', Nullable(String())),
            ('user_email', Nullable(String())),
            ('tags', Nested([
                ('key', String()),
                ('value', String()),
            ])),
            ('contexts', Nested([
                ('key', String()),
                ('value', String()),
            ])),
            ('partition', UInt(16)),
            ('offset', UInt(64)),
            ('retention_days', UInt(16)),
            ('deleted', UInt(8)),
        ])

        schema = ReplacingMergeTreeSchema(
            columns=columns,
            local_table_name='transactions_local',
            dist_table_name='transactions_dist',
            order_by='(project_id, toStartOfDay(start_ts), transaction_hash, start_ts, start_ms, trace_id, span_id)',
            partition_by='(retention_days, toMonday(start_ts))',
            version_column='deleted',
            sample_expr=None,
        )

        dataset_schemas = DatasetSchemas(
            read_schema=schema,
            write_schema=schema,
        )

        super().__init__(
            dataset_schemas=dataset_schemas,
            processor=TransactionsMessageProcessor(),
            default_topic="events",
            time_group_columns={
                'bucketed_start': 'start_ts',
                'bucketed_end': 'finish_ts',
            },
        )
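
The materialized duration column combines the second-resolution DateTime difference with the millisecond remainder columns. A worked check in plain Python (illustrative only; in ClickHouse the DateTime subtraction yields seconds):

# A span starting at 10s + 200ms and finishing at 12s + 450ms lasts 2250 ms.
start_ts, start_ms = 10, 200
finish_ts, finish_ms = 12, 450
assert ((finish_ts - start_ts) * 1000) + (finish_ms - start_ms) == 2250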
Example #8
    (("Array(Nullable(UUID))", "", "", ""), Array(Nullable(UUID()))),
    # Nullable
    (("Nullable(String)", "", "", ""), Nullable(String())),
    (("Nullable(FixedString(8))", "", "", ""), Nullable(FixedString(8))),
    (("Nullable(Date)", "", "", ""), Nullable(Date())),
    # Low cardinality
    (("LowCardinality(String)", "", "", ""), LowCardinality(String())),
    (("LowCardinality(Nullable(String))", "", "", ""),
     LowCardinality(Nullable(String()))),
    # Materialized
    (("Date", "MATERIALIZED", "toDate(col1)", ""),
     Materialized(Date(), "toDate(col1)")),
    (("UInt64", "MATERIALIZED", "CAST(cityHash64(col1), 'UInt64')", ""),
     Materialized(UInt(64), "cityHash64(col1)")),
    # Default value
    (("LowCardinality(String)", "DEFAULT", "a", ""),
     WithDefault(LowCardinality(String()), "a")),
    (("UInt8", "DEFAULT", "2", ""), WithDefault(UInt(8), "2")),
    # With codecs
    (("UUID", "", "", "NONE"), WithCodecs(UUID(), ["NONE"])),
    (("DateTime", "", "", "DoubleDelta, LZ4"),
     WithCodecs(DateTime(), ["DoubleDelta", "LZ4"])),
]


@pytest.mark.parametrize("input, expected_output", test_data)
def test_parse_column(input, expected_output):
    (input_name, input_type, default_expr, codec_expr) = input
    assert _get_column(input_name, input_type, default_expr,
                       codec_expr) == expected_output
Example #9
from snuba.clickhouse.columns import Column, DateTime, Enum, String, UInt, WithDefault
from snuba.clusters.storage_sets import StorageSetKey
from snuba.migrations import migration, operations
from snuba.migrations.context import Context
from snuba.migrations.status import Status
from snuba.migrations.table_engines import Distributed, ReplacingMergeTree

columns = [
    Column("group", String()),
    Column("migration_id", String()),
    Column("timestamp", DateTime()),
    Column(
        "status",
        Enum([("completed", 0), ("in_progress", 1), ("not_started", 2)]),
    ),
    Column("version", WithDefault(UInt(64), "1")),
]


class Migration(migration.Migration):
    """
    This migration extends Migration instead of MultiStepMigration since it is
    responsible for bootstrapping the migration system itself. It skips setting
    the in progress status in the forwards method and the not started status in
    the backwards method. Since the migration table doesn't exist yet, we can't
    write any statuses until this migration is completed.
    """

    blocking = False

    def __forwards_local(self) -> Sequence[operations.Operation]:
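        # Hedged completion (assumed, not shown in the excerpt): based on the
        # imports above, this plausibly creates the migrations table itself.
        # StorageSetKey.MIGRATIONS and the order_by key are assumptions.
        return [
            operations.CreateTable(
                storage_set=StorageSetKey.MIGRATIONS,
                table_name="migrations_local",
                columns=columns,
                engine=ReplacingMergeTree(
                    storage_set=StorageSetKey.MIGRATIONS,
                    version_column="version",
                    order_by="(group, migration_id)",
                ),
            )
        ]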
Example #10
    def __forward_migrations(
        self, table_name: str
    ) -> Sequence[operations.Operation]:
        return [
            operations.AddColumn(
                storage_set=StorageSetKey.QUERYLOG,
                table_name=table_name,
                column=Column(
                    "clickhouse_queries.all_columns",
                    WithDefault(
                        Array(Array(LowCardinality(String()))),
                        "arrayResize([['']], length(clickhouse_queries.sql))",
                    ),
                ),
                after="clickhouse_queries.consistent",
            ),
            operations.AddColumn(
                storage_set=StorageSetKey.QUERYLOG,
                table_name=table_name,
                column=Column(
                    "clickhouse_queries.or_conditions",
                    WithDefault(
                        Array(UInt(8)),
                        "arrayResize([0], length(clickhouse_queries.sql))",
                    ),
                ),
                after="clickhouse_queries.all_columns",
            ),
            operations.AddColumn(
                storage_set=StorageSetKey.QUERYLOG,
                table_name=table_name,
                column=Column(
                    "clickhouse_queries.where_columns",
                    WithDefault(
                        Array(Array(LowCardinality(String()))),
                        "arrayResize([['']], length(clickhouse_queries.sql))",
                    ),
                ),
                after="clickhouse_queries.or_conditions",
            ),
            operations.AddColumn(
                storage_set=StorageSetKey.QUERYLOG,
                table_name=table_name,
                column=Column(
                    "clickhouse_queries.where_mapping_columns",
                    WithDefault(
                        Array(Array(LowCardinality(String()))),
                        "arrayResize([['']], length(clickhouse_queries.sql))",
                    ),
                ),
                after="clickhouse_queries.where_columns",
            ),
            operations.AddColumn(
                storage_set=StorageSetKey.QUERYLOG,
                table_name=table_name,
                column=Column(
                    "clickhouse_queries.groupby_columns",
                    WithDefault(
                        Array(Array(LowCardinality(String()))),
                        "arrayResize([['']], length(clickhouse_queries.sql))",
                    ),
                ),
                after="clickhouse_queries.where_mapping_columns",
            ),
            operations.AddColumn(
                storage_set=StorageSetKey.QUERYLOG,
                table_name=table_name,
                column=Column(
                    "clickhouse_queries.array_join_columns",
                    WithDefault(
                        Array(Array(LowCardinality(String()))),
                        "arrayResize([['']], length(clickhouse_queries.sql))",
                    ),
                ),
                after="clickhouse_queries.groupby_columns",
            ),
        ]
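
    # Hypothetical counterpart (assumed, not part of the excerpt): the
    # backwards path would drop the same columns again, in reverse order.
    def __backwards_migrations(
        self, table_name: str
    ) -> Sequence[operations.Operation]:
        return [
            operations.DropColumn(
                storage_set=StorageSetKey.QUERYLOG,
                table_name=table_name,
                column_name=name,
            )
            for name in [
                "clickhouse_queries.array_join_columns",
                "clickhouse_queries.groupby_columns",
                "clickhouse_queries.where_mapping_columns",
                "clickhouse_queries.where_columns",
                "clickhouse_queries.or_conditions",
                "clickhouse_queries.all_columns",
            ]
        ]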
Example #11
 ("clickhouse_queries.duration_ms", Array(UInt(32))),
 ("clickhouse_queries.stats", Array(String())),
 ("clickhouse_queries.final", Array(UInt(8))),
 ("clickhouse_queries.cache_hit", Array(UInt(8))),
 ("clickhouse_queries.sample", Array(Float(32))),
 ("clickhouse_queries.max_threads", Array(UInt(8))),
 ("clickhouse_queries.num_days", Array(UInt(32))),
 ("clickhouse_queries.clickhouse_table", Array(LowCardinality(String()))),
 ("clickhouse_queries.query_id", Array(String())),
 # XXX: ``is_duplicate`` is currently not set when using the
 # ``Cache.get_readthrough`` query execution path. See GH-902.
 ("clickhouse_queries.is_duplicate", Array(UInt(8))),
 ("clickhouse_queries.consistent", Array(UInt(8))),
 (
     "clickhouse_queries.all_columns",
     WithDefault(Array(Array(LowCardinality(String()))),
                 NESTED_ARRAY_DEFAULT),
 ),
 (
     "clickhouse_queries.or_conditions",
     WithDefault(
         Array(UInt(8)),
         "arrayResize([0], length(clickhouse_queries.sql))",
     ),
 ),
 (
     "clickhouse_queries.where_columns",
     WithDefault(Array(Array(LowCardinality(String()))),
                 NESTED_ARRAY_DEFAULT),
 ),
 (
     "clickhouse_queries.where_mapping_columns",
Example #12
UNKNOWN_SPAN_STATUS = SPAN_STATUS_NAME_TO_CODE["unknown"]

tags_col = Column("tags", Nested([("key", String()), ("value", String())]))

columns = [
    Column("project_id", UInt(64)),
    Column("transaction_id", UUID()),
    Column("trace_id", UUID()),
    Column("transaction_span_id", UInt(64)),
    Column("span_id", UInt(64)),
    Column("parent_span_id", Nullable(UInt(64))),
    Column("transaction_name", LowCardinality(String())),
    Column("description", String()),  # description in span
    Column("op", LowCardinality(String())),
    Column("status", WithDefault(UInt(8), str(UNKNOWN_SPAN_STATUS)),),
    Column("start_ts", DateTime()),
    Column("start_ns", UInt(32)),
    Column("finish_ts", DateTime()),
    Column("finish_ns", UInt(32)),
    Column("duration_ms", UInt(32)),
    tags_col,
    Column("retention_days", UInt(16)),
    Column("deleted", UInt(8)),
]


class Migration(migration.MultiStepMigration):
    blocking = False

    def forwards_local(self) -> Sequence[operations.Operation]:
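        # Hedged completion (assumed, not shown in the excerpt): a creation
        # migration like this plausibly returns a CreateTable operation keyed
        # on the "deleted" version column. The storage set key and order_by
        # expression below are illustrative assumptions only.
        return [
            operations.CreateTable(
                storage_set=StorageSetKey.TRANSACTIONS,
                table_name="spans_experimental_local",
                columns=columns,
                engine=table_engines.ReplacingMergeTree(
                    storage_set=StorageSetKey.TRANSACTIONS,
                    version_column="deleted",
                    order_by="(project_id, transaction_span_id, span_id)",
                ),
            )
        ]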
Example #13
    def __init__(self) -> None:
        all_columns = ColumnSet([
            ("org_id", UInt(64)),
            ("project_id", UInt(64)),
            ("timestamp", DateTime()),
            ("event_id", WithCodecs(UUID(), ["NONE"])),
            (
                "event_hash",
                WithCodecs(
                    Materialized(
                        UInt(64),
                        "cityHash64(toString(event_id))",
                    ),
                    ["NONE"],
                ),
            ),
            ("platform", LowCardinality(String())),
            ("environment", LowCardinality(Nullable(String()))),
            ("release", LowCardinality(Nullable(String()))),
            ("dist", LowCardinality(Nullable(String()))),
            ("ip_address_v4", Nullable(IPv4())),
            ("ip_address_v6", Nullable(IPv6())),
            ("user", WithDefault(String(), "''")),
            (
                "user_hash",
                Materialized(UInt(64), "cityHash64(user)"),
            ),
            ("user_id", Nullable(String())),
            ("user_name", Nullable(String())),
            ("user_email", Nullable(String())),
            ("sdk_name", LowCardinality(Nullable(String()))),
            ("sdk_version", LowCardinality(Nullable(String()))),
            ("tags", Nested([("key", String()), ("value", String())])),
            ("_tags_flattened", String()),
            ("contexts", Nested([("key", String()), ("value", String())])),
            ("_contexts_flattened", String()),
            ("transaction_name", WithDefault(LowCardinality(String()), "''")),
            (
                "transaction_hash",
                Materialized(UInt(64), "cityHash64(transaction_name)"),
            ),
            ("span_id", Nullable(UInt(64))),
            ("trace_id", Nullable(UUID())),
            ("partition", UInt(16)),
            ("offset", WithCodecs(UInt(64), ["DoubleDelta", "LZ4"])),
            ("retention_days", UInt(16)),
            ("deleted", UInt(8)),
            ("group_id", UInt(64)),
            ("primary_hash", FixedString(32)),
            ("primary_hash_hex", Materialized(UInt(64), "hex(primary_hash)")),
            ("event_string", WithCodecs(String(), ["NONE"])),
            ("received", DateTime()),
            ("message", String()),
            ("title", String()),
            ("culprit", String()),
            ("level", LowCardinality(String())),
            ("location", Nullable(String())),
            ("version", LowCardinality(Nullable(String()))),
            ("type", LowCardinality(String())),
            (
                "exception_stacks",
                Nested([
                    ("type", Nullable(String())),
                    ("value", Nullable(String())),
                    ("mechanism_type", Nullable(String())),
                    ("mechanism_handled", Nullable(UInt(8))),
                ]),
            ),
            (
                "exception_frames",
                Nested([
                    ("abs_path", Nullable(String())),
                    ("colno", Nullable(UInt(32))),
                    ("filename", Nullable(String())),
                    ("function", Nullable(String())),
                    ("lineno", Nullable(UInt(32))),
                    ("in_app", Nullable(UInt(8))),
                    ("package", Nullable(String())),
                    ("module", Nullable(String())),
                    ("stack_level", Nullable(UInt(16))),
                ]),
            ),
            ("sdk_integrations", Array(String())),
            ("modules", Nested([("name", String()), ("version", String())])),
        ])

        self.__promoted_tag_columns = {
            "environment": "environment",
            "sentry:release": "release",
            "sentry:dist": "dist",
            "sentry:user": "******",
            "transaction": "transaction_name",
            "level": "level",
        }

        schema = ReplacingMergeTreeSchema(
            columns=all_columns,
            local_table_name="errors_local",
            dist_table_name="errors_dist",
            mandatory_conditions=[("deleted", "=", 0)],
            prewhere_candidates=[
                "event_id",
                "group_id",
                "tags[sentry:release]",
                "message",
                "environment",
                "project_id",
            ],
            order_by="(org_id, project_id, toStartOfDay(timestamp), primary_hash_hex, event_hash)",
            partition_by="(toMonday(timestamp), if(retention_days = 30, 30, 90))",
            version_column="deleted",
            sample_expr="event_hash",
            ttl_expr="timestamp + toIntervalDay(retention_days)",
            settings={"index_granularity": "8192"},
        )

        dataset_schemas = DatasetSchemas(
            read_schema=schema,
            write_schema=schema,
        )

        table_writer = TableWriter(
            write_schema=schema,
            stream_loader=KafkaStreamLoader(
                processor=ErrorsProcessor(self.__promoted_tag_columns),
                default_topic="events",
            ),
        )

        super().__init__(
            dataset_schemas=dataset_schemas,
            table_writer=table_writer,
            time_group_columns={
                "time": "timestamp",
                "rtime": "received"
            },
            time_parse_columns=("timestamp", "received"),
        )

        self.__tags_processor = TagColumnProcessor(
            columns=all_columns,
            promoted_columns=self._get_promoted_columns(),
            column_tag_map=self._get_column_tag_map(),
        )