예제 #1
0
def test_modifiers() -> None:
    """Check which modifier classes get_all_modifiers() reports per column.

    Each column type below is wrapped in one or more modifiers; the
    expected lists are ordered from the innermost wrapper outward, and the
    Array wrapper on col2 is not expected to be reported.
    """
    schema = ColumnSet(
        [
            ("col1", WithDefault(String(), "")),
            ("col2", Nullable(Array(String()))),
            ("col3", WithCodecs(Materialized(String(), "something"), ["c"])),
            (
                "col4",
                WithCodecs(Nullable(Materialized(String(), "something")), ["c"]),
            ),
        ]
    )

    # (column name, modifier classes expected from get_all_modifiers()).
    expected_modifiers = (
        ("col1", [WithDefault]),
        ("col2", [Nullable]),
        ("col3", [Materialized, WithCodecs]),
        ("col4", [Materialized, Nullable, WithCodecs]),
    )
    for column_name, modifiers in expected_modifiers:
        assert modifiers == schema[column_name].type.get_all_modifiers()
예제 #2
0
def _get_column(column_type: str, default_type: str, default_expr: str,
                codec_expr: str) -> ColumnType:
    """Build a ColumnType from the textual pieces of a column definition.

    Args:
        column_type: Type expression, parsed with ``grammar`` and ``Visitor``.
        default_type: ``"MATERIALIZED"`` or ``"DEFAULT"`` to wrap the parsed
            type in the matching modifier; any other value adds no wrapper.
        default_expr: Expression for the modifier above; it is passed
            through ``_strip_cast`` before being stored.
        codec_expr: Codec names separated by ``", "``; an empty string
            means no codecs are attached.

    Returns:
        The parsed type, wrapped in whichever modifiers were requested.
    """
    parsed: ColumnType = Visitor().visit(grammar.parse(column_type))

    # Pick the default-style wrapper (if any) from the keyword.
    wrappers = {"MATERIALIZED": Materialized, "DEFAULT": WithDefault}
    wrapper = wrappers.get(default_type)
    if wrapper is not None:
        parsed = wrapper(parsed, _strip_cast(default_expr))

    if not codec_expr:
        return parsed
    return WithCodecs(parsed, codec_expr.split(", "))
예제 #3
0
        )

    if "http_referer" not in current_schema:
        ret.append(
            f"ALTER TABLE {clickhouse_table} ADD COLUMN http_referer Nullable(String) AFTER http_method"
        )

    return ret


all_columns = ColumnSet(
    [
        ("org_id", UInt(64)),
        ("project_id", UInt(64)),
        ("timestamp", DateTime()),
        ("event_id", WithCodecs(UUID(), ["NONE"])),
        (
            "event_hash",
            WithCodecs(
                Materialized(UInt(64), "cityHash64(toString(event_id))",), ["NONE"],
            ),
        ),
        ("platform", LowCardinality(String())),
        ("environment", LowCardinality(Nullable(String()))),
        ("release", LowCardinality(Nullable(String()))),
        ("dist", LowCardinality(Nullable(String()))),
        ("ip_address_v4", Nullable(IPv4())),
        ("ip_address_v6", Nullable(IPv6())),
        ("user", WithDefault(String(), "''")),
        ("user_hash", Materialized(UInt(64), "cityHash64(user)"),),
        ("user_id", Nullable(String())),
예제 #4
0
    (("Array(Nullable(UUID))", "", "", ""), Array(Nullable(UUID()))),
    # Nullable
    (("Nullable(String)", "", "", ""), Nullable(String())),
    (("Nullable(FixedString(8))", "", "", ""), Nullable(FixedString(8))),
    (("Nullable(Date)", "", "", ""), Nullable(Date())),
    # Low cardinality
    (("LowCardinality(String)", "", "", ""), LowCardinality(String())),
    (("LowCardinality(Nullable(String))", "", "", ""),
     LowCardinality(Nullable(String()))),
    # Materialized
    (("Date", "MATERIALIZED", "toDate(col1)", ""),
     Materialized(Date(), "toDate(col1)")),
    (("UInt64", "MATERIALIZED", "CAST(cityHash64(col1), 'UInt64')", ""),
     Materialized(UInt(64), "cityHash64(col1)")),
    # Default value
    (("LowCardinality(String)", "DEFAULT", "a", ""),
     WithDefault(LowCardinality(String()), "a")),
    (("UInt8", "DEFAULT", "2", ""), WithDefault(UInt(8), "2")),
    # With codecs
    (("UUID", "", "", "NONE"), WithCodecs(UUID(), ["NONE"])),
    (("DateTime", "", "", "DoubleDelta, LZ4"),
     WithCodecs(DateTime(), ["DoubleDelta", "LZ4"])),
]


@pytest.mark.parametrize("input, expected_output", test_data)
def test_parse_column(input, expected_output):
    """Check that _get_column turns each test_data input into the expected column.

    ``input`` is a 4-tuple passed positionally to ``_get_column``:
    (column type, default kind, default expression, codec expression).
    """
    column_type, default_type, default_expr, codec_expr = input
    parsed = _get_column(column_type, default_type, default_expr, codec_expr)
    assert parsed == expected_output
예제 #5
0
파일: errors.py 프로젝트: jiankunking/snuba
    def __init__(self) -> None:
        """Declare the columns, storage schema, writer and tag processor.

        The same ReplacingMergeTreeSchema instance is used as both the read
        and the write schema. Rows are loaded through a KafkaStreamLoader
        whose default topic is "events" and whose processor is an
        ErrorsProcessor built from the promoted tag mapping.
        """
        all_columns = ColumnSet([
            ("org_id", UInt(64)),
            ("project_id", UInt(64)),
            ("timestamp", DateTime()),
            ("event_id", WithCodecs(UUID(), ["NONE"])),
            (
                "event_hash",
                WithCodecs(
                    Materialized(
                        UInt(64),
                        "cityHash64(toString(event_id))",
                    ),
                    ["NONE"],
                ),
            ),
            ("platform", LowCardinality(String())),
            ("environment", LowCardinality(Nullable(String()))),
            ("release", LowCardinality(Nullable(String()))),
            ("dist", LowCardinality(Nullable(String()))),
            ("ip_address_v4", Nullable(IPv4())),
            ("ip_address_v6", Nullable(IPv6())),
            ("user", WithDefault(String(), "''")),
            (
                "user_hash",
                Materialized(UInt(64), "cityHash64(user)"),
            ),
            ("user_id", Nullable(String())),
            ("user_name", Nullable(String())),
            ("user_email", Nullable(String())),
            ("sdk_name", LowCardinality(Nullable(String()))),
            ("sdk_version", LowCardinality(Nullable(String()))),
            ("tags", Nested([("key", String()), ("value", String())])),
            ("_tags_flattened", String()),
            ("contexts", Nested([("key", String()), ("value", String())])),
            ("_contexts_flattened", String()),
            ("transaction_name", WithDefault(LowCardinality(String()), "''")),
            (
                "transaction_hash",
                Materialized(UInt(64), "cityHash64(transaction_name)"),
            ),
            ("span_id", Nullable(UInt(64))),
            ("trace_id", Nullable(UUID())),
            ("partition", UInt(16)),
            ("offset", WithCodecs(UInt(64), ["DoubleDelta", "LZ4"])),
            ("retention_days", UInt(16)),
            ("deleted", UInt(8)),
            ("group_id", UInt(64)),
            ("primary_hash", FixedString(32)),
            ("primary_hash_hex", Materialized(UInt(64), "hex(primary_hash)")),
            ("event_string", WithCodecs(String(), ["NONE"])),
            ("received", DateTime()),
            ("message", String()),
            ("title", String()),
            ("culprit", String()),
            ("level", LowCardinality(String())),
            ("location", Nullable(String())),
            ("version", LowCardinality(Nullable(String()))),
            ("type", LowCardinality(String())),
            (
                "exception_stacks",
                Nested([
                    ("type", Nullable(String())),
                    ("value", Nullable(String())),
                    ("mechanism_type", Nullable(String())),
                    ("mechanism_handled", Nullable(UInt(8))),
                ]),
            ),
            (
                "exception_frames",
                Nested([
                    ("abs_path", Nullable(String())),
                    ("colno", Nullable(UInt(32))),
                    ("filename", Nullable(String())),
                    ("function", Nullable(String())),
                    ("lineno", Nullable(UInt(32))),
                    ("in_app", Nullable(UInt(8))),
                    ("package", Nullable(String())),
                    ("module", Nullable(String())),
                    ("stack_level", Nullable(UInt(16))),
                ]),
            ),
            ("sdk_integrations", Array(String())),
            ("modules", Nested([("name", String()), ("version", String())])),
        ])

        # Tag key -> column name. Every value must be a column declared in
        # all_columns above; "sentry:user" points at the "user" column (a
        # masked placeholder such as "******" is not a column name).
        self.__promoted_tag_columns = {
            "environment": "environment",
            "sentry:release": "release",
            "sentry:dist": "dist",
            "sentry:user": "user",
            "transaction": "transaction_name",
            "level": "level",
        }

        schema = ReplacingMergeTreeSchema(
            columns=all_columns,
            local_table_name="errors_local",
            dist_table_name="errors_dist",
            # Every query is additionally restricted to rows with deleted = 0.
            mandatory_conditions=[("deleted", "=", 0)],
            prewhere_candidates=[
                "event_id",
                "group_id",
                "tags[sentry:release]",
                "message",
                "environment",
                "project_id",
            ],
            order_by=(
                "(org_id, project_id, toStartOfDay(timestamp), "
                "primary_hash_hex, event_hash)"
            ),
            partition_by="(toMonday(timestamp), if(retention_days = 30, 30, 90))",
            version_column="deleted",
            sample_expr="event_hash",
            ttl_expr="timestamp + toIntervalDay(retention_days)",
            settings={"index_granularity": "8192"},
        )

        # Reads and writes go through the very same schema object.
        dataset_schemas = DatasetSchemas(
            read_schema=schema,
            write_schema=schema,
        )

        table_writer = TableWriter(
            write_schema=schema,
            stream_loader=KafkaStreamLoader(
                processor=ErrorsProcessor(self.__promoted_tag_columns),
                default_topic="events",
            ),
        )

        super().__init__(
            dataset_schemas=dataset_schemas,
            table_writer=table_writer,
            time_group_columns={
                "time": "timestamp",
                "rtime": "received"
            },
            time_parse_columns=("timestamp", "received"),
        )

        # Built after super().__init__(); it queries two helper methods on
        # self for the promoted columns and the column/tag mapping.
        self.__tags_processor = TagColumnProcessor(
            columns=all_columns,
            promoted_columns=self._get_promoted_columns(),
            column_tag_map=self._get_column_tag_map(),
        )