Example #1
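    # Replacer setup: open a ClickHouse connection reserved for replacements,
    # resolve the storage's replacer processor, and remember the local table name.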
    def __init__(self, storage: WritableTableStorage,
                 metrics: MetricsBackend) -> None:
        self.clickhouse = storage.get_cluster().get_query_connection(
            ClickhouseClientSettings.REPLACE)

        self.metrics = metrics
        processor = storage.get_table_writer().get_replacer_processor()
        assert (
            processor
        ), f"This storage writer does not support replacements {storage.get_storage_key().value}"
        self.__replacer_processor = processor
        self.__table_name = (
            storage.get_table_writer().get_schema().get_local_table_name())
Example #2
    def __init__(self, storage: WritableTableStorage,
                 metrics: MetricsBackend) -> None:
        self.__storage = storage

        self.metrics = metrics
        processor = storage.get_table_writer().get_replacer_processor()
        assert (
            processor
        ), f"This storage writer does not support replacements {storage.get_storage_key().value}"
        self.__replacer_processor = processor
        self.__database_name = storage.get_cluster().get_database()

        self.__sharded_pool = RoundRobinConnectionPool(
            self.__storage.get_cluster())
Example #3
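# Lists the active partitions of a table by querying system.parts, then decodes
# each partition string according to the schema's declared part format.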
def get_active_partitions(clickhouse: ClickhousePool,
                          storage: WritableTableStorage, database: str,
                          table: str) -> Sequence[util.Part]:

    response = clickhouse.execute(
        """
        SELECT DISTINCT partition
        FROM system.parts
        WHERE database = %(database)s
        AND table = %(table)s
        AND active = 1
        """,
        {
            "database": database,
            "table": table
        },
    )

    schema = storage.get_schema()
    assert isinstance(schema, TableSchema)
    part_format = schema.get_part_format()
    assert part_format is not None
    return [
        util.decode_part_str(part, part_format) for part, in response.results
    ]
Example #4
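    # Builds the consumer's batch writer: inserts always go through an
    # InsertBatchWriter, while replacement messages are produced to Kafka only
    # when the storage's stream loader defines a replacement topic.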
    def __build_batch_writer(
            self,
            storage: WritableTableStorage) -> ProcessedMessageBatchWriter:
        replacement_batch_writer: Optional[ReplacementBatchWriter]
        stream_loader = storage.get_table_writer().get_stream_loader()
        replacement_topic_spec = stream_loader.get_replacement_topic_spec()
        default_topic_spec = stream_loader.get_default_topic_spec()
        if replacement_topic_spec is not None:
            # XXX: The producer is flushed when closed on strategy teardown
            # after an assignment is revoked, but never explicitly closed.
            # XXX: This assumes that the Kafka cluster used for the input topic
            # to the storage is the same as the replacement topic.
            replacement_batch_writer = ReplacementBatchWriter(
                ConfluentKafkaProducer(
                    build_kafka_producer_configuration(
                        default_topic_spec.topic,
                        override_params={
                            "partitioner": "consistent",
                            "message.max.bytes":
                            50000000,  # 50MB, default is 1MB
                        },
                    )),
                Topic(replacement_topic_spec.topic_name),
            )
        else:
            replacement_batch_writer = None

        return ProcessedMessageBatchWriter(
            InsertBatchWriter(
                storage.get_table_writer().get_batch_writer(
                    self.__metrics,
                    {
                        "load_balancing": "in_order",
                        "insert_distributed_sync": 1
                    },
                ),
                MetricsWrapper(
                    self.__metrics,
                    "insertions",
                    {"storage": storage.get_storage_key().value},
                ),
            ),
            replacement_batch_writer,
        )
Example #5
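    # Replacer setup that receives an already-created ClickhousePool instead of
    # deriving the connection from the storage's cluster.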
    def __init__(self, clickhouse: ClickhousePool,
                 storage: WritableTableStorage,
                 metrics: MetricsBackend) -> None:
        self.clickhouse = clickhouse
        self.metrics = metrics
        processor = storage.get_table_writer().get_replacer_processor()
        assert (
            processor
        ), f"This storage writer does not support replacements {type(storage)}"
        self.__replacer_processor = processor
Example #6
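# Finds stale partitions for the storage's local table, passes them to
# drop_partitions (which honors dry_run), and returns their count.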
def run_cleanup(
    clickhouse: ClickhousePool,
    storage: WritableTableStorage,
    database: str,
    dry_run: bool = True,
) -> int:

    table = storage.get_table_writer().get_schema().get_local_table_name()

    active_parts = get_active_partitions(clickhouse, storage, database, table)
    stale_parts = filter_stale_partitions(active_parts)
    drop_partitions(clickhouse, database, table, stale_parts, dry_run=dry_run)
    return len(stale_parts)
Example #7
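    # Replacer setup: besides the replacer processor, keep a round-robin
    # connection pool over the cluster, a rate limiter, and the last processed
    # offset per partition for this consumer group.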
    def __init__(
        self,
        storage: WritableTableStorage,
        consumer_group: str,
        metrics: MetricsBackend,
    ) -> None:
        self.__storage = storage

        self.metrics = metrics
        processor = storage.get_table_writer().get_replacer_processor()
        assert (
            processor
        ), f"This storage writer does not support replacements {storage.get_storage_key().value}"
        self.__replacer_processor = processor
        self.__database_name = storage.get_cluster().get_database()

        self.__sharded_pool = RoundRobinConnectionPool(
            self.__storage.get_cluster())
        self.__rate_limiter = RateLimiter("replacements")

        self.__last_offset_processed_per_partition: MutableMapping[
            str, int] = dict()
        self.__consumer_group = consumer_group
Example #8
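    # Consumer setup: wrap the table writer's batch writer in a JSON row encoder;
    # the Kafka producer and replacements topic are optional.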
    def __init__(
        self,
        storage: WritableTableStorage,
        metrics: MetricsBackend,
        producer: Optional[ConfluentKafkaProducer] = None,
        replacements_topic: Optional[Topic] = None,
    ) -> None:
        self.__storage = storage
        self.producer = producer
        self.replacements_topic = replacements_topic
        self.metrics = metrics
        table_writer = storage.get_table_writer()
        self.__writer = BatchWriterEncoderWrapper(
            table_writer.get_batch_writer(metrics, {
                "load_balancing": "in_order",
                "insert_distributed_sync": 1
            }),
            JSONRowEncoder(),
        )

        self.__processor: MessageProcessor
        self.__pre_filter = table_writer.get_stream_loader().get_pre_filter()
Example #9
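    # Variant of the consumer setup that lets callers opt into rapidjson for
    # deserializing incoming messages and serializing rows for the writer.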
    def __init__(
        self,
        storage: WritableTableStorage,
        metrics: MetricsBackend,
        producer: Optional[ConfluentKafkaProducer] = None,
        replacements_topic: Optional[Topic] = None,
        rapidjson_deserialize: bool = False,
        rapidjson_serialize: bool = False,
    ) -> None:
        self.__storage = storage
        self.producer = producer
        self.replacements_topic = replacements_topic
        self.metrics = metrics
        table_writer = storage.get_table_writer()
        self.__writer = table_writer.get_writer(
            {
                "load_balancing": "in_order",
                "insert_distributed_sync": 1
            },
            rapidjson_serialize=rapidjson_serialize,
        )

        self.__rapidjson_deserialize = rapidjson_deserialize
        self.__pre_filter = table_writer.get_stream_loader().get_pre_filter()
Example #10
    dist_table_name="sentry_dist",
    storage_set_key=StorageSetKey.EVENTS,
    mandatory_conditions=mandatory_conditions,
    part_format=[util.PartSegment.DATE, util.PartSegment.RETENTION_DAYS],
)


storage = WritableTableStorage(
    storage_key=StorageKey.EVENTS,
    storage_set_key=StorageSetKey.EVENTS,
    schema=schema,
    query_processors=query_processors,
    stream_loader=build_kafka_stream_loader_from_settings(
        processor=EventsProcessor(promoted_tag_columns),
        default_topic=Topic.EVENTS,
        replacement_topic=Topic.EVENT_REPLACEMENTS_LEGACY,
        commit_log_topic=Topic.COMMIT_LOG,
        subscription_result_topic=Topic.SUBSCRIPTION_RESULTS_EVENTS,
    ),
    query_splitters=query_splitters,
    mandatory_condition_checkers=[ProjectIdEnforcer()],
    replacer_processor=ErrorsReplacer(
        schema=schema,
        required_columns=[col.escaped for col in required_columns],
        tag_column_map=get_tag_column_map(),
        promoted_tags=get_promoted_tags(),
        state_name=ReplacerState.EVENTS,
        use_promoted_prewhere=True,
    ),
)
Example #11
schema = WritableTableSchema(
    columns=all_columns,
    local_table_name="sentry_local",
    dist_table_name="sentry_dist",
    storage_set_key=StorageSetKey.EVENTS,
    mandatory_conditions=mandatory_conditions,
    prewhere_candidates=prewhere_candidates,
)

storage = WritableTableStorage(
    storage_key=StorageKey.EVENTS,
    storage_set_key=StorageSetKey.EVENTS,
    schema=schema,
    query_processors=query_processors,
    stream_loader=KafkaStreamLoader(
        processor=EventsProcessor(promoted_tag_columns),
        default_topic="events",
        replacement_topic="event-replacements",
        commit_log_topic="snuba-commit-log",
    ),
    query_splitters=query_splitters,
    replacer_processor=ErrorsReplacer(
        schema=schema,
        required_columns=[col.escaped for col in required_columns],
        tag_column_map=get_tag_column_map(),
        promoted_tags=get_promoted_tags(),
        state_name=ReplacerState.EVENTS,
    ),
)
Example #12
    prewhere_candidates=["event_id", "transaction_name",  "project_id"],
    order_by="(project_id, _finish_date, transaction_name, cityHash64(span_id))",
    partition_by="(retention_days, toMonday(_finish_date))",
    version_column="deleted",
    sample_expr=None,
    migration_function=transactions_migrations,
)

storage = WritableTableStorage(
    schemas=StorageSchemas(read_schema=schema, write_schema=schema),
    table_writer=TransactionsTableWriter(
        write_schema=schema,
        stream_loader=KafkaStreamLoader(
            processor=TransactionsMessageProcessor(), default_topic="events",
        ),
    ),
    query_processors=[
        NestedFieldConditionOptimizer(
            "tags", "_tags_flattened", {"start_ts", "finish_ts"}, BEGINNING_OF_TIME,
        ),
        NestedFieldConditionOptimizer(
            "contexts",
            "_contexts_flattened",
            {"start_ts", "finish_ts"},
            BEGINNING_OF_TIME,
        ),
        TransactionColumnProcessor(),
        PrewhereProcessor(),
    ],
)
Example #13
    dist_table_name="transactions_dist",
    storage_set_key=StorageSetKey.TRANSACTIONS,
    mandatory_conditions=[],
    prewhere_candidates=[
        "event_id", "transaction_name", "transaction", "title"
    ],
    part_format=[util.PartSegment.RETENTION_DAYS, util.PartSegment.DATE],
)

storage = WritableTableStorage(
    storage_key=StorageKey.TRANSACTIONS,
    storage_set_key=StorageSetKey.TRANSACTIONS,
    schema=schema,
    query_processors=[
        MappingOptimizer("tags", "_tags_hash_map", "tags_hash_map_enabled"),
        EventIdColumnProcessor(),
        ArrayJoinKeyValueOptimizer("tags"),
        ArrayJoinKeyValueOptimizer("measurements"),
        UUIDColumnProcessor(set(["event_id", "trace_id"])),
        PrewhereProcessor(),
    ],
    stream_loader=build_kafka_stream_loader_from_settings(
        StorageKey.TRANSACTIONS,
        processor=TransactionsMessageProcessor(),
        default_topic_name="events",
        commit_log_topic_name="snuba-commit-log",
    ),
    query_splitters=[TimeSplitQueryStrategy(timestamp_col="finish_ts")],
    writer_options={"insert_allow_materialized_columns": 1},
)
Example #14
    columns=columns,
    local_table_name="transactions_local",
    dist_table_name="transactions_dist",
    storage_set_key=StorageSetKey.TRANSACTIONS,
    mandatory_conditions=[],
    part_format=[util.PartSegment.RETENTION_DAYS, util.PartSegment.DATE],
)

storage = WritableTableStorage(
    storage_key=StorageKey.TRANSACTIONS,
    storage_set_key=StorageSetKey.TRANSACTIONS,
    schema=schema,
    query_processors=query_processors,
    stream_loader=build_kafka_stream_loader_from_settings(
        processor=TransactionsMessageProcessor(),
        pre_filter=KafkaHeaderFilterWithBypass("transaction_forwarder", "0",
                                               100),
        default_topic=Topic.TRANSACTIONS,
        commit_log_topic=Topic.TRANSACTIONS_COMMIT_LOG,
        subscription_scheduler_mode=SchedulingWatermarkMode.GLOBAL,
        subscription_scheduled_topic=Topic.SUBSCRIPTION_SCHEDULED_TRANSACTIONS,
        subscription_result_topic=Topic.SUBSCRIPTION_RESULTS_TRANSACTIONS,
    ),
    query_splitters=query_splitters,
    mandatory_condition_checkers=mandatory_condition_checkers,
    writer_options={
        "insert_allow_materialized_columns": 1,
        "input_format_skip_unknown_fields": 1,
    },
)
Example #15
materialized_view_schema = MaterializedViewSchema(
    local_materialized_view_name="outcomes_mv_hourly_local",
    dist_materialized_view_name="outcomes_mv_hourly_dist",
    storage_set_key=StorageSetKey.OUTCOMES,
    prewhere_candidates=["project_id", "org_id"],
    columns=materialized_view_columns,
    query=query,
    local_source_table_name=WRITE_LOCAL_TABLE_NAME,
    local_destination_table_name=READ_LOCAL_TABLE_NAME,
    dist_source_table_name=WRITE_DIST_TABLE_NAME,
    dist_destination_table_name=READ_DIST_TABLE_NAME,
)

raw_storage = WritableTableStorage(
    storage_key=StorageKey.OUTCOMES_RAW,
    storage_set_key=StorageSetKey.OUTCOMES,
    schema=raw_schema,
    query_processors=[],
    stream_loader=KafkaStreamLoader(
        processor=OutcomesProcessor(),
        default_topic="outcomes",
    ),
)

materialized_storage = ReadableTableStorage(
    storage_key=StorageKey.OUTCOMES_HOURLY,
    storage_set_key=StorageSetKey.OUTCOMES,
    schema=read_schema,
    query_processors=[PrewhereProcessor()],
)
Example #16
)

storage = WritableTableStorage(
    storage_key=StorageKey.ERRORS,
    storage_set_key=StorageSetKey.EVENTS,
    schema=schema,
    query_processors=query_processors,
    query_splitters=query_splitters,
    stream_loader=build_kafka_stream_loader_from_settings(
        StorageKey.ERRORS,
        processor=ErrorsProcessor(promoted_tag_columns),
        default_topic_name="events",
        replacement_topic_name="event-replacements",
        commit_log_topic_name="snuba-commit-log",
    ),
    replacer_processor=ErrorsReplacer(
        schema=schema,
        required_columns=required_columns,
        tag_column_map={
            "tags": promoted_tag_columns,
            "contexts": {}
        },
        promoted_tags={
            "tags": list(promoted_tag_columns.keys()),
            "contexts": []
        },
        state_name=ReplacerState.ERRORS,
        use_promoted_prewhere=False,
    ),
)
Example #17
        ("platform", String()),
        ("trace_id", UUID()),
        ("transaction_name", String()),
        ("version_name", String()),
        ("version_code", String()),
    ]
)

writable_columns = readable_columns + ColumnSet(
    [("retention_days", UInt(16)), ("partition", UInt(16)), ("offset", UInt(64))]
)

writable_schema = WritableTableSchema(
    columns=writable_columns,
    local_table_name=PROFILES_LOCAL_TABLE_NAME,
    dist_table_name=PROFILES_DIST_TABLE_NAME,
    storage_set_key=StorageSetKey.PROFILES,
)

writable_storage = WritableTableStorage(
    storage_key=StorageKey.PROFILES,
    storage_set_key=StorageSetKey.PROFILES,
    schema=writable_schema,
    query_processors=processors,
    mandatory_condition_checkers=[
        OrgIdEnforcer("organization_id"),
        ProjectIdEnforcer(),
    ],
    stream_loader=loader,
)
Example #18
    "deleted",
    "retention_days",
]

storage = WritableTableStorage(
    storage_key=StorageKey.ERRORS,
    storage_set_key=StorageSetKey.EVENTS,
    schema=schema,
    query_processors=[
        PostReplacementConsistencyEnforcer(
            project_column="project_id", replacer_state_name=ReplacerState.ERRORS,
        ),
        MappingColumnPromoter(mapping_specs={"tags": promoted_tag_columns}),
        ArrayJoinKeyValueOptimizer("tags"),
        PrewhereProcessor(),
    ],
    stream_loader=KafkaStreamLoader(
        processor=ErrorsProcessor(promoted_tag_columns),
        default_topic="events",
        replacement_topic="errors-replacements",
    ),
    replacer_processor=ErrorsReplacer(
        write_schema=schema,
        read_schema=schema,
        required_columns=required_columns,
        tag_column_map={"tags": promoted_tag_columns, "contexts": {}},
        promoted_tags={"tags": list(promoted_tag_columns.keys()), "contexts": []},
        state_name=ReplacerState.ERRORS,
    ),
)
Example #19
    }


storage = WritableTableStorage(
    schemas=StorageSchemas(read_schema=schema, write_schema=schema),
    table_writer=TableWriter(
        write_schema=schema,
        stream_loader=KafkaStreamLoader(
            processor=EventsProcessor(promoted_tag_columns),
            default_topic="events",
            replacement_topic="event-replacements",
            commit_log_topic="snuba-commit-log",
        ),
        replacer_processor=ErrorsReplacer(
            write_schema=schema,
            read_schema=schema,
            required_columns=[col.escaped for col in required_columns],
            tag_column_map=get_tag_column_map(),
            promoted_tags=get_promoted_tags(),
            state_name=ReplacerState.EVENTS,
        ),
    ),
    query_processors=[
        # TODO: This one should become an entirely separate storage and picked
        # in the storage selector.
        ReadOnlyTableSelector("sentry_dist", "sentry_dist_ro"),
        EventsColumnProcessor(),
        PrewhereProcessor(),
    ],
)
Example #20
    dist_table_name=READ_DIST_TABLE_NAME,
    storage_set_key=StorageSetKey.SESSIONS,
)
materialized_view_schema = TableSchema(
    local_table_name=READ_LOCAL_MV_NAME,
    dist_table_name=READ_DIST_MV_NAME,
    storage_set_key=StorageSetKey.SESSIONS,
    columns=read_columns,
)

# The raw table we write onto, and that potentially we could
# query.
raw_storage = WritableTableStorage(
    storage_key=StorageKey.SESSIONS_RAW,
    storage_set_key=StorageSetKey.SESSIONS,
    schema=raw_schema,
    query_processors=[],
    stream_loader=build_kafka_stream_loader_from_settings(
        StorageKey.SESSIONS_RAW,
        processor=SessionsProcessor(),
        default_topic_name="ingest-sessions",
    ),
)
# The materialized view we query aggregate data from.
materialized_storage = ReadableTableStorage(
    storage_key=StorageKey.SESSIONS_HOURLY,
    storage_set_key=StorageSetKey.SESSIONS,
    schema=read_schema,
    query_processors=[PrewhereProcessor(["project_id", "org_id"])],
)
Example #21
    ("category", UInt(8)),
])

materialized_view_schema = TableSchema(
    local_table_name="outcomes_mv_hourly_local",
    dist_table_name="outcomes_mv_hourly_dist",
    storage_set_key=StorageSetKey.OUTCOMES,
    columns=materialized_view_columns,
)

raw_storage = WritableTableStorage(
    storage_key=StorageKey.OUTCOMES_RAW,
    storage_set_key=StorageSetKey.OUTCOMES,
    schema=raw_schema,
    query_processors=[TableRateLimit()],
    mandatory_condition_checkers=[OrgIdEnforcer()],
    stream_loader=build_kafka_stream_loader_from_settings(
        processor=OutcomesProcessor(),
        default_topic=Topic.OUTCOMES,
    ),
)

materialized_storage = ReadableTableStorage(
    storage_key=StorageKey.OUTCOMES_HOURLY,
    storage_set_key=StorageSetKey.OUTCOMES,
    schema=read_schema,
    query_processors=[
        PrewhereProcessor(["project_id", "org_id"]),
        TableRateLimit()
    ],
    mandatory_condition_checkers=[OrgIdEnforcer()],
)
Example #22
    # sdk info
    ("sdk_name", String()),
    ("sdk_version", String()),
    ("tags", Nested([("key", String()), ("value", String())])),
    # deletion info
    ("retention_days", UInt(16)),
    ("partition", UInt(16)),
    ("offset", UInt(64)),
])

schema = WritableTableSchema(
    columns=columns,
    local_table_name=LOCAL_TABLE_NAME,
    dist_table_name=DIST_TABLE_NAME,
    storage_set_key=StorageSetKey.REPLAYS,
)

# TODO: set up deadletter queue for bad messages.

storage = WritableTableStorage(
    storage_key=StorageKey.REPLAYS,
    storage_set_key=StorageSetKey.REPLAYS,
    schema=schema,
    query_processors=[TableRateLimit()],
    mandatory_condition_checkers=[ProjectIdEnforcer()],
    stream_loader=build_kafka_stream_loader_from_settings(
        processor=ReplaysProcessor(),
        default_topic=Topic.REPLAYEVENTS,
    ),
)
Example #23
    dist_materialized_view_name="outcomes_mv_hourly_dist",
    prewhere_candidates=["project_id", "org_id"],
    columns=materialized_view_columns,
    query=query,
    local_source_table_name=WRITE_LOCAL_TABLE_NAME,
    local_destination_table_name=READ_LOCAL_TABLE_NAME,
    dist_source_table_name=WRITE_DIST_TABLE_NAME,
    dist_destination_table_name=READ_DIST_TABLE_NAME,
)

raw_storage = WritableTableStorage(
    schemas=StorageSchemas(read_schema=raw_schema, write_schema=raw_schema),
    table_writer=TableWriter(
        write_schema=raw_schema,
        stream_loader=KafkaStreamLoader(
            processor=OutcomesProcessor(),
            default_topic="outcomes",
        ),
    ),
    query_processors=[],
)

materialized_storage = ReadableTableStorage(
    schemas=StorageSchemas(
        read_schema=read_schema,
        write_schema=None,
        intermediary_schemas=[materialized_view_schema],
    ),
    query_processors=[PrewhereProcessor()],
)
Example #24
    storage_set_key=StorageSetKey.SESSIONS,
    prewhere_candidates=["project_id", "org_id"],
)
materialized_view_schema = TableSchema(
    local_table_name=READ_LOCAL_MV_NAME,
    dist_table_name=READ_DIST_MV_NAME,
    storage_set_key=StorageSetKey.SESSIONS,
    prewhere_candidates=["project_id", "org_id"],
    columns=read_columns,
)

# The raw table we write onto, and that potentially we could
# query.
raw_storage = WritableTableStorage(
    storage_key=StorageKey.SESSIONS_RAW,
    storage_set_key=StorageSetKey.SESSIONS,
    schema=raw_schema,
    query_processors=[],
    stream_loader=KafkaStreamLoader(
        processor=SessionsProcessor(),
        default_topic="ingest-sessions",
    ),
)
# The materialized view we query aggregate data from.
materialized_storage = ReadableTableStorage(
    storage_key=StorageKey.SESSIONS_HOURLY,
    storage_set_key=StorageSetKey.SESSIONS,
    schema=read_schema,
    query_processors=[PrewhereProcessor()],
)
Example #25
    ("clickhouse_queries.is_duplicate", Array(UInt(8))),
    ("clickhouse_queries.consistent", Array(UInt(8))),
    ("clickhouse_queries.all_columns", Array(Array(String()))),
    ("clickhouse_queries.or_conditions", Array(UInt(8))),
    ("clickhouse_queries.where_columns", Array(Array(String()))),
    ("clickhouse_queries.where_mapping_columns", Array(Array(String()))),
    ("clickhouse_queries.groupby_columns", Array(Array(String()))),
    ("clickhouse_queries.array_join_columns", Array(Array(String()))),
])

# Note, we are using the simplified WritableTableSchema class here instead of
# the MergeTreeSchema that corresponds to the actual table engine. This is because
# the querylog table isn't generated by the old migration system.
schema = WritableTableSchema(
    columns=columns,
    local_table_name="querylog_local",
    dist_table_name="querylog_dist",
    storage_set_key=StorageSetKey.QUERYLOG,
)

storage = WritableTableStorage(
    storage_key=StorageKey.QUERYLOG,
    storage_set_key=StorageSetKey.QUERYLOG,
    schema=schema,
    query_processors=[],
    stream_loader=build_kafka_stream_loader_from_settings(
        processor=QuerylogProcessor(),
        default_topic=Topic.QUERYLOG,
    ),
)
Example #26
            ("max_threads", UInt(8)),
            ("num_days", UInt(32)),
            ("clickhouse_table", LowCardinality(String())),
            ("query_id", String()),
            ("is_duplicate", UInt(8)),
            ("consistent", UInt(8)),
        ]),
    ),
])

schema = MergeTreeSchema(
    columns=columns,
    local_table_name="querylog_local",
    dist_table_name="querylog_dist",
    order_by="(toStartOfDay(timestamp), request_id)",
    partition_by="(toMonday(timestamp))",
    sample_expr="request_id",
)

storage = WritableTableStorage(
    schemas=StorageSchemas(read_schema=schema, write_schema=schema),
    table_writer=TableWriter(
        write_schema=schema,
        stream_loader=KafkaStreamLoader(
            processor=QuerylogProcessor(),
            default_topic=settings.QUERIES_TOPIC,
        ),
    ),
    query_processors=[],
)
Example #27
    ),
    ("start_ts", DateTime()),
    ("start_ns", UInt(32)),
    ("finish_ts", DateTime()),
    ("finish_ns", UInt(32)),
    ("duration_ms", UInt(32)),
    ("tags", Nested([("key", String()), ("value", String())])),
    ("_tags_hash_map", Materialized(Array(UInt(64)), TAGS_HASH_MAP_COLUMN)),
    ("retention_days", UInt(16)),
    ("deleted", UInt(8)),
])

schema = WritableTableSchema(
    columns=columns,
    local_table_name="spans_experimental_local",
    dist_table_name="spans_experimental_dist",
    storage_set_key=StorageSetKey.TRANSACTIONS,
)

storage = WritableTableStorage(
    storage_key=StorageKey.SPANS,
    storage_set_key=StorageSetKey.TRANSACTIONS,
    schema=schema,
    query_processors=[PrewhereProcessor()],
    stream_loader=KafkaStreamLoader(
        processor=SpansMessageProcessor(),
        default_topic="events",
    ),
    query_splitters=[TimeSplitQueryStrategy(timestamp_col="finish_ts")],
)
Example #28
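# Transactions storage: promotes well-known tag/context mappings to columns,
# normalizes UUID and hex-int columns, optimizes tag lookups and array joins,
# applies PREWHERE candidates and table rate limiting, and consumes the events
# topic via the Kafka stream loader.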
storage = WritableTableStorage(
    storage_key=StorageKey.TRANSACTIONS,
    storage_set_key=StorageSetKey.TRANSACTIONS,
    schema=schema,
    query_processors=[
        MappingColumnPromoter(
            mapping_specs={
                "tags": {
                    "environment": "environment",
                    "sentry:release": "release",
                    "sentry:dist": "dist",
                    "sentry:user": "******",
                },
                "contexts": {"trace.trace_id": "trace_id", "trace.span_id": "span_id"},
            }
        ),
        UUIDColumnProcessor(set(["event_id", "trace_id"])),
        HexIntColumnProcessor({"span_id"}),
        EventsBooleanContextsProcessor(),
        MappingOptimizer("tags", "_tags_hash_map", "tags_hash_map_enabled"),
        EmptyTagConditionProcessor(),
        ArrayJoinKeyValueOptimizer("tags"),
        ArrayJoinKeyValueOptimizer("measurements"),
        ArrayJoinKeyValueOptimizer("span_op_breakdowns"),
        PrewhereProcessor(
            [
                "event_id",
                "trace_id",
                "span_id",
                "transaction_name",
                "transaction",
                "title",
            ]
        ),
        TableRateLimit(),
    ],
    stream_loader=build_kafka_stream_loader_from_settings(
        processor=TransactionsMessageProcessor(),
        default_topic=Topic.EVENTS,
        commit_log_topic=Topic.COMMIT_LOG,
        subscription_result_topic=Topic.SUBSCRIPTION_RESULTS_TRANSACTIONS,
    ),
    query_splitters=[TimeSplitQueryStrategy(timestamp_col="finish_ts")],
    mandatory_condition_checkers=[ProjectIdEnforcer()],
    writer_options={"insert_allow_materialized_columns": 1},
)
Example #29
    # during create statement
    # (https://github.com/ClickHouse/ClickHouse/issues/12586), so the
    # materialization is added with a migration.
    skipped_cols_on_creation={"_tags_hash_map"},
)


storage = WritableTableStorage(
    storage_key=StorageKey.TRANSACTIONS,
    storage_set_key=StorageSetKey.TRANSACTIONS,
    schema=schema,
    query_processors=[
        NestedFieldConditionOptimizer(
            "contexts",
            "_contexts_flattened",
            {"start_ts", "finish_ts"},
            BEGINNING_OF_TIME,
        ),
        MappingOptimizer("tags", "_tags_hash_map", "tags_hash_map_enabled"),
        TransactionColumnProcessor(),
        ArrayJoinKeyValueOptimizer("tags"),
        ArrayJoinKeyValueOptimizer("measurements"),
        PrewhereProcessor(),
    ],
    stream_loader=KafkaStreamLoader(
        processor=TransactionsMessageProcessor(), default_topic="events",
    ),
    query_splitters=[TimeSplitQueryStrategy(timestamp_col="finish_ts")],
    writer_options={"insert_allow_materialized_columns": 1},
)
Example #30
    ("outcome", UInt(8)),
    ("reason", String()),
    ("times_seen", UInt(64)),
])

materialized_view_schema = TableSchema(
    local_table_name="outcomes_mv_hourly_local",
    dist_table_name="outcomes_mv_hourly_dist",
    storage_set_key=StorageSetKey.OUTCOMES,
    columns=materialized_view_columns,
)

raw_storage = WritableTableStorage(
    storage_key=StorageKey.OUTCOMES_RAW,
    storage_set_key=StorageSetKey.OUTCOMES,
    schema=raw_schema,
    query_processors=[],
    stream_loader=build_kafka_stream_loader_from_settings(
        StorageKey.OUTCOMES_RAW,
        processor=OutcomesProcessor(),
        default_topic_name="outcomes",
    ),
)

materialized_storage = ReadableTableStorage(
    storage_key=StorageKey.OUTCOMES_HOURLY,
    storage_set_key=StorageSetKey.OUTCOMES,
    schema=read_schema,
    query_processors=[PrewhereProcessor(["project_id", "org_id"])],
)