def __init__(self, storage: WritableTableStorage, metrics: MetricsBackend) -> None:
    self.clickhouse = storage.get_cluster().get_query_connection(
        ClickhouseClientSettings.REPLACE
    )
    self.metrics = metrics
    processor = storage.get_table_writer().get_replacer_processor()
    assert (
        processor
    ), f"This storage writer does not support replacements {storage.get_storage_key().value}"
    self.__replacer_processor = processor
    self.__table_name = (
        storage.get_table_writer().get_schema().get_local_table_name()
    )
def __init__(self, storage: WritableTableStorage, metrics: MetricsBackend) -> None:
    self.__storage = storage
    self.metrics = metrics
    processor = storage.get_table_writer().get_replacer_processor()
    assert (
        processor
    ), f"This storage writer does not support replacements {storage.get_storage_key().value}"
    self.__replacer_processor = processor
    self.__database_name = storage.get_cluster().get_database()
    self.__sharded_pool = RoundRobinConnectionPool(self.__storage.get_cluster())
def get_active_partitions(
    clickhouse: ClickhousePool,
    storage: WritableTableStorage,
    database: str,
    table: str,
) -> Sequence[util.Part]:
    response = clickhouse.execute(
        """
        SELECT DISTINCT partition
        FROM system.parts
        WHERE database = %(database)s
        AND table = %(table)s
        AND active = 1
        """,
        {"database": database, "table": table},
    )

    schema = storage.get_schema()
    assert isinstance(schema, TableSchema)
    part_format = schema.get_part_format()
    assert part_format is not None
    return [util.decode_part_str(part, part_format) for part, in response.results]
def __build_batch_writer(
    self, storage: WritableTableStorage
) -> ProcessedMessageBatchWriter:
    replacement_batch_writer: Optional[ReplacementBatchWriter]
    stream_loader = storage.get_table_writer().get_stream_loader()
    replacement_topic_spec = stream_loader.get_replacement_topic_spec()
    default_topic_spec = stream_loader.get_default_topic_spec()
    if replacement_topic_spec is not None:
        # XXX: The producer is flushed when closed on strategy teardown
        # after an assignment is revoked, but never explicitly closed.
        # XXX: This assumes that the Kafka cluster used for the input topic
        # to the storage is the same as the replacement topic.
        replacement_batch_writer = ReplacementBatchWriter(
            ConfluentKafkaProducer(
                build_kafka_producer_configuration(
                    default_topic_spec.topic,
                    override_params={
                        "partitioner": "consistent",
                        "message.max.bytes": 50000000,  # 50MB, default is 1MB
                    },
                )
            ),
            Topic(replacement_topic_spec.topic_name),
        )
    else:
        replacement_batch_writer = None

    return ProcessedMessageBatchWriter(
        InsertBatchWriter(
            storage.get_table_writer().get_batch_writer(
                self.__metrics,
                {"load_balancing": "in_order", "insert_distributed_sync": 1},
            ),
            MetricsWrapper(
                self.__metrics,
                "insertions",
                {"storage": storage.get_storage_key().value},
            ),
        ),
        replacement_batch_writer,
    )
def __init__(
    self,
    clickhouse: ClickhousePool,
    storage: WritableTableStorage,
    metrics: MetricsBackend,
) -> None:
    self.clickhouse = clickhouse
    self.metrics = metrics
    processor = storage.get_table_writer().get_replacer_processor()
    assert (
        processor
    ), f"This storage writer does not support replacements {type(storage)}"
    self.__replacer_processor = processor
def run_cleanup(
    clickhouse: ClickhousePool,
    storage: WritableTableStorage,
    database: str,
    dry_run: bool = True,
) -> int:
    table = storage.get_table_writer().get_schema().get_local_table_name()
    active_parts = get_active_partitions(clickhouse, storage, database, table)
    stale_parts = filter_stale_partitions(active_parts)
    drop_partitions(clickhouse, database, table, stale_parts, dry_run=dry_run)
    return len(stale_parts)
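# Hedged usage sketch (not from the original snippets): one way run_cleanup could
# be wired together with the cluster helpers shown above. The cleanup_storage name
# and the ClickhouseClientSettings.CLEANUP profile are assumptions for illustration;
# substitute whichever connection settings your deployment actually uses.
def cleanup_storage(storage: WritableTableStorage, dry_run: bool = True) -> int:
    cluster = storage.get_cluster()
    # Assumed: a client-settings profile dedicated to cleanup connections.
    clickhouse = cluster.get_query_connection(ClickhouseClientSettings.CLEANUP)
    # Reuses run_cleanup above: looks up active partitions, filters the stale
    # ones and drops them (or only reports them when dry_run=True).
    return run_cleanup(clickhouse, storage, cluster.get_database(), dry_run=dry_run)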
def __init__(
    self,
    storage: WritableTableStorage,
    consumer_group: str,
    metrics: MetricsBackend,
) -> None:
    self.__storage = storage
    self.metrics = metrics
    processor = storage.get_table_writer().get_replacer_processor()
    assert (
        processor
    ), f"This storage writer does not support replacements {storage.get_storage_key().value}"
    self.__replacer_processor = processor
    self.__database_name = storage.get_cluster().get_database()
    self.__sharded_pool = RoundRobinConnectionPool(self.__storage.get_cluster())
    self.__rate_limiter = RateLimiter("replacements")
    self.__last_offset_processed_per_partition: MutableMapping[str, int] = dict()
    self.__consumer_group = consumer_group
def __init__(
    self,
    storage: WritableTableStorage,
    metrics: MetricsBackend,
    producer: Optional[ConfluentKafkaProducer] = None,
    replacements_topic: Optional[Topic] = None,
) -> None:
    self.__storage = storage
    self.producer = producer
    self.replacements_topic = replacements_topic
    self.metrics = metrics
    table_writer = storage.get_table_writer()
    self.__writer = BatchWriterEncoderWrapper(
        table_writer.get_batch_writer(
            metrics,
            {"load_balancing": "in_order", "insert_distributed_sync": 1},
        ),
        JSONRowEncoder(),
    )
    self.__processor: MessageProcessor
    self.__pre_filter = table_writer.get_stream_loader().get_pre_filter()
def __init__(
    self,
    storage: WritableTableStorage,
    metrics: MetricsBackend,
    producer: Optional[ConfluentKafkaProducer] = None,
    replacements_topic: Optional[Topic] = None,
    rapidjson_deserialize: bool = False,
    rapidjson_serialize: bool = False,
) -> None:
    self.__storage = storage
    self.producer = producer
    self.replacements_topic = replacements_topic
    self.metrics = metrics
    table_writer = storage.get_table_writer()
    self.__writer = table_writer.get_writer(
        {"load_balancing": "in_order", "insert_distributed_sync": 1},
        rapidjson_serialize=rapidjson_serialize,
    )
    self.__rapidjson_deserialize = rapidjson_deserialize
    self.__pre_filter = table_writer.get_stream_loader().get_pre_filter()
dist_table_name="sentry_dist", storage_set_key=StorageSetKey.EVENTS, mandatory_conditions=mandatory_conditions, part_format=[util.PartSegment.DATE, util.PartSegment.RETENTION_DAYS], ) storage = WritableTableStorage( storage_key=StorageKey.EVENTS, storage_set_key=StorageSetKey.EVENTS, schema=schema, query_processors=query_processors, stream_loader=build_kafka_stream_loader_from_settings( processor=EventsProcessor(promoted_tag_columns), default_topic=Topic.EVENTS, replacement_topic=Topic.EVENT_REPLACEMENTS_LEGACY, commit_log_topic=Topic.COMMIT_LOG, subscription_result_topic=Topic.SUBSCRIPTION_RESULTS_EVENTS, ), query_splitters=query_splitters, mandatory_condition_checkers=[ProjectIdEnforcer()], replacer_processor=ErrorsReplacer( schema=schema, required_columns=[col.escaped for col in required_columns], tag_column_map=get_tag_column_map(), promoted_tags=get_promoted_tags(), state_name=ReplacerState.EVENTS, use_promoted_prewhere=True, ), )
schema = WritableTableSchema(
    columns=all_columns,
    local_table_name="sentry_local",
    dist_table_name="sentry_dist",
    storage_set_key=StorageSetKey.EVENTS,
    mandatory_conditions=mandatory_conditions,
    prewhere_candidates=prewhere_candidates,
)

storage = WritableTableStorage(
    storage_key=StorageKey.EVENTS,
    storage_set_key=StorageSetKey.EVENTS,
    schema=schema,
    query_processors=query_processors,
    stream_loader=KafkaStreamLoader(
        processor=EventsProcessor(promoted_tag_columns),
        default_topic="events",
        replacement_topic="event-replacements",
        commit_log_topic="snuba-commit-log",
    ),
    query_splitters=query_splitters,
    replacer_processor=ErrorsReplacer(
        schema=schema,
        required_columns=[col.escaped for col in required_columns],
        tag_column_map=get_tag_column_map(),
        promoted_tags=get_promoted_tags(),
        state_name=ReplacerState.EVENTS,
    ),
)
prewhere_candidates=["event_id", "transaction_name", "project_id"], order_by="(project_id, _finish_date, transaction_name, cityHash64(span_id))", partition_by="(retention_days, toMonday(_finish_date))", version_column="deleted", sample_expr=None, migration_function=transactions_migrations, ) storage = WritableTableStorage( schemas=StorageSchemas(read_schema=schema, write_schema=schema), table_writer=TransactionsTableWriter( write_schema=schema, stream_loader=KafkaStreamLoader( processor=TransactionsMessageProcessor(), default_topic="events", ), ), query_processors=[ NestedFieldConditionOptimizer( "tags", "_tags_flattened", {"start_ts", "finish_ts"}, BEGINNING_OF_TIME, ), NestedFieldConditionOptimizer( "contexts", "_contexts_flattened", {"start_ts", "finish_ts"}, BEGINNING_OF_TIME, ), TransactionColumnProcessor(), PrewhereProcessor(), ], )
dist_table_name="transactions_dist", storage_set_key=StorageSetKey.TRANSACTIONS, mandatory_conditions=[], prewhere_candidates=[ "event_id", "transaction_name", "transaction", "title" ], part_format=[util.PartSegment.RETENTION_DAYS, util.PartSegment.DATE], ) storage = WritableTableStorage( storage_key=StorageKey.TRANSACTIONS, storage_set_key=StorageSetKey.TRANSACTIONS, schema=schema, query_processors=[ MappingOptimizer("tags", "_tags_hash_map", "tags_hash_map_enabled"), EventIdColumnProcessor(), ArrayJoinKeyValueOptimizer("tags"), ArrayJoinKeyValueOptimizer("measurements"), UUIDColumnProcessor(set(["event_id", "trace_id"])), PrewhereProcessor(), ], stream_loader=build_kafka_stream_loader_from_settings( StorageKey.TRANSACTIONS, processor=TransactionsMessageProcessor(), default_topic_name="events", commit_log_topic_name="snuba-commit-log", ), query_splitters=[TimeSplitQueryStrategy(timestamp_col="finish_ts")], writer_options={"insert_allow_materialized_columns": 1}, )
    columns=columns,
    local_table_name="transactions_local",
    dist_table_name="transactions_dist",
    storage_set_key=StorageSetKey.TRANSACTIONS,
    mandatory_conditions=[],
    part_format=[util.PartSegment.RETENTION_DAYS, util.PartSegment.DATE],
)

storage = WritableTableStorage(
    storage_key=StorageKey.TRANSACTIONS,
    storage_set_key=StorageSetKey.TRANSACTIONS,
    schema=schema,
    query_processors=query_processors,
    stream_loader=build_kafka_stream_loader_from_settings(
        processor=TransactionsMessageProcessor(),
        pre_filter=KafkaHeaderFilterWithBypass("transaction_forwarder", "0", 100),
        default_topic=Topic.TRANSACTIONS,
        commit_log_topic=Topic.TRANSACTIONS_COMMIT_LOG,
        subscription_scheduler_mode=SchedulingWatermarkMode.GLOBAL,
        subscription_scheduled_topic=Topic.SUBSCRIPTION_SCHEDULED_TRANSACTIONS,
        subscription_result_topic=Topic.SUBSCRIPTION_RESULTS_TRANSACTIONS,
    ),
    query_splitters=query_splitters,
    mandatory_condition_checkers=mandatory_condition_checkers,
    writer_options={
        "insert_allow_materialized_columns": 1,
        "input_format_skip_unknown_fields": 1,
    },
)
materialized_view_schema = MaterializedViewSchema(
    local_materialized_view_name="outcomes_mv_hourly_local",
    dist_materialized_view_name="outcomes_mv_hourly_dist",
    storage_set_key=StorageSetKey.OUTCOMES,
    prewhere_candidates=["project_id", "org_id"],
    columns=materialized_view_columns,
    query=query,
    local_source_table_name=WRITE_LOCAL_TABLE_NAME,
    local_destination_table_name=READ_LOCAL_TABLE_NAME,
    dist_source_table_name=WRITE_DIST_TABLE_NAME,
    dist_destination_table_name=READ_DIST_TABLE_NAME,
)

raw_storage = WritableTableStorage(
    storage_key=StorageKey.OUTCOMES_RAW,
    storage_set_key=StorageSetKey.OUTCOMES,
    schema=raw_schema,
    query_processors=[],
    stream_loader=KafkaStreamLoader(
        processor=OutcomesProcessor(),
        default_topic="outcomes",
    ),
)

materialized_storage = ReadableTableStorage(
    storage_key=StorageKey.OUTCOMES_HOURLY,
    storage_set_key=StorageSetKey.OUTCOMES,
    schema=read_schema,
    query_processors=[PrewhereProcessor()],
)
)

storage = WritableTableStorage(
    storage_key=StorageKey.ERRORS,
    storage_set_key=StorageSetKey.EVENTS,
    schema=schema,
    query_processors=query_processors,
    query_splitters=query_splitters,
    stream_loader=build_kafka_stream_loader_from_settings(
        StorageKey.ERRORS,
        processor=ErrorsProcessor(promoted_tag_columns),
        default_topic_name="events",
        replacement_topic_name="event-replacements",
        commit_log_topic_name="snuba-commit-log",
    ),
    replacer_processor=ErrorsReplacer(
        schema=schema,
        required_columns=required_columns,
        tag_column_map={"tags": promoted_tag_columns, "contexts": {}},
        promoted_tags={"tags": list(promoted_tag_columns.keys()), "contexts": []},
        state_name=ReplacerState.ERRORS,
        use_promoted_prewhere=False,
    ),
)
("platform", String()), ("trace_id", UUID()), ("transaction_name", String()), ("version_name", String()), ("version_code", String()), ] ) writable_columns = readable_columns + ColumnSet( [("retention_days", UInt(16)), ("partition", UInt(16)), ("offset", UInt(64))] ) writable_schema = WritableTableSchema( columns=writable_columns, local_table_name=PROFILES_LOCAL_TABLE_NAME, dist_table_name=PROFILES_DIST_TABLE_NAME, storage_set_key=StorageSetKey.PROFILES, ) writable_storage = WritableTableStorage( storage_key=StorageKey.PROFILES, storage_set_key=StorageSetKey.PROFILES, schema=writable_schema, query_processors=processors, mandatory_condition_checkers=[ OrgIdEnforcer("organization_id"), ProjectIdEnforcer(), ], stream_loader=loader, )
"deleted", "retention_days", ] storage = WritableTableStorage( storage_key=StorageKey.ERRORS, storage_set_key=StorageSetKey.EVENTS, schema=schema, query_processors=[ PostReplacementConsistencyEnforcer( project_column="project_id", replacer_state_name=ReplacerState.ERRORS, ), MappingColumnPromoter(mapping_specs={"tags": promoted_tag_columns}), ArrayJoinKeyValueOptimizer("tags"), PrewhereProcessor(), ], stream_loader=KafkaStreamLoader( processor=ErrorsProcessor(promoted_tag_columns), default_topic="events", replacement_topic="errors-replacements", ), replacer_processor=ErrorsReplacer( write_schema=schema, read_schema=schema, required_columns=required_columns, tag_column_map={"tags": promoted_tag_columns, "contexts": {}}, promoted_tags={"tags": list(promoted_tag_columns.keys()), "contexts": []}, state_name=ReplacerState.ERRORS, ), )
}

storage = WritableTableStorage(
    schemas=StorageSchemas(read_schema=schema, write_schema=schema),
    table_writer=TableWriter(
        write_schema=schema,
        stream_loader=KafkaStreamLoader(
            processor=EventsProcessor(promoted_tag_columns),
            default_topic="events",
            replacement_topic="event-replacements",
            commit_log_topic="snuba-commit-log",
        ),
        replacer_processor=ErrorsReplacer(
            write_schema=schema,
            read_schema=schema,
            required_columns=[col.escaped for col in required_columns],
            tag_column_map=get_tag_column_map(),
            promoted_tags=get_promoted_tags(),
            state_name=ReplacerState.EVENTS,
        ),
    ),
    query_processors=[
        # TODO: This one should become an entirely separate storage and picked
        # in the storage selector.
        ReadOnlyTableSelector("sentry_dist", "sentry_dist_ro"),
        EventsColumnProcessor(),
        PrewhereProcessor(),
    ],
)
    dist_table_name=READ_DIST_TABLE_NAME,
    storage_set_key=StorageSetKey.SESSIONS,
)

materialized_view_schema = TableSchema(
    local_table_name=READ_LOCAL_MV_NAME,
    dist_table_name=READ_DIST_MV_NAME,
    storage_set_key=StorageSetKey.SESSIONS,
    columns=read_columns,
)

# The raw table we write onto, and that potentially we could
# query.
raw_storage = WritableTableStorage(
    storage_key=StorageKey.SESSIONS_RAW,
    storage_set_key=StorageSetKey.SESSIONS,
    schema=raw_schema,
    query_processors=[],
    stream_loader=build_kafka_stream_loader_from_settings(
        StorageKey.SESSIONS_RAW,
        processor=SessionsProcessor(),
        default_topic_name="ingest-sessions",
    ),
)

# The materialized view we query aggregate data from.
materialized_storage = ReadableTableStorage(
    storage_key=StorageKey.SESSIONS_HOURLY,
    storage_set_key=StorageSetKey.SESSIONS,
    schema=read_schema,
    query_processors=[PrewhereProcessor(["project_id", "org_id"])],
)
("category", UInt(8)), ]) materialized_view_schema = TableSchema( local_table_name="outcomes_mv_hourly_local", dist_table_name="outcomes_mv_hourly_dist", storage_set_key=StorageSetKey.OUTCOMES, columns=materialized_view_columns, ) raw_storage = WritableTableStorage( storage_key=StorageKey.OUTCOMES_RAW, storage_set_key=StorageSetKey.OUTCOMES, schema=raw_schema, query_processors=[TableRateLimit()], mandatory_condition_checkers=[OrgIdEnforcer()], stream_loader=build_kafka_stream_loader_from_settings( processor=OutcomesProcessor(), default_topic=Topic.OUTCOMES, ), ) materialized_storage = ReadableTableStorage( storage_key=StorageKey.OUTCOMES_HOURLY, storage_set_key=StorageSetKey.OUTCOMES, schema=read_schema, query_processors=[ PrewhereProcessor(["project_id", "org_id"]), TableRateLimit() ], mandatory_condition_checkers=[OrgIdEnforcer()],
    # sdk info
    ("sdk_name", String()),
    ("sdk_version", String()),
    ("tags", Nested([("key", String()), ("value", String())])),
    # deletion info
    ("retention_days", UInt(16)),
    ("partition", UInt(16)),
    ("offset", UInt(64)),
])

schema = WritableTableSchema(
    columns=columns,
    local_table_name=LOCAL_TABLE_NAME,
    dist_table_name=DIST_TABLE_NAME,
    storage_set_key=StorageSetKey.REPLAYS,
)

# TODO: set up deadletter queue for bad messages.
storage = WritableTableStorage(
    storage_key=StorageKey.REPLAYS,
    storage_set_key=StorageSetKey.REPLAYS,
    schema=schema,
    query_processors=[TableRateLimit()],
    mandatory_condition_checkers=[ProjectIdEnforcer()],
    stream_loader=build_kafka_stream_loader_from_settings(
        processor=ReplaysProcessor(),
        default_topic=Topic.REPLAYEVENTS,
    ),
)
    dist_materialized_view_name="outcomes_mv_hourly_dist",
    prewhere_candidates=["project_id", "org_id"],
    columns=materialized_view_columns,
    query=query,
    local_source_table_name=WRITE_LOCAL_TABLE_NAME,
    local_destination_table_name=READ_LOCAL_TABLE_NAME,
    dist_source_table_name=WRITE_DIST_TABLE_NAME,
    dist_destination_table_name=READ_DIST_TABLE_NAME,
)

raw_storage = WritableTableStorage(
    schemas=StorageSchemas(read_schema=raw_schema, write_schema=raw_schema),
    table_writer=TableWriter(
        write_schema=raw_schema,
        stream_loader=KafkaStreamLoader(
            processor=OutcomesProcessor(),
            default_topic="outcomes",
        ),
    ),
    query_processors=[],
)

materialized_storage = ReadableTableStorage(
    schemas=StorageSchemas(
        read_schema=read_schema,
        write_schema=None,
        intermediary_schemas=[materialized_view_schema],
    ),
    query_processors=[PrewhereProcessor()],
)
    storage_set_key=StorageSetKey.SESSIONS,
    prewhere_candidates=["project_id", "org_id"],
)

materialized_view_schema = TableSchema(
    local_table_name=READ_LOCAL_MV_NAME,
    dist_table_name=READ_DIST_MV_NAME,
    storage_set_key=StorageSetKey.SESSIONS,
    prewhere_candidates=["project_id", "org_id"],
    columns=read_columns,
)

# The raw table we write onto, and that potentially we could
# query.
raw_storage = WritableTableStorage(
    storage_key=StorageKey.SESSIONS_RAW,
    storage_set_key=StorageSetKey.SESSIONS,
    schema=raw_schema,
    query_processors=[],
    stream_loader=KafkaStreamLoader(
        processor=SessionsProcessor(),
        default_topic="ingest-sessions",
    ),
)

# The materialized view we query aggregate data from.
materialized_storage = ReadableTableStorage(
    storage_key=StorageKey.SESSIONS_HOURLY,
    storage_set_key=StorageSetKey.SESSIONS,
    schema=read_schema,
    query_processors=[PrewhereProcessor()],
)
("clickhouse_queries.is_duplicate", Array(UInt(8))), ("clickhouse_queries.consistent", Array(UInt(8))), ("clickhouse_queries.all_columns", Array(Array(String()))), ("clickhouse_queries.or_conditions", Array(UInt(8))), ("clickhouse_queries.where_columns", Array(Array(String()))), ("clickhouse_queries.where_mapping_columns", Array(Array(String()))), ("clickhouse_queries.groupby_columns", Array(Array(String()))), ("clickhouse_queries.array_join_columns", Array(Array(String()))), ]) # Note, we are using the simplified WritableTableSchema class here instead of # the MergeTreeSchema that corresponds to the actual table engine. This is because # the querylog table isn't generated by the old migration system. schema = WritableTableSchema( columns=columns, local_table_name="querylog_local", dist_table_name="querylog_dist", storage_set_key=StorageSetKey.QUERYLOG, ) storage = WritableTableStorage( storage_key=StorageKey.QUERYLOG, storage_set_key=StorageSetKey.QUERYLOG, schema=schema, query_processors=[], stream_loader=build_kafka_stream_loader_from_settings( processor=QuerylogProcessor(), default_topic=Topic.QUERYLOG, ), )
("max_threads", UInt(8)), ("num_days", UInt(32)), ("clickhouse_table", LowCardinality(String())), ("query_id", String()), ("is_duplicate", UInt(8)), ("consistent", UInt(8)), ]), ), ]) schema = MergeTreeSchema( columns=columns, local_table_name="querylog_local", dist_table_name="querylog_dist", order_by="(toStartOfDay(timestamp), request_id)", partition_by="(toMonday(timestamp))", sample_expr="request_id", ) storage = WritableTableStorage( schemas=StorageSchemas(read_schema=schema, write_schema=schema), table_writer=TableWriter( write_schema=schema, stream_loader=KafkaStreamLoader( processor=QuerylogProcessor(), default_topic=settings.QUERIES_TOPIC, ), ), query_processors=[], )
), ("start_ts", DateTime()), ("start_ns", UInt(32)), ("finish_ts", DateTime()), ("finish_ns", UInt(32)), ("duration_ms", UInt(32)), ("tags", Nested([("key", String()), ("value", String())])), ("_tags_hash_map", Materialized(Array(UInt(64)), TAGS_HASH_MAP_COLUMN)), ("retention_days", UInt(16)), ("deleted", UInt(8)), ]) schema = WritableTableSchema( columns=columns, local_table_name="spans_experimental_local", dist_table_name="spans_experimental_dist", storage_set_key=StorageSetKey.TRANSACTIONS, ) storage = WritableTableStorage( storage_key=StorageKey.SPANS, storage_set_key=StorageSetKey.TRANSACTIONS, schema=schema, query_processors=[PrewhereProcessor()], stream_loader=KafkaStreamLoader( processor=SpansMessageProcessor(), default_topic="events", ), query_splitters=[TimeSplitQueryStrategy(timestamp_col="finish_ts")], )
storage = WritableTableStorage(
    storage_key=StorageKey.TRANSACTIONS,
    storage_set_key=StorageSetKey.TRANSACTIONS,
    schema=schema,
    query_processors=[
        MappingColumnPromoter(
            mapping_specs={
                "tags": {
                    "environment": "environment",
                    "sentry:release": "release",
                    "sentry:dist": "dist",
                    "sentry:user": "******",
                },
                "contexts": {
                    "trace.trace_id": "trace_id",
                    "trace.span_id": "span_id",
                },
            }
        ),
        UUIDColumnProcessor(set(["event_id", "trace_id"])),
        HexIntColumnProcessor({"span_id"}),
        EventsBooleanContextsProcessor(),
        MappingOptimizer("tags", "_tags_hash_map", "tags_hash_map_enabled"),
        EmptyTagConditionProcessor(),
        ArrayJoinKeyValueOptimizer("tags"),
        ArrayJoinKeyValueOptimizer("measurements"),
        ArrayJoinKeyValueOptimizer("span_op_breakdowns"),
        PrewhereProcessor(
            [
                "event_id",
                "trace_id",
                "span_id",
                "transaction_name",
                "transaction",
                "title",
            ]
        ),
        TableRateLimit(),
    ],
    stream_loader=build_kafka_stream_loader_from_settings(
        processor=TransactionsMessageProcessor(),
        default_topic=Topic.EVENTS,
        commit_log_topic=Topic.COMMIT_LOG,
        subscription_result_topic=Topic.SUBSCRIPTION_RESULTS_TRANSACTIONS,
    ),
    query_splitters=[TimeSplitQueryStrategy(timestamp_col="finish_ts")],
    mandatory_condition_checkers=[ProjectIdEnforcer()],
    writer_options={"insert_allow_materialized_columns": 1},
)
    # during create statement
    # (https://github.com/ClickHouse/ClickHouse/issues/12586), so the
    # materialization is added with a migration.
    skipped_cols_on_creation={"_tags_hash_map"},
)

storage = WritableTableStorage(
    storage_key=StorageKey.TRANSACTIONS,
    storage_set_key=StorageSetKey.TRANSACTIONS,
    schema=schema,
    query_processors=[
        NestedFieldConditionOptimizer(
            "contexts",
            "_contexts_flattened",
            {"start_ts", "finish_ts"},
            BEGINNING_OF_TIME,
        ),
        MappingOptimizer("tags", "_tags_hash_map", "tags_hash_map_enabled"),
        TransactionColumnProcessor(),
        ArrayJoinKeyValueOptimizer("tags"),
        ArrayJoinKeyValueOptimizer("measurements"),
        PrewhereProcessor(),
    ],
    stream_loader=KafkaStreamLoader(
        processor=TransactionsMessageProcessor(),
        default_topic="events",
    ),
    query_splitters=[TimeSplitQueryStrategy(timestamp_col="finish_ts")],
    writer_options={"insert_allow_materialized_columns": 1},
)
("outcome", UInt(8)), ("reason", String()), ("times_seen", UInt(64)), ]) materialized_view_schema = TableSchema( local_table_name="outcomes_mv_hourly_local", dist_table_name="outcomes_mv_hourly_dist", storage_set_key=StorageSetKey.OUTCOMES, columns=materialized_view_columns, ) raw_storage = WritableTableStorage( storage_key=StorageKey.OUTCOMES_RAW, storage_set_key=StorageSetKey.OUTCOMES, schema=raw_schema, query_processors=[], stream_loader=build_kafka_stream_loader_from_settings( StorageKey.OUTCOMES_RAW, processor=OutcomesProcessor(), default_topic_name="outcomes", ), ) materialized_storage = ReadableTableStorage( storage_key=StorageKey.OUTCOMES_HOURLY, storage_set_key=StorageSetKey.OUTCOMES, schema=read_schema, query_processors=[PrewhereProcessor(["project_id", "org_id"])], )