# Name of the distributed materialized-view table for the hourly sessions rollup.
READ_DIST_MV_NAME = "sessions_hourly_mv_dist"

# Full column layout of the raw sessions table. The declaration order is
# significant (it defines the physical column order of the table), so do not
# reorder entries.
all_columns = ColumnSet(
    [
        ("session_id", UUID()),
        ("distinct_id", UUID()),
        ("quantity", UInt(32)),
        ("seq", UInt(64)),
        ("org_id", UInt(64)),
        ("project_id", UInt(64)),
        ("retention_days", UInt(16)),
        ("duration", UInt(32)),
        ("status", UInt(8)),
        ("errors", UInt(16)),
        ("received", DateTime()),
        ("started", DateTime()),
        ("release", String()),
        ("environment", String()),
        ("user_agent", String()),
        ("os", String()),
    ]
)

# Writable schema for the raw sessions data, bound to the local and
# distributed write table names defined elsewhere in this module.
raw_schema = WritableTableSchema(
    columns=all_columns,
    local_table_name=WRITE_LOCAL_TABLE_NAME,
    dist_table_name=WRITE_DIST_TABLE_NAME,
    storage_set_key=StorageSetKey.SESSIONS,
)
columns = [ Column("project_id", UInt(64)), Column("transaction_id", UUID()), Column("trace_id", UUID()), Column("transaction_span_id", UInt(64)), Column("span_id", UInt(64)), Column("parent_span_id", Nullable(UInt(64))), Column("transaction_name", LowCardinality(String())), Column("description", String()), # description in span Column("op", LowCardinality(String())), Column( "status", WithDefault(UInt(8), str(UNKNOWN_SPAN_STATUS)), ), Column("start_ts", DateTime()), Column("start_ns", UInt(32)), Column("finish_ts", DateTime()), Column("finish_ns", UInt(32)), Column("duration_ms", UInt(32)), tags_col, Column("retention_days", UInt(16)), Column("deleted", UInt(8)), ] class Migration(migration.MultiStepMigration): blocking = False def forwards_local(self) -> Sequence[operations.Operation]: return [
from snuba.query.processors.arrayjoin_keyvalue_optimizer import ( ArrayJoinKeyValueOptimizer, ) from snuba.query.processors.mapping_optimizer import MappingOptimizer from snuba.query.processors.prewhere import PrewhereProcessor from snuba.web.split import TimeSplitQueryStrategy columns = ColumnSet([ ("project_id", UInt(64)), ("event_id", UUID()), ("trace_id", UUID()), ("span_id", UInt(64)), ("transaction_name", String()), ("transaction_hash", UInt(64, Modifiers(readonly=True))), ("transaction_op", String()), ("transaction_status", UInt(8)), ("start_ts", DateTime()), ("start_ms", UInt(16)), ("finish_ts", DateTime()), ("finish_ms", UInt(16)), ("duration", UInt(32)), ("platform", String()), ("environment", String(Modifiers(nullable=True))), ("release", String(Modifiers(nullable=True))), ("dist", String(Modifiers(nullable=True))), ("ip_address_v4", IPv4(Modifiers(nullable=True))), ("ip_address_v6", IPv6(Modifiers(nullable=True))), ("user", String()), ("user_hash", UInt(64, Modifiers(readonly=True))), ("user_id", String(Modifiers(nullable=True))), ("user_name", String(Modifiers(nullable=True))), ("user_email", String(Modifiers(nullable=True))),
Float, Nested, String, UInt, ) from snuba.clusters.storage_sets import StorageSetKey from snuba.datasets.storages.tags_hash_map import TAGS_HASH_MAP_COLUMN from snuba.migrations import migration, operations, table_engines from snuba.migrations.columns import MigrationModifiers as Modifiers # List from 0001_events_initial hierachical hashes columns columns: List[Column[Modifiers]] = [ Column("event_id", FixedString(32)), Column("project_id", UInt(64)), Column("group_id", UInt(64)), Column("timestamp", DateTime()), Column("deleted", UInt(8)), Column("retention_days", UInt(16)), Column("platform", String(Modifiers(nullable=True))), Column("message", String(Modifiers(nullable=True))), Column("primary_hash", FixedString(32, Modifiers(nullable=True))), Column("hierarchical_hashes", Array(FixedString(32))), Column("received", DateTime(Modifiers(nullable=True))), Column("search_message", String(Modifiers(nullable=True))), Column("title", String(Modifiers(nullable=True))), Column("location", String(Modifiers(nullable=True))), Column("user_id", String(Modifiers(nullable=True))), Column("username", String(Modifiers(nullable=True))), Column("email", String(Modifiers(nullable=True))), Column("ip_address", String(Modifiers(nullable=True))), Column("geo_country_code", String(Modifiers(nullable=True))),
from typing import Sequence

from snuba.clickhouse.columns import Column, DateTime, Enum, String, UInt, WithDefault
from snuba.clusters.storage_sets import StorageSetKey
from snuba.migrations import migration, operations
from snuba.migrations.context import Context
from snuba.migrations.status import Status
from snuba.migrations.table_engines import Distributed, ReplacingMergeTree

# Columns of the migrations bookkeeping table itself: one row per
# (group, migration_id) with its current status and a version counter.
columns = [
    Column("group", String()),
    Column("migration_id", String()),
    Column("timestamp", DateTime()),
    Column(
        "status",
        Enum([("completed", 0), ("in_progress", 1), ("not_started", 2)]),
    ),
    # NOTE(review): version presumably drives row replacement in the
    # ReplacingMergeTree engine imported above — confirm against the engine
    # definition further down the file.
    Column("version", WithDefault(UInt(64), "1")),
]


class Migration(migration.Migration):
    """
    This migration extends Migration instead of MultiStepMigration since it
    is responsible for bootstrapping the migration system itself.
    It skips setting the in progress status in the forwards method and the
    not started status in the backwards method. Since the migration table
    doesn't exist yet, we can't write any statuses until this migration is completed.
    """

    blocking = False
from snuba.clickhouse.columns import ColumnSet, DateTime from snuba.clickhouse.columns import SchemaModifiers as Modifiers from snuba.clickhouse.columns import String, UInt from snuba.clickhouse.query import Query from snuba.clusters.storage_sets import StorageSetKey from snuba.datasets.schemas.tables import TableSchema from snuba.datasets.storage import ReadableTableStorage from snuba.datasets.storages import StorageKey from snuba.query import SelectedExpression from snuba.query.data_source.simple import Table from snuba.query.expressions import Column, FunctionCall, Literal from snuba.query.processors.null_column_caster import NullColumnCaster from snuba.query.query_settings import HTTPQuerySettings columns1 = ColumnSet([ ("not_mismatched", DateTime()), ("mismatched1", String(Modifiers(nullable=True))), ("mismatched2", UInt(64, Modifiers(nullable=True))), ]) columns2 = ColumnSet([ ("timestamp", DateTime()), ("mismatched1", String()), # non-nullable by default ("mismatched2", UInt(64, Modifiers(nullable=False))), ]) schema1 = TableSchema( columns=columns1, local_table_name="discover_local", dist_table_name="discover_dist", storage_set_key=StorageSetKey.DISCOVER,
MaterializedViewSchema,
)
from snuba.datasets.storages import StorageKey
from snuba.datasets.table_storage import KafkaStreamLoader
from snuba.query.processors.prewhere import PrewhereProcessor

# Table names for the raw (write-side) outcomes data and the hourly
# rollup (read-side) tables, local and distributed variants.
WRITE_LOCAL_TABLE_NAME = "outcomes_raw_local"
WRITE_DIST_TABLE_NAME = "outcomes_raw_dist"
READ_LOCAL_TABLE_NAME = "outcomes_hourly_local"
READ_DIST_TABLE_NAME = "outcomes_hourly_dist"

# Column layout of the raw outcomes table; declaration order is significant.
write_columns = ColumnSet(
    [
        ("org_id", UInt(64)),
        ("project_id", UInt(64)),
        ("key_id", Nullable(UInt(64))),
        ("timestamp", DateTime()),
        ("outcome", UInt(8)),
        ("reason", LowCardinality(Nullable(String()))),
        ("event_id", Nullable(UUID())),
    ]
)

# MergeTree schema for the raw table, ordered and partitioned by time so
# per-project time-range scans stay cheap.
raw_schema = MergeTreeSchema(
    columns=write_columns,
    # TODO: change to outcomes.raw_local when we add multi DB support
    local_table_name=WRITE_LOCAL_TABLE_NAME,
    dist_table_name=WRITE_DIST_TABLE_NAME,
    storage_set_key=StorageSetKey.OUTCOMES,
    order_by="(org_id, project_id, timestamp)",
    partition_by="(toMonday(timestamp))",
    settings={"index_granularity": "16384"},
)
pytest.param( Float(64, Modifier(nullable=True)), Float(64), Float(32, Modifier(nullable=True)), "Nullable(Float64)", id="floats", ), pytest.param( Date(), Date(), Date(Modifier(nullable=True)), "Date", id="dates", ), pytest.param( DateTime(), DateTime(), DateTime(Modifier(nullable=True)), "DateTime", id="datetimes", ), pytest.param( Array(String(Modifier(nullable=True))), Array(String()), Array(String()), "Array(Nullable(String))", id="arrays", ), pytest.param( Nested( [("key", String()), ("val", String(Modifier(nullable=True)))],
def __init__(self) -> None:
    # Columns present on both the events and transactions storages; queries
    # against this merged dataset can always reference these directly.
    self.__common_columns = ColumnSet(
        [
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", Nullable(String())),
            ("timestamp", DateTime()),
            ("platform", Nullable(String())),
            ("environment", Nullable(String())),
            ("release", Nullable(String())),
            ("dist", Nullable(String())),
            ("user", Nullable(String())),
            ("transaction", Nullable(String())),
            ("message", Nullable(String())),
            ("title", Nullable(String())),
            # User
            ("user_id", Nullable(String())),
            ("username", Nullable(String())),
            ("email", Nullable(String())),
            ("ip_address", Nullable(String())),
            # SDK
            ("sdk_name", Nullable(String())),
            ("sdk_version", Nullable(String())),
            # geo location context
            ("geo_country_code", Nullable(String())),
            ("geo_region", Nullable(String())),
            ("geo_city", Nullable(String())),
            ("http_method", Nullable(String())),
            ("http_referer", Nullable(String())),
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
        ]
    )
    # Columns that only exist on the events storage.
    self.__events_columns = ColumnSet(
        [
            ("group_id", Nullable(UInt(64))),
            ("primary_hash", Nullable(FixedString(32))),
            # Promoted tags
            ("level", Nullable(String())),
            ("logger", Nullable(String())),
            ("server_name", Nullable(String())),
            ("site", Nullable(String())),
            ("url", Nullable(String())),
            ("search_message", Nullable(String())),
            ("location", Nullable(String())),
            ("culprit", Nullable(String())),
            ("received", Nullable(DateTime())),
            ("sdk_integrations", Nullable(Array(String()))),
            ("version", Nullable(String())),
            # exception interface
            (
                "exception_stacks",
                Nested(
                    [
                        ("type", Nullable(String())),
                        ("value", Nullable(String())),
                        ("mechanism_type", Nullable(String())),
                        ("mechanism_handled", Nullable(UInt(8))),
                    ]
                ),
            ),
            (
                "exception_frames",
                Nested(
                    [
                        ("abs_path", Nullable(String())),
                        ("filename", Nullable(String())),
                        ("package", Nullable(String())),
                        ("module", Nullable(String())),
                        ("function", Nullable(String())),
                        ("in_app", Nullable(UInt(8))),
                        ("colno", Nullable(UInt(32))),
                        ("lineno", Nullable(UInt(32))),
                        ("stack_level", UInt(16)),
                    ]
                ),
            ),
            ("modules", Nested([("name", String()), ("version", String())])),
        ]
    )
    # Columns that only exist on the transactions storage.
    self.__transactions_columns = ColumnSet(
        [
            ("trace_id", Nullable(UUID())),
            ("span_id", Nullable(UInt(64))),
            ("transaction_hash", Nullable(UInt(64))),
            ("transaction_op", Nullable(String())),
            ("transaction_status", Nullable(UInt(8))),
            ("duration", Nullable(UInt(32))),
            (
                "measurements",
                Nested([("key", LowCardinality(String())), ("value", Float(64))]),
            ),
        ]
    )

    events_storage = get_storage(StorageKey.EVENTS)
    events_ro_storage = get_storage(StorageKey.EVENTS_RO)
    transactions_storage = get_storage(StorageKey.TRANSACTIONS)

    # No time-group column remapping; only "timestamp" is parsed as a time
    # condition column.
    self.__time_group_columns: Mapping[str, str] = {}
    self.__time_parse_columns = ("timestamp",)

    super().__init__(
        storages=[events_storage, transactions_storage],
        # The selector picks events / events-ro / transactions per query based
        # on which abstract columns the query references.
        query_plan_builder=SelectedStorageQueryPlanBuilder(
            selector=DiscoverQueryStorageSelector(
                events_table=events_storage,
                events_ro_table=events_ro_storage,
                abstract_events_columns=self.__events_columns,
                transactions_table=transactions_storage,
                abstract_transactions_columns=self.__transactions_columns,
            ),
        ),
        # The abstract schema exposed to queries is the union of the common,
        # events-only and transactions-only column sets.
        abstract_column_set=(
            self.__common_columns
            + self.__events_columns
            + self.__transactions_columns
        ),
        # This dataset is read-only.
        writable_storage=None,
    )
WithCodecs, WithDefault, ) from snuba.datasets.dataset_schemas import StorageSchemas from snuba.datasets.errors_processor import ErrorsProcessor from snuba.datasets.errors_replacer import ErrorsReplacer, ReplacerState from snuba.datasets.schemas.tables import ReplacingMergeTreeSchema from snuba.datasets.storage import WritableTableStorage from snuba.datasets.table_storage import TableWriter, KafkaStreamLoader from snuba.query.processors.prewhere import PrewhereProcessor all_columns = ColumnSet( [ ("org_id", UInt(64)), ("project_id", UInt(64)), ("timestamp", DateTime()), ("event_id", WithCodecs(UUID(), ["NONE"])), ( "event_hash", WithCodecs( Materialized(UInt(64), "cityHash64(toString(event_id))",), ["NONE"], ), ), ("platform", LowCardinality(String())), ("environment", LowCardinality(Nullable(String()))), ("release", LowCardinality(Nullable(String()))), ("dist", LowCardinality(Nullable(String()))), ("ip_address_v4", Nullable(IPv4())), ("ip_address_v6", Nullable(IPv6())), ("user", WithDefault(String(), "''")),
from snuba.query.processors.uniq_in_select_and_having import ( UniqInSelectAndHavingProcessor, ) from snuba.web.split import TimeSplitQueryStrategy columns = ColumnSet( [ ("project_id", UInt(64)), ("event_id", UUID()), ("trace_id", UUID(Modifiers(nullable=True))), ("span_id", UInt(64)), ("transaction_name", String()), ("transaction_hash", UInt(64, Modifiers(readonly=True))), ("transaction_op", String()), ("transaction_status", UInt(8)), ("start_ts", DateTime()), ("start_ms", UInt(16)), ("finish_ts", DateTime()), ("finish_ms", UInt(16)), ("duration", UInt(32)), ("platform", String()), ("environment", String(Modifiers(nullable=True))), ("release", String(Modifiers(nullable=True))), ("dist", String(Modifiers(nullable=True))), ("ip_address_v4", IPv4(Modifiers(nullable=True))), ("ip_address_v6", IPv6(Modifiers(nullable=True))), ("user", String()), ("user_hash", UInt(64, Modifiers(readonly=True))), ("user_id", String(Modifiers(nullable=True))), ("user_name", String(Modifiers(nullable=True))), ("user_email", String(Modifiers(nullable=True))),
def __init__(self) -> None:
    # Columns available regardless of which storage serves the query.
    self.__common_columns = ColumnSet(
        [
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", String(Modifiers(nullable=True))),
            ("timestamp", DateTime()),
            ("platform", String(Modifiers(nullable=True))),
            ("environment", String(Modifiers(nullable=True))),
            ("release", String(Modifiers(nullable=True))),
            ("dist", String(Modifiers(nullable=True))),
            ("user", String(Modifiers(nullable=True))),
            ("transaction", String(Modifiers(nullable=True))),
            ("message", String(Modifiers(nullable=True))),
            ("title", String(Modifiers(nullable=True))),
            # User
            ("user_id", String(Modifiers(nullable=True))),
            ("username", String(Modifiers(nullable=True))),
            ("email", String(Modifiers(nullable=True))),
            ("ip_address", String(Modifiers(nullable=True))),
            # SDK
            ("sdk_name", String(Modifiers(nullable=True))),
            ("sdk_version", String(Modifiers(nullable=True))),
            # geo location context
            ("geo_country_code", String(Modifiers(nullable=True))),
            ("geo_region", String(Modifiers(nullable=True))),
            ("geo_city", String(Modifiers(nullable=True))),
            ("http_method", String(Modifiers(nullable=True))),
            ("http_referer", String(Modifiers(nullable=True))),
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
        ]
    )
    self.__events_columns = EVENTS_COLUMNS
    self.__transactions_columns = TRANSACTIONS_COLUMNS

    events_storage = get_storage(StorageKey.EVENTS)
    # Pipeline serving queries from the events storage, translating abstract
    # discover columns into the events storage's columns/tags.
    events_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=SelectedStorageQueryPlanBuilder(
            selector=EventsQueryStorageSelector(
                mappers=events_translation_mappers.concat(
                    transaction_translation_mappers
                )
                .concat(null_function_translation_mappers)
                .concat(
                    TranslationMappers(
                        # XXX: Remove once we are using errors
                        columns=[
                            ColumnToMapping(
                                None, "release", None, "tags", "sentry:release"
                            ),
                            ColumnToMapping(None, "dist", None, "tags", "sentry:dist"),
                            ColumnToMapping(None, "user", None, "tags", "sentry:user"),
                        ],
                        subscriptables=[
                            SubscriptableMapper(None, "tags", None, "tags"),
                            SubscriptableMapper(None, "contexts", None, "contexts"),
                        ],
                    )
                )
            )
        ),
    )

    discover_storage = get_storage(StorageKey.DISCOVER)
    # Pipeline serving the same queries from the dedicated discover storage,
    # mapping the abstract ip/user/geo columns onto its concrete columns.
    discover_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=discover_storage,
            mappers=events_translation_mappers.concat(
                transaction_translation_mappers
            )
            .concat(null_function_translation_mappers)
            .concat(
                TranslationMappers(
                    columns=[
                        # ip_address is stored split by IP version; expose it
                        # as one column via coalesce over both variants.
                        ColumnToFunction(
                            None,
                            "ip_address",
                            "coalesce",
                            (
                                FunctionCall(
                                    None,
                                    "IPv4NumToString",
                                    (Column(None, None, "ip_address_v4"),),
                                ),
                                FunctionCall(
                                    None,
                                    "IPv6NumToString",
                                    (Column(None, None, "ip_address_v6"),),
                                ),
                            ),
                        ),
                        ColumnToColumn(None, "transaction", None, "transaction_name"),
                        ColumnToColumn(None, "username", None, "user_name"),
                        ColumnToColumn(None, "email", None, "user_email"),
                        ColumnToMapping(
                            None,
                            "geo_country_code",
                            None,
                            "contexts",
                            "geo.country_code",
                        ),
                        ColumnToMapping(
                            None, "geo_region", None, "contexts", "geo.region"
                        ),
                        ColumnToMapping(None, "geo_city", None, "contexts", "geo.city"),
                        # Empty user strings are surfaced as NULL.
                        ColumnToFunction(
                            None,
                            "user",
                            "nullIf",
                            (Column(None, None, "user"), Literal(None, "")),
                        ),
                    ]
                )
            )
            .concat(
                TranslationMappers(
                    subscriptables=[
                        SubscriptableMapper(None, "tags", None, "tags"),
                        SubscriptableMapper(None, "contexts", None, "contexts"),
                    ],
                )
            ),
        )
    )

    def selector_func(_query: Query) -> Tuple[str, List[str]]:
        # Always serve from the events pipeline; additionally mirror a
        # configurable fraction of queries to the discover pipeline (the
        # runtime config defaults to 0, i.e. mirroring disabled).
        if random.random() < float(
            state.get_config("discover_query_percentage", 0)
        ):
            return "events", ["discover"]
        return "events", []

    super().__init__(
        storages=[events_storage, discover_storage],
        query_pipeline_builder=PipelineDelegator(
            query_pipeline_builders={
                "events": events_pipeline_builder,
                "discover": discover_pipeline_builder,
            },
            selector_func=selector_func,
            callback_func=partial(callback_func, "discover"),
        ),
        # Abstract schema exposed to queries: union of the three column sets.
        abstract_column_set=(
            self.__common_columns
            + self.__events_columns
            + self.__transactions_columns
        ),
        join_relationships={},
        # This dataset is read-only.
        writable_storage=None,
    )
def __init__(self) -> None:
    # Columns available regardless of which storage serves the query.
    self.__common_columns = ColumnSet(
        [
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", String(Modifiers(nullable=True))),
            ("timestamp", DateTime()),
            ("platform", String(Modifiers(nullable=True))),
            ("environment", String(Modifiers(nullable=True))),
            ("release", String(Modifiers(nullable=True))),
            ("dist", String(Modifiers(nullable=True))),
            ("user", String(Modifiers(nullable=True))),
            ("transaction", String(Modifiers(nullable=True))),
            ("message", String(Modifiers(nullable=True))),
            ("title", String(Modifiers(nullable=True))),
            # User
            ("user_id", String(Modifiers(nullable=True))),
            ("username", String(Modifiers(nullable=True))),
            ("email", String(Modifiers(nullable=True))),
            ("ip_address", String(Modifiers(nullable=True))),
            # SDK
            ("sdk_name", String(Modifiers(nullable=True))),
            ("sdk_version", String(Modifiers(nullable=True))),
            # geo location context
            ("geo_country_code", String(Modifiers(nullable=True))),
            ("geo_region", String(Modifiers(nullable=True))),
            ("geo_city", String(Modifiers(nullable=True))),
            ("http_method", String(Modifiers(nullable=True))),
            ("http_referer", String(Modifiers(nullable=True))),
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
        ]
    )
    # Storage-specific column sets are shared module-level constants.
    self.__events_columns = EVENTS_COLUMNS
    self.__transactions_columns = TRANSACTIONS_COLUMNS

    events_storage = get_storage(StorageKey.EVENTS)

    super().__init__(
        storages=[events_storage],
        query_plan_builder=SelectedStorageQueryPlanBuilder(
            selector=EventsQueryStorageSelector(
                mappers=events_translation_mappers.concat(
                    transaction_translation_mappers
                )
                .concat(null_function_translation_mappers)
                .concat(
                    TranslationMappers(
                        # XXX: Remove once we are using errors
                        columns=[
                            ColumnToMapping(
                                None, "release", None, "tags", "sentry:release"
                            ),
                            ColumnToMapping(None, "dist", None, "tags", "sentry:dist"),
                            ColumnToMapping(None, "user", None, "tags", "sentry:user"),
                        ],
                        subscriptables=[
                            SubscriptableMapper(None, "tags", None, "tags"),
                            SubscriptableMapper(None, "contexts", None, "contexts"),
                        ],
                    )
                )
            )
        ),
        # Abstract schema exposed to queries: union of the three column sets.
        abstract_column_set=(
            self.__common_columns
            + self.__events_columns
            + self.__transactions_columns
        ),
        # This dataset is read-only.
        writable_storage=None,
    )
LowCardinality, Materialized, Nullable, String, UInt, UUID, WithCodecs, WithDefault, ) from snuba.migrations.parse_schema import _get_column test_data = [ # Basic types (("Date", "", "", ""), Date()), (("DateTime", "", "", ""), DateTime()), ( ("Enum8('success' = 0, 'error' = 1)", "", "", ""), Enum([("success", 0), ("error", 1)]), ), (("FixedString(32)", "", "", ""), FixedString(32)), (("Float32", "", "", ""), Float(32)), (("IPv4", "", "", ""), IPv4()), (("IPv6", "", "", ""), IPv6()), (("String", "", "", ""), String()), (("UInt32", "", "", ""), UInt(32)), (("UUID", "", "", ""), UUID()), # Aggregate functions ( ("AggregateFunction(uniq, UInt8)", "", "", ""), AggregateFunction("uniq", UInt(8)),
Column, DateTime, Nullable, UInt, ) from snuba.clusters.storage_sets import StorageSetKey from snuba.migrations import migration, operations, table_engines columns = [ # Kafka topic offset Column("offset", UInt(64)), Column("record_deleted", UInt(8)), # PG columns Column("project_id", UInt(64)), Column("group_id", UInt(64)), Column("date_added", Nullable(DateTime())), Column("user_id", Nullable(UInt(64))), Column("team_id", Nullable(UInt(64))), ] class Migration(migration.MultiStepMigration): blocking = False def forwards_local(self) -> Sequence[operations.Operation]: return [ operations.CreateTable( storage_set=StorageSetKey.EVENTS, table_name="groupassignee_local", columns=columns, engine=table_engines.ReplacingMergeTree(
FixedString, Float, Nested, Nullable, String, UInt, ) from snuba.clusters.storage_sets import StorageSetKey from snuba.migrations import migration, operations, table_engines columns = [ Column("event_id", FixedString(32)), Column("project_id", UInt(64)), Column("group_id", UInt(64)), Column("timestamp", DateTime()), Column("deleted", UInt(8)), Column("retention_days", UInt(16)), Column("platform", Nullable(String())), Column("message", Nullable(String())), Column("primary_hash", Nullable(FixedString(32))), Column("received", Nullable(DateTime())), Column("search_message", Nullable(String())), Column("title", Nullable(String())), Column("location", Nullable(String())), Column("user_id", Nullable(String())), Column("username", Nullable(String())), Column("email", Nullable(String())), Column("ip_address", Nullable(String())), Column("geo_country_code", Nullable(String())), Column("geo_region", Nullable(String())),
def forwards_local(self) -> Sequence[operations.Operation]:
    """Return the ordered schema changes to apply to transactions_local.

    Operation order is significant: AddColumn `after=` anchors refer to
    columns created by earlier operations (e.g. sdk_version after sdk_name).
    """
    return [
        operations.ModifyColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column=Column("duration", UInt(32)),
        ),
        operations.AddColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column=Column(
                "sdk_name",
                String(Modifiers(low_cardinality=True, default="''")),
            ),
            after="user_email",
        ),
        operations.AddColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column=Column(
                "sdk_version",
                String(Modifiers(low_cardinality=True, default="''")),
            ),
            after="sdk_name",
        ),
        operations.AddColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column=Column(
                "transaction_status",
                UInt(8, Modifiers(default=str(UNKNOWN_SPAN_STATUS))),
            ),
            after="transaction_op",
        ),
        # Add the flattened string representations of tags/contexts.
        operations.AddColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column=Column("_tags_flattened", String()),
            after="tags",
        ),
        operations.AddColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column=Column("_contexts_flattened", String()),
            after="contexts",
        ),
        # user_hash is computed server-side from user via cityHash64.
        operations.AddColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column=Column(
                "user_hash",
                UInt(64, Modifiers(materialized="cityHash64(user)")),
            ),
            after="user",
        ),
        # The following columns were originally created as non low cardinality strings
        operations.ModifyColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column=Column(
                "transaction_name", String(Modifiers(low_cardinality=True))
            ),
        ),
        operations.ModifyColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column=Column(
                "release",
                String(Modifiers(nullable=True, low_cardinality=True)),
            ),
        ),
        operations.ModifyColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column=Column(
                "dist",
                String(Modifiers(nullable=True, low_cardinality=True)),
            ),
        ),
        operations.ModifyColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column=Column(
                "sdk_name",
                String(Modifiers(low_cardinality=True, default="''")),
            ),
        ),
        operations.ModifyColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column=Column(
                "sdk_version",
                String(Modifiers(low_cardinality=True, default="''")),
            ),
        ),
        operations.ModifyColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column=Column(
                "environment",
                String(Modifiers(nullable=True, low_cardinality=True)),
            ),
        ),
        operations.AddColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column=Column("message_timestamp", DateTime()),
            after="offset",
        ),
        # Remove the _start_date / _finish_date columns.
        operations.DropColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column_name="_start_date",
        ),
        operations.DropColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column_name="_finish_date",
        ),
    ]
from typing import Sequence from snuba.clickhouse.columns import Column, DateTime, UInt from snuba.clusters.storage_sets import StorageSetKey from snuba.migrations import migration, operations, table_engines from snuba.migrations.columns import MigrationModifiers as Modifiers columns: Sequence[Column[Modifiers]] = [ # Kafka topic offset Column("offset", UInt(64)), Column("record_deleted", UInt(8)), # PG columns Column("project_id", UInt(64)), Column("group_id", UInt(64)), Column("date_added", DateTime(Modifiers(nullable=True))), Column("user_id", UInt(64, Modifiers(nullable=True))), Column("team_id", UInt(64, Modifiers(nullable=True))), ] class Migration(migration.ClickhouseNodeMigration): blocking = False def forwards_local(self) -> Sequence[operations.SqlOperation]: return [ operations.CreateTable( storage_set=StorageSetKey.EVENTS, table_name="groupassignee_local", columns=columns, engine=table_engines.ReplacingMergeTree( storage_set=StorageSetKey.EVENTS,
from snuba.datasets.storage import WritableTableStorage from snuba.datasets.storages import StorageKey from snuba.datasets.storages.processors.replaced_groups import ( PostReplacementConsistencyEnforcer, ) from snuba.datasets.table_storage import KafkaStreamLoader from snuba.query.conditions import ConditionFunctions, binary_condition from snuba.query.expressions import Column, Literal from snuba.query.processors.arrayjoin_keyvalue_optimizer import ( ArrayJoinKeyValueOptimizer, ) from snuba.query.processors.mapping_promoter import MappingColumnPromoter from snuba.query.processors.prewhere import PrewhereProcessor all_columns = ColumnSet([ ("org_id", UInt(64)), ("project_id", UInt(64)), ("timestamp", DateTime()), ("event_id", UUID()), ("event_hash", ReadOnly(UInt(64))), ("platform", String()), ("environment", Nullable(String())), ("release", Nullable(String())), ("dist", Nullable(String())), ("ip_address_v4", Nullable(IPv4())), ("ip_address_v6", Nullable(IPv6())), ("user", String()), ("user_hash", ReadOnly(UInt(64))), ("user_id", Nullable(String())), ("user_name", Nullable(String())), ("user_email", Nullable(String())), ("sdk_name", Nullable(String())), ("sdk_version", Nullable(String())),
DateTime, Enum, FixedString, Float, IPv4, IPv6, String, UInt, ) from snuba.migrations.columns import MigrationModifiers as Modifiers from snuba.migrations.parse_schema import _get_column test_data = [ # Basic types (("Date", "", "", ""), Date()), (("DateTime", "", "", ""), DateTime()), ( ("Enum8('success' = 0, 'error' = 1)", "", "", ""), Enum([("success", 0), ("error", 1)]), ), (("FixedString(32)", "", "", ""), FixedString(32)), (("Float32", "", "", ""), Float(32)), (("IPv4", "", "", ""), IPv4()), (("IPv6", "", "", ""), IPv6()), (("String", "", "", ""), String()), (("UInt32", "", "", ""), UInt(32)), (("UUID", "", "", ""), UUID()), # Aggregate functions ( ("AggregateFunction(uniq, UInt8)", "", "", ""), AggregateFunction("uniq", [UInt(8)]),
UUID, AggregateFunction, Column, DateTime, String, UInt, ) from snuba.clusters.storage_sets import StorageSetKey from snuba.migrations import operations from snuba.migrations.columns import MigrationModifiers as Modifiers from snuba.processor import MAX_UINT32, NIL_UUID aggregate_columns_v1: Sequence[Column[Modifiers]] = [ Column("org_id", UInt(64)), Column("project_id", UInt(64)), Column("started", DateTime()), Column("release", String(Modifiers(low_cardinality=True))), Column("environment", String(Modifiers(low_cardinality=True))), Column( "duration_quantiles", AggregateFunction("quantilesIf(0.5, 0.9)", [UInt(32), UInt(8)]), ), Column("sessions", AggregateFunction("countIf", [UUID(), UInt(8)])), Column("users", AggregateFunction("uniqIf", [UUID(), UInt(8)])), Column("sessions_crashed", AggregateFunction("countIf", [UUID(), UInt(8)])), Column("sessions_abnormal", AggregateFunction("countIf", [UUID(), UInt(8)])), Column("sessions_errored", AggregateFunction("uniqIf", [UUID(), UInt(8)])),
def forwards_local(self) -> Sequence[operations.SqlOperation]:
    """Return the ordered schema changes to apply to the local sentry table.

    Operation order is significant: AddColumn `after=` anchors refer to
    columns created by earlier operations (e.g. title after search_message).
    """
    return [
        operations.AddColumn(
            storage_set=StorageSetKey.EVENTS,
            table_name="sentry_local",
            column=Column("group_id", UInt(64)),
            after="project_id",
        ),
        operations.DropColumn(
            storage_set=StorageSetKey.EVENTS,
            table_name="sentry_local",
            column_name="device_model",
        ),
        operations.AddColumn(
            storage_set=StorageSetKey.EVENTS,
            table_name="sentry_local",
            column=Column("sdk_integrations", Array(String())),
            after="exception_frames",
        ),
        # NOTE(review): the column is named "modules.name" while its type is
        # a Nested with a single "name" field — presumably matching a legacy
        # production layout; confirm before changing.
        operations.AddColumn(
            storage_set=StorageSetKey.EVENTS,
            table_name="sentry_local",
            column=Column("modules.name", Nested([("name", String())])),
            after="sdk_integrations",
        ),
        operations.AddColumn(
            storage_set=StorageSetKey.EVENTS,
            table_name="sentry_local",
            column=Column("culprit", String(Modifiers(nullable=True))),
            after="sdk_integrations",
        ),
        operations.AddColumn(
            storage_set=StorageSetKey.EVENTS,
            table_name="sentry_local",
            column=Column("search_message", String(Modifiers(nullable=True))),
            after="received",
        ),
        operations.AddColumn(
            storage_set=StorageSetKey.EVENTS,
            table_name="sentry_local",
            column=Column("title", String(Modifiers(nullable=True))),
            after="search_message",
        ),
        operations.AddColumn(
            storage_set=StorageSetKey.EVENTS,
            table_name="sentry_local",
            column=Column("location", String(Modifiers(nullable=True))),
            after="title",
        ),
        operations.AddColumn(
            storage_set=StorageSetKey.EVENTS,
            table_name="sentry_local",
            column=Column("_tags_flattened", String()),
            after="tags",
        ),
        operations.AddColumn(
            storage_set=StorageSetKey.EVENTS,
            table_name="sentry_local",
            column=Column("message_timestamp", DateTime()),
            after="partition",
        ),
    ]
def __init__(self) -> None:
    """Legacy events dataset: defines the full production ClickHouse schema
    and wires up the table writer with its Kafka stream loader.

    NOTE(review): the column order below intentionally mirrors the production
    table — see the comment near the end of ``all_columns``.
    """
    # Optional Kafka stream metadata recorded with each row.
    metadata_columns = ColumnSet([
        # optional stream related data
        ('offset', Nullable(UInt(64))),
        ('partition', Nullable(UInt(16))),
    ])

    promoted_tag_columns = ColumnSet([
        # These are the classic tags, they are saved in Snuba exactly as they
        # appear in the event body.
        ('level', Nullable(String())),
        ('logger', Nullable(String())),
        ('server_name', Nullable(String())),  # future name: device_id?
        ('transaction', Nullable(String())),
        ('environment', Nullable(String())),
        ('sentry:release', Nullable(String())),
        ('sentry:dist', Nullable(String())),
        ('sentry:user', Nullable(String())),
        ('site', Nullable(String())),
        ('url', Nullable(String())),
    ])

    promoted_context_tag_columns = ColumnSet([
        # These are promoted tags that come in in `tags`, but are more closely
        # related to contexts.  To avoid naming confusion with Clickhouse nested
        # columns, they are stored in the database with s/./_/
        # promoted tags
        ('app_device', Nullable(String())),
        ('device', Nullable(String())),
        ('device_family', Nullable(String())),
        ('runtime', Nullable(String())),
        ('runtime_name', Nullable(String())),
        ('browser', Nullable(String())),
        ('browser_name', Nullable(String())),
        ('os', Nullable(String())),
        ('os_name', Nullable(String())),
        ('os_rooted', Nullable(UInt(8))),
    ])

    # Promoted fields sourced from event contexts (OS / device details).
    promoted_context_columns = ColumnSet([
        ('os_build', Nullable(String())),
        ('os_kernel_version', Nullable(String())),
        ('device_name', Nullable(String())),
        ('device_brand', Nullable(String())),
        ('device_locale', Nullable(String())),
        ('device_uuid', Nullable(String())),
        ('device_model_id', Nullable(String())),
        ('device_arch', Nullable(String())),
        ('device_battery_level', Nullable(Float(32))),
        ('device_orientation', Nullable(String())),
        ('device_simulator', Nullable(UInt(8))),
        ('device_online', Nullable(UInt(8))),
        ('device_charging', Nullable(UInt(8))),
    ])

    # Columns every row must provide; these also feed the sort key below.
    required_columns = ColumnSet([
        ('event_id', FixedString(32)),
        ('project_id', UInt(64)),
        ('group_id', UInt(64)),
        ('timestamp', DateTime()),
        ('deleted', UInt(8)),
        ('retention_days', UInt(16)),
    ])

    all_columns = required_columns + [
        # required for non-deleted
        ('platform', Nullable(String())),
        ('message', Nullable(String())),
        ('primary_hash', Nullable(FixedString(32))),
        ('received', Nullable(DateTime())),
        ('search_message', Nullable(String())),
        ('title', Nullable(String())),
        ('location', Nullable(String())),

        # optional user
        ('user_id', Nullable(String())),
        ('username', Nullable(String())),
        ('email', Nullable(String())),
        ('ip_address', Nullable(String())),

        # optional geo
        ('geo_country_code', Nullable(String())),
        ('geo_region', Nullable(String())),
        ('geo_city', Nullable(String())),

        ('sdk_name', Nullable(String())),
        ('sdk_version', Nullable(String())),
        ('type', Nullable(String())),
        ('version', Nullable(String())),
    ] + metadata_columns \
        + promoted_context_columns \
        + promoted_tag_columns \
        + promoted_context_tag_columns \
        + [
        # other tags
        ('tags', Nested([
            ('key', String()),
            ('value', String()),
        ])),

        # other context
        ('contexts', Nested([
            ('key', String()),
            ('value', String()),
        ])),

        # http interface
        ('http_method', Nullable(String())),
        ('http_referer', Nullable(String())),

        # exception interface
        ('exception_stacks', Nested([
            ('type', Nullable(String())),
            ('value', Nullable(String())),
            ('mechanism_type', Nullable(String())),
            ('mechanism_handled', Nullable(UInt(8))),
        ])),
        ('exception_frames', Nested([
            ('abs_path', Nullable(String())),
            ('filename', Nullable(String())),
            ('package', Nullable(String())),
            ('module', Nullable(String())),
            ('function', Nullable(String())),
            ('in_app', Nullable(UInt(8))),
            ('colno', Nullable(UInt(32))),
            ('lineno', Nullable(UInt(32))),
            ('stack_level', UInt(16)),
        ])),

        # These are columns we added later in the life of the (current) production
        # database. They don't necessarily belong here in a logical/readability sense
        # but they are here to match the order of columns in production because
        # `insert_distributed_sync` is very sensitive to column existence and ordering.
        ('culprit', Nullable(String())),
        ('sdk_integrations', Array(String())),
        ('modules', Nested([
            ('name', String()),
            ('version', String()),
        ])),
    ]

    # Sampling key: hash of the event id; reused inside the sort key.
    sample_expr = 'cityHash64(toString(event_id))'

    schema = ReplacingMergeTreeSchema(
        columns=all_columns,
        local_table_name='sentry_local',
        dist_table_name='sentry_dist',
        # Rows with deleted=1 are tombstones; exclude them on every read.
        mandatory_conditions=[('deleted', '=', 0)],
        order_by='(project_id, toStartOfDay(timestamp), %s)' % sample_expr,
        partition_by='(toMonday(timestamp), if(equals(retention_days, 30), 30, 90))',
        version_column='deleted',
        sample_expr=sample_expr,
        migration_function=events_migrations)

    # Reads and writes go through the same schema.
    dataset_schemas = DatasetSchemas(
        read_schema=schema,
        write_schema=schema,
    )

    table_writer = TableWriter(
        write_schema=schema,
        stream_loader=KafkaStreamLoader(
            processor=EventsProcessor(promoted_tag_columns),
            default_topic="events",
            replacement_topic="event-replacements",
            commit_log_topic="snuba-commit-log",
        )
    )

    super(EventsDataset, self).__init__(
        dataset_schemas=dataset_schemas,
        table_writer=table_writer,
        time_group_columns={
            'time': 'timestamp',
            'rtime': 'received'
        },
        time_parse_columns=('timestamp', 'received')
    )

    # Retain the column groups for tag-promotion lookups by helper methods.
    self.__metadata_columns = metadata_columns
    self.__promoted_tag_columns = promoted_tag_columns
    self.__promoted_context_tag_columns = promoted_context_tag_columns
    self.__promoted_context_columns = promoted_context_columns
    self.__required_columns = required_columns

    # Resolves tags[...] expressions against the promoted columns above.
    self.__tags_processor = TagColumnProcessor(
        columns=all_columns,
        promoted_columns=self._get_promoted_columns(),
        column_tag_map=self._get_column_tag_map(),
    )
# Experimental spans storage: one row per span within a transaction.
columns = ColumnSet([
    ("project_id", UInt(64)),
    ("transaction_id", UUID()),
    ("trace_id", UUID()),
    ("transaction_span_id", UInt(64)),
    ("span_id", UInt(64)),
    # Root spans have no parent.
    ("parent_span_id", Nullable(UInt(64))),
    ("transaction_name", LowCardinality(String())),
    ("description", String()),  # description in span
    ("op", LowCardinality(String())),
    # Defaults to the sentinel "unknown" span status when not provided.
    (
        "status",
        WithDefault(UInt(8), str(UNKNOWN_SPAN_STATUS)),
    ),
    ("start_ts", DateTime()),
    ("start_ns", UInt(32)),
    ("finish_ts", DateTime()),
    ("finish_ns", UInt(32)),
    ("duration_ms", UInt(32)),
    ("tags", Nested([("key", String()), ("value", String())])),
    # Materialized from TAGS_HASH_MAP_COLUMN — presumably one hash per tag
    # key/value pair to speed up tag lookups; confirm against that expression.
    ("_tags_hash_map", Materialized(Array(UInt(64)), TAGS_HASH_MAP_COLUMN)),
    ("retention_days", UInt(16)),
    ("deleted", UInt(8)),
])

schema = WritableTableSchema(
    columns=columns,
    local_table_name="spans_experimental_local",
    dist_table_name="spans_experimental_dist",
    storage_set_key=StorageSetKey.TRANSACTIONS,
from typing import List, Sequence

from snuba.clickhouse.columns import UUID, Column, DateTime, String, UInt
from snuba.clusters.storage_sets import StorageSetKey
from snuba.migrations import migration, operations, table_engines
from snuba.migrations.columns import MigrationModifiers as Modifiers

# Column layout for the profiles table.
columns: List[Column[Modifiers]] = [
    # primary key
    Column("organization_id", UInt(64)),
    Column("project_id", UInt(64)),
    Column("transaction_id", UUID()),
    Column("profile_id", UUID()),
    Column("received", DateTime()),
    # profiling data
    # LZ4HC(9): high-compression codec — the profile payload is a large blob.
    Column("profile", String(Modifiers(codecs=["LZ4HC(9)"]))),
    # filtering data
    Column("android_api_level", UInt(32, Modifiers(nullable=True))),
    Column("device_classification", String(Modifiers(low_cardinality=True))),
    Column("device_locale", String(Modifiers(low_cardinality=True))),
    Column("device_manufacturer", String(Modifiers(low_cardinality=True))),
    Column("device_model", String(Modifiers(low_cardinality=True))),
    Column(
        "device_os_build_number",
        String(Modifiers(low_cardinality=True, nullable=True)),
    ),
    Column("device_os_name", String(Modifiers(low_cardinality=True))),
    Column("device_os_version", String(Modifiers(low_cardinality=True))),
    Column("duration_ns", UInt(64)),
    Column("environment", String(Modifiers(nullable=True, low_cardinality=True))),
    Column("platform", String(Modifiers(low_cardinality=True))),
    Column("trace_id", UUID()),
# NOTE(review): tail of a function whose definition starts above this chunk.
else: return None

# Columns specific to error events.  NOTE(review): everything here is
# nullable — presumably because these columns are absent on transaction
# rows when queried through Discover; confirm against the merge logic.
EVENTS_COLUMNS = ColumnSet([
    ("group_id", UInt(64, Modifiers(nullable=True))),
    ("primary_hash", FixedString(32, Modifiers(nullable=True))),
    # Promoted tags
    ("level", String(Modifiers(nullable=True))),
    ("logger", String(Modifiers(nullable=True))),
    ("server_name", String(Modifiers(nullable=True))),
    ("site", String(Modifiers(nullable=True))),
    ("url", String(Modifiers(nullable=True))),
    ("location", String(Modifiers(nullable=True))),
    ("culprit", String(Modifiers(nullable=True))),
    ("received", DateTime(Modifiers(nullable=True))),
    ("sdk_integrations", Array(String(), Modifiers(nullable=True))),
    ("version", String(Modifiers(nullable=True))),
    # exception interface
    (
        "exception_stacks",
        Nested([
            ("type", String(Modifiers(nullable=True))),
            ("value", String(Modifiers(nullable=True))),
            ("mechanism_type", String(Modifiers(nullable=True))),
            ("mechanism_handled", UInt(8, Modifiers(nullable=True))),
        ]),
    ),
    (
        "exception_frames",
        Nested([
from snuba.migrations import migration, operations, table_engines
from snuba.migrations.columns import MigrationModifiers as Modifiers

# Schema of the CDC (change-data-capture) copy of the groupedmessage table.
columns: Sequence[Column[Modifiers]] = [
    # Kafka topic offset
    Column("offset", UInt(64)),
    # GroupStatus in Sentry does not have a 'DELETED' state that reflects the
    # deletion of the record. Having a dedicated clickhouse-only flag to
    # identify this case seems more consistent than adding an additional value
    # into the status field below that does not exist on the Sentry side.
    Column("record_deleted", UInt(8)),
    # PG columns
    Column("project_id", UInt(64)),
    Column("id", UInt(64)),
    Column("status", UInt(8, Modifiers(nullable=True))),
    Column("last_seen", DateTime(Modifiers(nullable=True))),
    Column("first_seen", DateTime(Modifiers(nullable=True))),
    Column("active_at", DateTime(Modifiers(nullable=True))),
    Column("first_release_id", UInt(64, Modifiers(nullable=True))),
]


class Migration(migration.ClickhouseNodeMigration):
    """Creates the local groupedmessage table in the CDC storage set."""

    # Does not block other migrations while running.
    blocking = False

    def forwards_local(self) -> Sequence[operations.SqlOperation]:
        return [
            operations.CreateTable(
                storage_set=StorageSetKey.CDC,
                table_name="groupedmessage_local",
                columns=columns,
def __init__(self) -> None:
    """Discover entity: a single queryable view over error and transaction
    events, backed by the discover storage.

    The abstract column set is the union of the columns shared by both event
    types plus the events-only and transactions-only column sets.
    """
    # Columns present on both error and transaction events.
    self.__common_columns = ColumnSet([
        ("event_id", FixedString(32)),
        ("project_id", UInt(64)),
        ("type", String(Modifiers(nullable=True))),
        ("timestamp", DateTime()),
        ("platform", String(Modifiers(nullable=True))),
        ("environment", String(Modifiers(nullable=True))),
        ("release", String(Modifiers(nullable=True))),
        ("dist", String(Modifiers(nullable=True))),
        ("user", String(Modifiers(nullable=True))),
        ("transaction", String(Modifiers(nullable=True))),
        ("message", String(Modifiers(nullable=True))),
        ("title", String(Modifiers(nullable=True))),
        # User
        ("user_id", String(Modifiers(nullable=True))),
        ("username", String(Modifiers(nullable=True))),
        ("email", String(Modifiers(nullable=True))),
        ("ip_address", String(Modifiers(nullable=True))),
        # SDK
        ("sdk_name", String(Modifiers(nullable=True))),
        ("sdk_version", String(Modifiers(nullable=True))),
        # geo location context
        ("geo_country_code", String(Modifiers(nullable=True))),
        ("geo_region", String(Modifiers(nullable=True))),
        ("geo_city", String(Modifiers(nullable=True))),
        ("http_method", String(Modifiers(nullable=True))),
        ("http_referer", String(Modifiers(nullable=True))),
        # Other tags and context
        ("tags", Nested([("key", String()), ("value", String())])),
        ("contexts", Nested([("key", String()), ("value", String())])),
        ("trace_id", String(Modifiers(nullable=True))),
        ("span_id", UInt(64, Modifiers(nullable=True))),
    ])

    self.__events_columns = EVENTS_COLUMNS
    self.__transactions_columns = TRANSACTIONS_COLUMNS

    discover_storage = get_storage(StorageKey.DISCOVER)

    # Mappers translate entity-level columns onto the storage schema:
    # ip_address is rebuilt from the v4/v6 columns, user fields map to the
    # storage's user_* columns, and geo fields resolve from the contexts map.
    discover_storage_plan_builder = SingleStorageQueryPlanBuilder(
        storage=discover_storage,
        mappers=events_translation_mappers.
        concat(transaction_translation_mappers).concat(
            null_function_translation_mappers).concat(
                TranslationMappers(columns=[
                    ColumnToFunction(
                        None,
                        "ip_address",
                        "coalesce",
                        (
                            FunctionCall(
                                None,
                                "IPv4NumToString",
                                (Column(None, None, "ip_address_v4"), ),
                            ),
                            FunctionCall(
                                None,
                                "IPv6NumToString",
                                (Column(None, None, "ip_address_v6"), ),
                            ),
                        ),
                    ),
                    ColumnToColumn(None, "transaction", None, "transaction_name"),
                    ColumnToColumn(None, "username", None, "user_name"),
                    ColumnToColumn(None, "email", None, "user_email"),
                    ColumnToMapping(
                        None,
                        "geo_country_code",
                        None,
                        "contexts",
                        "geo.country_code",
                        nullable=True,
                    ),
                    ColumnToMapping(
                        None,
                        "geo_region",
                        None,
                        "contexts",
                        "geo.region",
                        nullable=True,
                    ),
                    ColumnToMapping(
                        None,
                        "geo_city",
                        None,
                        "contexts",
                        "geo.city",
                        nullable=True,
                    ),
                    # Empty user strings are normalised to NULL.
                    ColumnToFunction(
                        None,
                        "user",
                        "nullIf",
                        (Column(None, None, "user"), Literal(None, "")),
                    ),
                ])).concat(
                    TranslationMappers(subscriptables=[
                        SubscriptableMapper(None, "tags", None, "tags"),
                        SubscriptableMapper(None, "contexts", None, "contexts"),
                    ], )),
    )
    discover_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=discover_storage_plan_builder)

    super().__init__(
        storages=[discover_storage],
        query_pipeline_builder=discover_pipeline_builder,
        abstract_column_set=(self.__common_columns +
                             self.__events_columns +
                             self.__transactions_columns),
        join_relationships={},
        # Discover is read-only: no writable storage.
        writable_storage=None,
        validators=[EntityRequiredColumnValidator({"project_id"})],
        required_time_column="timestamp",
    )
from snuba.clusters.storage_sets import StorageSetKey from snuba.migrations import migration, operations, table_engines from snuba.migrations.columns import LowCardinality from snuba.processor import MAX_UINT32, NIL_UUID raw_columns = [ Column("session_id", UUID()), Column("distinct_id", UUID()), Column("seq", UInt(64)), Column("org_id", UInt(64)), Column("project_id", UInt(64)), Column("retention_days", UInt(16)), Column("duration", UInt(32)), Column("status", UInt(8)), Column("errors", UInt(16)), Column("received", DateTime()), Column("started", DateTime()), Column("release", LowCardinality(String())), Column("environment", LowCardinality(String())), ] aggregate_columns = [ Column("org_id", UInt(64)), Column("project_id", UInt(64)), Column("started", DateTime()), Column("release", LowCardinality(String())), Column("environment", LowCardinality(String())), Column( "duration_quantiles", AggregateFunction("quantilesIf(0.5, 0.9)", UInt(32), UInt(8)), ),
def __init__(self) -> None:
    """Events entity.

    Depending on the ``ERRORS_ROLLOUT_ALL`` setting, queries are served
    either from the legacy events storage or from the discover storage with
    a sampled shadow pipeline running alongside the primary one.
    """
    # Columns present on both error and transaction events.
    self.__common_columns = ColumnSet([
        ("event_id", FixedString(32)),
        ("project_id", UInt(64)),
        ("type", String(Modifiers(nullable=True))),
        ("timestamp", DateTime()),
        ("platform", String(Modifiers(nullable=True))),
        ("environment", String(Modifiers(nullable=True))),
        ("release", String(Modifiers(nullable=True))),
        ("dist", String(Modifiers(nullable=True))),
        ("user", String(Modifiers(nullable=True))),
        ("transaction", String(Modifiers(nullable=True))),
        ("message", String(Modifiers(nullable=True))),
        ("title", String(Modifiers(nullable=True))),
        # User
        ("user_id", String(Modifiers(nullable=True))),
        ("username", String(Modifiers(nullable=True))),
        ("email", String(Modifiers(nullable=True))),
        ("ip_address", String(Modifiers(nullable=True))),
        # SDK
        ("sdk_name", String(Modifiers(nullable=True))),
        ("sdk_version", String(Modifiers(nullable=True))),
        # geo location context
        ("geo_country_code", String(Modifiers(nullable=True))),
        ("geo_region", String(Modifiers(nullable=True))),
        ("geo_city", String(Modifiers(nullable=True))),
        ("http_method", String(Modifiers(nullable=True))),
        ("http_referer", String(Modifiers(nullable=True))),
        # Other tags and context
        ("tags", Nested([("key", String()), ("value", String())])),
        ("contexts", Nested([("key", String()), ("value", String())])),
        ("trace_id", String(Modifiers(nullable=True))),
    ])

    self.__events_columns = EVENTS_COLUMNS
    self.__transactions_columns = TRANSACTIONS_COLUMNS

    # Legacy pipeline: reads from the events storage; release/dist/user are
    # still resolved from their promoted tags there.
    events_storage = get_storage(StorageKey.EVENTS)
    events_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=SelectedStorageQueryPlanBuilder(
            selector=EventsQueryStorageSelector(
                mappers=events_translation_mappers.
                concat(transaction_translation_mappers).concat(
                    null_function_translation_mappers).concat(
                        TranslationMappers(
                            # XXX: Remove once we are using errors
                            columns=[
                                ColumnToMapping(None, "release", None,
                                                "tags", "sentry:release"),
                                ColumnToMapping(None, "dist", None,
                                                "tags", "sentry:dist"),
                                ColumnToMapping(None, "user", None,
                                                "tags", "sentry:user"),
                            ],
                            subscriptables=[
                                SubscriptableMapper(
                                    None, "tags", None, "tags"),
                                SubscriptableMapper(
                                    None, "contexts", None, "contexts"),
                            ],
                        )))),
    )

    discover_storage = get_storage(StorageKey.DISCOVER)
    # Discover pipeline: maps entity columns onto the discover storage schema
    # (ip_address rebuilt from v4/v6 columns, user fields onto user_*,
    # geo fields resolved from the contexts map).
    discover_storage_plan_builder = SingleStorageQueryPlanBuilder(
        storage=discover_storage,
        mappers=events_translation_mappers.
        concat(transaction_translation_mappers).concat(
            null_function_translation_mappers).concat(
                TranslationMappers(columns=[
                    ColumnToFunction(
                        None,
                        "ip_address",
                        "coalesce",
                        (
                            FunctionCall(
                                None,
                                "IPv4NumToString",
                                (Column(None, None, "ip_address_v4"), ),
                            ),
                            FunctionCall(
                                None,
                                "IPv6NumToString",
                                (Column(None, None, "ip_address_v6"), ),
                            ),
                        ),
                    ),
                    ColumnToColumn(None, "transaction", None, "transaction_name"),
                    ColumnToColumn(None, "username", None, "user_name"),
                    ColumnToColumn(None, "email", None, "user_email"),
                    ColumnToMapping(
                        None,
                        "geo_country_code",
                        None,
                        "contexts",
                        "geo.country_code",
                        nullable=True,
                    ),
                    ColumnToMapping(
                        None,
                        "geo_region",
                        None,
                        "contexts",
                        "geo.region",
                        nullable=True,
                    ),
                    ColumnToMapping(
                        None,
                        "geo_city",
                        None,
                        "contexts",
                        "geo.city",
                        nullable=True,
                    ),
                    # Empty user strings are normalised to NULL.
                    ColumnToFunction(
                        None,
                        "user",
                        "nullIf",
                        (Column(None, None, "user"), Literal(None, "")),
                    ),
                ])).concat(
                    TranslationMappers(subscriptables=[
                        SubscriptableMapper(None, "tags", None, "tags"),
                        SubscriptableMapper(None, "contexts", None, "contexts"),
                    ], )),
    )
    discover_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=discover_storage_plan_builder)

    pipeline_builder: Union[PipelineDelegator, SimplePipelineBuilder]
    if settings.ERRORS_ROLLOUT_ALL:
        storage = discover_storage
        # Run a sampled copy of each query next to the primary pipeline;
        # results are compared/recorded via the callback below.
        sampled_pipeline_builder = SampledSimplePipelineBuilder(
            query_plan_builder=discover_storage_plan_builder)
        pipeline_builder = PipelineDelegator(
            query_pipeline_builders={
                "primary": discover_pipeline_builder,
                "sampler": sampled_pipeline_builder,
            },
            selector_func=sampling_selector_func,
            callback_func=sampling_callback_func,
        )
    else:
        storage = events_storage
        pipeline_builder = events_pipeline_builder

    super().__init__(
        storages=[storage],
        query_pipeline_builder=pipeline_builder,
        abstract_column_set=(self.__common_columns +
                             self.__events_columns +
                             self.__transactions_columns),
        join_relationships={},
        # This entity is read-only: no writable storage.
        writable_storage=None,
        validators=[EntityRequiredColumnValidator({"project_id"})],
        required_time_column="timestamp",
    )