def forwards_dist(self) -> Sequence[operations.SqlOperation]:
    """Return the operations that add ``hierarchical_hashes`` to the dist tables.

    Two ``AddColumn`` operations are produced, both in the ``EVENTS`` storage
    set and both placing the new column after ``primary_hash``:

    * ``errors_dist`` receives an array of UUIDs;
    * ``sentry_dist`` receives an array of 32-character fixed strings.
    """
    add_to_errors_dist = operations.AddColumn(
        storage_set=StorageSetKey.EVENTS,
        table_name="errors_dist",
        column=Column("hierarchical_hashes", Array(UUID())),
        after="primary_hash",
    )
    add_to_sentry_dist = operations.AddColumn(
        storage_set=StorageSetKey.EVENTS,
        table_name="sentry_dist",
        column=Column("hierarchical_hashes", Array(FixedString(32))),
        after="primary_hash",
    )
    # Order is preserved: errors table first, then the legacy events table.
    return [add_to_errors_dist, add_to_sentry_dist]
def __init__(self) -> None:
    """Declare the abstract columns and wire up the storage selector.

    Three column groups are built (common, events-only and
    transactions-only); their concatenation is passed to the parent
    constructor as the abstract column set. The parent receives the events
    and transactions storages, while the read-only events storage is only
    handed to the ``DiscoverQueryStorageSelector``.
    """
    # Columns available on both events and transactions.
    self.__common_columns = ColumnSet(
        [
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", Nullable(String())),
            ("timestamp", DateTime()),
            *[
                (column_name, Nullable(String()))
                for column_name in (
                    "platform",
                    "environment",
                    "release",
                    "dist",
                    "user",
                    "transaction",
                    "message",
                    "title",
                    # User
                    "user_id",
                    "username",
                    "email",
                    "ip_address",
                    # SDK
                    "sdk_name",
                    "sdk_version",
                    # geo location context
                    "geo_country_code",
                    "geo_region",
                    "geo_city",
                    "http_method",
                    "http_referer",
                )
            ],
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
        ]
    )

    # exception interface
    exception_stacks = Nested(
        [
            ("type", Nullable(String())),
            ("value", Nullable(String())),
            ("mechanism_type", Nullable(String())),
            ("mechanism_handled", Nullable(UInt(8))),
        ]
    )
    exception_frames = Nested(
        [
            ("abs_path", Nullable(String())),
            ("filename", Nullable(String())),
            ("package", Nullable(String())),
            ("module", Nullable(String())),
            ("function", Nullable(String())),
            ("in_app", Nullable(UInt(8))),
            ("colno", Nullable(UInt(32))),
            ("lineno", Nullable(UInt(32))),
            ("stack_level", UInt(16)),
        ]
    )

    # Columns that only exist for events.
    self.__events_columns = ColumnSet(
        [
            ("group_id", Nullable(UInt(64))),
            ("primary_hash", Nullable(FixedString(32))),
            # Promoted tags
            *[
                (column_name, Nullable(String()))
                for column_name in (
                    "level",
                    "logger",
                    "server_name",
                    "site",
                    "url",
                    "search_message",
                    "location",
                    "culprit",
                )
            ],
            ("received", Nullable(DateTime())),
            ("sdk_integrations", Nullable(Array(String()))),
            ("version", Nullable(String())),
            ("exception_stacks", exception_stacks),
            ("exception_frames", exception_frames),
            ("modules", Nested([("name", String()), ("version", String())])),
        ]
    )

    # Columns that only exist for transactions.
    self.__transactions_columns = ColumnSet(
        [
            ("trace_id", Nullable(UUID())),
            ("span_id", Nullable(UInt(64))),
            ("transaction_hash", Nullable(UInt(64))),
            ("transaction_op", Nullable(String())),
            ("transaction_status", Nullable(UInt(8))),
            ("duration", Nullable(UInt(32))),
            (
                "measurements",
                Nested([("key", LowCardinality(String())), ("value", Float(64))]),
            ),
        ]
    )

    events_storage = get_storage(StorageKey.EVENTS)
    events_ro_storage = get_storage(StorageKey.EVENTS_RO)
    transactions_storage = get_storage(StorageKey.TRANSACTIONS)

    self.__time_group_columns: Mapping[str, str] = {}
    self.__time_parse_columns = ("timestamp",)

    storage_selector = DiscoverQueryStorageSelector(
        events_table=events_storage,
        events_ro_table=events_ro_storage,
        abstract_events_columns=self.__events_columns,
        transactions_table=transactions_storage,
        abstract_transactions_columns=self.__transactions_columns,
    )
    abstract_columns = (
        self.__common_columns
        + self.__events_columns
        + self.__transactions_columns
    )

    super().__init__(
        storages=[events_storage, transactions_storage],
        query_plan_builder=SelectedStorageQueryPlanBuilder(
            selector=storage_selector,
        ),
        abstract_column_set=abstract_columns,
        writable_storage=None,
    )
("device_brand", Nullable(String())), ("device_locale", Nullable(String())), ("device_uuid", Nullable(String())), ("device_model_id", Nullable(String())), ("device_arch", Nullable(String())), ("device_battery_level", Nullable(Float(32))), ("device_orientation", Nullable(String())), ("device_simulator", Nullable(UInt(8))), ("device_online", Nullable(UInt(8))), ("device_charging", Nullable(UInt(8))), ] ) required_columns = ColumnSet( [ ("event_id", FixedString(32)), ("project_id", UInt(64)), ("group_id", UInt(64)), ("timestamp", DateTime()), ("deleted", UInt(8)), ("retention_days", UInt(16)), ] ) all_columns = ( required_columns + [ # required for non-deleted ("platform", Nullable(String())), ("message", Nullable(String())), ("primary_hash", Nullable(FixedString(32))),
("device_brand", String(Modifiers(nullable=True))), ("device_locale", String(Modifiers(nullable=True))), ("device_uuid", String(Modifiers(nullable=True))), ("device_model_id", String(Modifiers(nullable=True))), ("device_arch", String(Modifiers(nullable=True))), ("device_battery_level", Float(32, Modifiers(nullable=True))), ("device_orientation", String(Modifiers(nullable=True))), ("device_simulator", UInt(8, Modifiers(nullable=True))), ("device_online", UInt(8, Modifiers(nullable=True))), ("device_charging", UInt(8, Modifiers(nullable=True))), ] ) required_columns = ColumnSet( [ ("event_id", FixedString(32)), ("project_id", UInt(64)), ("group_id", UInt(64)), ("timestamp", DateTime()), ("deleted", UInt(8)), ("retention_days", UInt(16)), ] ) all_columns = ( required_columns + [ # required for non-deleted ("platform", String(Modifiers(nullable=True))), ("message", String(Modifiers(nullable=True))), ("primary_hash", FixedString(32, Modifiers(nullable=True))),
("os_kernel_version", String(Modifiers(nullable=True))), ("device_name", String(Modifiers(nullable=True))), ("device_brand", String(Modifiers(nullable=True))), ("device_locale", String(Modifiers(nullable=True))), ("device_uuid", String(Modifiers(nullable=True))), ("device_model_id", String(Modifiers(nullable=True))), ("device_arch", String(Modifiers(nullable=True))), ("device_battery_level", Float(32, Modifiers(nullable=True))), ("device_orientation", String(Modifiers(nullable=True))), ("device_simulator", UInt(8, Modifiers(nullable=True))), ("device_online", UInt(8, Modifiers(nullable=True))), ("device_charging", UInt(8, Modifiers(nullable=True))), ]) required_columns = ColumnSet([ ("event_id", FixedString(32)), ("project_id", UInt(64)), ("group_id", UInt(64)), ("timestamp", DateTime()), ("deleted", UInt(8)), ("retention_days", UInt(16)), ]) all_columns = ( required_columns + [ # required for non-deleted ("platform", String(Modifiers(nullable=True))), ("message", String(Modifiers(nullable=True))), ("primary_hash", FixedString(32, Modifiers(nullable=True))), ("hierarchical_hashes", Array(FixedString(32))), ("received", DateTime(Modifiers(nullable=True))),
def __init__(self) -> None:
    """Define the events table schema, its writer and the tag processor.

    The full column list is assembled from several named groups; the order
    of that concatenation is preserved exactly because it has to match the
    production table. One ``ReplacingMergeTreeSchema`` is used for both
    reading and writing. After the parent constructor has run, the column
    groups are stored on the instance and the ``TagColumnProcessor`` is
    created.
    """
    # optional stream related data
    metadata_columns = ColumnSet(
        [
            ("offset", Nullable(UInt(64))),
            ("partition", Nullable(UInt(16))),
        ]
    )

    # These are the classic tags, they are saved in Snuba exactly as they
    # appear in the event body.
    promoted_tag_columns = ColumnSet(
        [
            (tag_name, Nullable(String()))
            for tag_name in (
                "level",
                "logger",
                "server_name",  # future name: device_id?
                "transaction",
                "environment",
                "sentry:release",
                "sentry:dist",
                "sentry:user",
                "site",
                "url",
            )
        ]
    )

    # These are promoted tags that come in in `tags`, but are more closely
    # related to contexts. To avoid naming confusion with Clickhouse nested
    # columns, they are stored in the database with s/./_/
    promoted_context_tag_columns = ColumnSet(
        [
            # promoted tags
            *[
                (tag_name, Nullable(String()))
                for tag_name in (
                    "app_device",
                    "device",
                    "device_family",
                    "runtime",
                    "runtime_name",
                    "browser",
                    "browser_name",
                    "os",
                    "os_name",
                )
            ],
            ("os_rooted", Nullable(UInt(8))),
        ]
    )

    promoted_context_columns = ColumnSet(
        [
            *[
                (context_name, Nullable(String()))
                for context_name in (
                    "os_build",
                    "os_kernel_version",
                    "device_name",
                    "device_brand",
                    "device_locale",
                    "device_uuid",
                    "device_model_id",
                    "device_arch",
                )
            ],
            ("device_battery_level", Nullable(Float(32))),
            ("device_orientation", Nullable(String())),
            ("device_simulator", Nullable(UInt(8))),
            ("device_online", Nullable(UInt(8))),
            ("device_charging", Nullable(UInt(8))),
        ]
    )

    required_columns = ColumnSet(
        [
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("group_id", UInt(64)),
            ("timestamp", DateTime()),
            ("deleted", UInt(8)),
            ("retention_days", UInt(16)),
        ]
    )

    leading_optional_columns = [
        # required for non-deleted
        ("platform", Nullable(String())),
        ("message", Nullable(String())),
        ("primary_hash", Nullable(FixedString(32))),
        ("received", Nullable(DateTime())),
        ("search_message", Nullable(String())),
        ("title", Nullable(String())),
        ("location", Nullable(String())),
        # optional user
        ("user_id", Nullable(String())),
        ("username", Nullable(String())),
        ("email", Nullable(String())),
        ("ip_address", Nullable(String())),
        # optional geo
        ("geo_country_code", Nullable(String())),
        ("geo_region", Nullable(String())),
        ("geo_city", Nullable(String())),
        ("sdk_name", Nullable(String())),
        ("sdk_version", Nullable(String())),
        ("type", Nullable(String())),
        ("version", Nullable(String())),
    ]

    # exception interface
    exception_stacks = Nested(
        [
            ("type", Nullable(String())),
            ("value", Nullable(String())),
            ("mechanism_type", Nullable(String())),
            ("mechanism_handled", Nullable(UInt(8))),
        ]
    )
    exception_frames = Nested(
        [
            ("abs_path", Nullable(String())),
            ("filename", Nullable(String())),
            ("package", Nullable(String())),
            ("module", Nullable(String())),
            ("function", Nullable(String())),
            ("in_app", Nullable(UInt(8))),
            ("colno", Nullable(UInt(32))),
            ("lineno", Nullable(UInt(32))),
            ("stack_level", UInt(16)),
        ]
    )

    trailing_columns = [
        # other tags
        ("tags", Nested([("key", String()), ("value", String())])),
        ("_tags_flattened", String()),
        # other context
        ("contexts", Nested([("key", String()), ("value", String())])),
        # http interface
        ("http_method", Nullable(String())),
        ("http_referer", Nullable(String())),
        ("exception_stacks", exception_stacks),
        ("exception_frames", exception_frames),
        # These are columns we added later in the life of the (current)
        # production database. They don't necessarily belong here in a
        # logical/readability sense but they are here to match the order of
        # columns in production because `insert_distributed_sync` is very
        # sensitive to column existence and ordering.
        ("culprit", Nullable(String())),
        ("sdk_integrations", Array(String())),
        ("modules", Nested([("name", String()), ("version", String())])),
    ]

    # Same left-to-right concatenation as before; do not reorder.
    all_columns = (
        required_columns
        + leading_optional_columns
        + metadata_columns
        + promoted_context_columns
        + promoted_tag_columns
        + promoted_context_tag_columns
        + trailing_columns
    )

    sample_expr = "cityHash64(toString(event_id))"
    order_by = "(project_id, toStartOfDay(timestamp), %s)" % sample_expr
    partition_by = "(toMonday(timestamp), if(equals(retention_days, 30), 30, 90))"

    schema = ReplacingMergeTreeSchema(
        columns=all_columns,
        local_table_name="sentry_local",
        dist_table_name="sentry_dist",
        mandatory_conditions=[("deleted", "=", 0)],
        prewhere_candidates=[
            "event_id",
            "group_id",
            "tags[sentry:release]",
            "message",
            "environment",
            "project_id",
        ],
        order_by=order_by,
        partition_by=partition_by,
        version_column="deleted",
        sample_expr=sample_expr,
        migration_function=events_migrations,
    )

    # The same schema object serves reads and writes.
    dataset_schemas = DatasetSchemas(read_schema=schema, write_schema=schema)

    stream_loader = KafkaStreamLoader(
        processor=EventsProcessor(promoted_tag_columns),
        default_topic="events",
        replacement_topic="event-replacements",
        commit_log_topic="snuba-commit-log",
    )
    table_writer = TableWriter(write_schema=schema, stream_loader=stream_loader)

    super(EventsDataset, self).__init__(
        dataset_schemas=dataset_schemas,
        table_writer=table_writer,
        time_group_columns={"time": "timestamp", "rtime": "received"},
        time_parse_columns=("timestamp", "received"),
    )

    self.__metadata_columns = metadata_columns
    self.__promoted_tag_columns = promoted_tag_columns
    self.__promoted_context_tag_columns = promoted_context_tag_columns
    self.__promoted_context_columns = promoted_context_columns
    self.__required_columns = required_columns

    # Built last; NOTE(review): presumably the two helper methods read the
    # attributes assigned just above - confirm before reordering.
    self.__tags_processor = TagColumnProcessor(
        columns=all_columns,
        promoted_columns=self._get_promoted_columns(),
        column_tag_map=self._get_column_tag_map(),
    )
Nullable, String, UInt, UUID, WithCodecs, WithDefault, ) from snuba.migrations.parse_schema import _get_column test_data = [ # Basic types (("Date", "", "", ""), Date()), (("DateTime", "", "", ""), DateTime()), (("Enum8('success' = 0, 'error' = 1)", "", "", ""), Enum([("success", 0), ("error", 1)])), (("FixedString(32)", "", "", ""), FixedString(32)), (("Float32", "", "", ""), Float(32)), (("IPv4", "", "", ""), IPv4()), (("IPv6", "", "", ""), IPv6()), (("String", "", "", ""), String()), (("UInt32", "", "", ""), UInt(32)), (("UUID", "", "", ""), UUID()), # Aggregate functions (("AggregateFunction(uniq, UInt8)", "", "", ""), AggregateFunction("uniq", UInt(8))), (("AggregateFunction(countIf, UUID, UInt8)", "", "", ""), AggregateFunction("countIf", UUID(), UInt(8))), (("AggregateFunction(quantileIf(0.5, 0.9), UInt32, UInt8)", "", "", ""), AggregateFunction("quantileIf(0.5, 0.9)", UInt(32), UInt(8))), # Array (("Array(String)", "", "", ""), Array(String())),
IPv6, String, UInt, ) from snuba.migrations.columns import MigrationModifiers as Modifiers from snuba.migrations.parse_schema import _get_column test_data = [ # Basic types (("Date", "", "", ""), Date()), (("DateTime", "", "", ""), DateTime()), ( ("Enum8('success' = 0, 'error' = 1)", "", "", ""), Enum([("success", 0), ("error", 1)]), ), (("FixedString(32)", "", "", ""), FixedString(32)), (("Float32", "", "", ""), Float(32)), (("IPv4", "", "", ""), IPv4()), (("IPv6", "", "", ""), IPv6()), (("String", "", "", ""), String()), (("UInt32", "", "", ""), UInt(32)), (("UUID", "", "", ""), UUID()), # Aggregate functions ( ("AggregateFunction(uniq, UInt8)", "", "", ""), AggregateFunction("uniq", [UInt(8)]), ), ( ("AggregateFunction(countIf, UUID, UInt8)", "", "", ""), AggregateFunction("countIf", [UUID(), UInt(8)]), ),
def __init__(self) -> None:
    """Set up the abstract columns and a single-storage discover pipeline.

    The common columns are declared here, while the events and transactions
    column sets come from the module-level ``EVENTS_COLUMNS`` and
    ``TRANSACTIONS_COLUMNS``. Queries are planned against the ``DISCOVER``
    storage only, using the shared translation mappers extended with the
    column and subscriptable mappers defined below.
    """
    self.__common_columns = ColumnSet(
        [
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", String(Modifiers(nullable=True))),
            ("timestamp", DateTime()),
            *[
                (column_name, String(Modifiers(nullable=True)))
                for column_name in (
                    "platform",
                    "environment",
                    "release",
                    "dist",
                    "user",
                    "transaction",
                    "message",
                    "title",
                    # User
                    "user_id",
                    "username",
                    "email",
                    "ip_address",
                    # SDK
                    "sdk_name",
                    "sdk_version",
                    # geo location context
                    "geo_country_code",
                    "geo_region",
                    "geo_city",
                    "http_method",
                    "http_referer",
                )
            ],
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
            ("trace_id", String(Modifiers(nullable=True))),
            ("span_id", UInt(64, Modifiers(nullable=True))),
        ]
    )
    self.__events_columns = EVENTS_COLUMNS
    self.__transactions_columns = TRANSACTIONS_COLUMNS

    discover_storage = get_storage(StorageKey.DISCOVER)

    # Start from the shared mappers, then layer the discover-specific ones.
    discover_mappers = events_translation_mappers.concat(
        transaction_translation_mappers
    ).concat(null_function_translation_mappers)
    discover_mappers = discover_mappers.concat(
        TranslationMappers(
            columns=[
                # ip_address is the first non-null of the v4/v6 string forms.
                ColumnToFunction(
                    None,
                    "ip_address",
                    "coalesce",
                    (
                        FunctionCall(
                            None,
                            "IPv4NumToString",
                            (Column(None, None, "ip_address_v4"),),
                        ),
                        FunctionCall(
                            None,
                            "IPv6NumToString",
                            (Column(None, None, "ip_address_v6"),),
                        ),
                    ),
                ),
                ColumnToColumn(None, "transaction", None, "transaction_name"),
                ColumnToColumn(None, "username", None, "user_name"),
                ColumnToColumn(None, "email", None, "user_email"),
                ColumnToMapping(
                    None,
                    "geo_country_code",
                    None,
                    "contexts",
                    "geo.country_code",
                    nullable=True,
                ),
                ColumnToMapping(
                    None,
                    "geo_region",
                    None,
                    "contexts",
                    "geo.region",
                    nullable=True,
                ),
                ColumnToMapping(
                    None,
                    "geo_city",
                    None,
                    "contexts",
                    "geo.city",
                    nullable=True,
                ),
                # An empty "user" value is turned into NULL.
                ColumnToFunction(
                    None,
                    "user",
                    "nullIf",
                    (Column(None, None, "user"), Literal(None, "")),
                ),
            ]
        )
    )
    discover_mappers = discover_mappers.concat(
        TranslationMappers(
            subscriptables=[
                SubscriptableMapper(None, "tags", None, "tags"),
                SubscriptableMapper(None, "contexts", None, "contexts"),
            ],
        )
    )

    discover_storage_plan_builder = SingleStorageQueryPlanBuilder(
        storage=discover_storage,
        mappers=discover_mappers,
    )
    discover_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=discover_storage_plan_builder
    )

    abstract_columns = (
        self.__common_columns
        + self.__events_columns
        + self.__transactions_columns
    )

    super().__init__(
        storages=[discover_storage],
        query_pipeline_builder=discover_pipeline_builder,
        abstract_column_set=abstract_columns,
        join_relationships={},
        writable_storage=None,
        validators=[EntityRequiredColumnValidator({"project_id"})],
        required_time_column="timestamp",
    )
def __init__(self) -> None:
    """Set up the columns and a delegating events/discover pipeline.

    Two pipeline builders are created, one over the ``EVENTS`` storage and
    one over the ``DISCOVER`` storage, and both are registered with a
    ``PipelineDelegator``. The nested selector always names "events" first
    and adds "discover" to the second list for a random share of queries,
    controlled by the ``discover_query_percentage`` runtime config.
    """
    self.__common_columns = ColumnSet(
        [
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", String(Modifiers(nullable=True))),
            ("timestamp", DateTime()),
            *[
                (column_name, String(Modifiers(nullable=True)))
                for column_name in (
                    "platform",
                    "environment",
                    "release",
                    "dist",
                    "user",
                    "transaction",
                    "message",
                    "title",
                    # User
                    "user_id",
                    "username",
                    "email",
                    "ip_address",
                    # SDK
                    "sdk_name",
                    "sdk_version",
                    # geo location context
                    "geo_country_code",
                    "geo_region",
                    "geo_city",
                    "http_method",
                    "http_referer",
                )
            ],
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
        ]
    )
    self.__events_columns = EVENTS_COLUMNS
    self.__transactions_columns = TRANSACTIONS_COLUMNS

    # --- pipeline over the events storage --------------------------------
    events_storage = get_storage(StorageKey.EVENTS)
    events_mappers = events_translation_mappers.concat(
        transaction_translation_mappers
    ).concat(null_function_translation_mappers)
    events_mappers = events_mappers.concat(
        TranslationMappers(
            # XXX: Remove once we are using errors
            columns=[
                ColumnToMapping(None, "release", None, "tags", "sentry:release"),
                ColumnToMapping(None, "dist", None, "tags", "sentry:dist"),
                ColumnToMapping(None, "user", None, "tags", "sentry:user"),
            ],
            subscriptables=[
                SubscriptableMapper(None, "tags", None, "tags"),
                SubscriptableMapper(None, "contexts", None, "contexts"),
            ],
        )
    )
    events_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=SelectedStorageQueryPlanBuilder(
            selector=EventsQueryStorageSelector(mappers=events_mappers)
        ),
    )

    # --- pipeline over the discover storage ------------------------------
    discover_storage = get_storage(StorageKey.DISCOVER)
    discover_mappers = events_translation_mappers.concat(
        transaction_translation_mappers
    ).concat(null_function_translation_mappers)
    discover_mappers = discover_mappers.concat(
        TranslationMappers(
            columns=[
                # ip_address is the first non-null of the v4/v6 string forms.
                ColumnToFunction(
                    None,
                    "ip_address",
                    "coalesce",
                    (
                        FunctionCall(
                            None,
                            "IPv4NumToString",
                            (Column(None, None, "ip_address_v4"),),
                        ),
                        FunctionCall(
                            None,
                            "IPv6NumToString",
                            (Column(None, None, "ip_address_v6"),),
                        ),
                    ),
                ),
                ColumnToColumn(None, "transaction", None, "transaction_name"),
                ColumnToColumn(None, "username", None, "user_name"),
                ColumnToColumn(None, "email", None, "user_email"),
                ColumnToMapping(
                    None, "geo_country_code", None, "contexts", "geo.country_code",
                ),
                ColumnToMapping(None, "geo_region", None, "contexts", "geo.region"),
                ColumnToMapping(None, "geo_city", None, "contexts", "geo.city"),
                # An empty "user" value is turned into NULL.
                ColumnToFunction(
                    None,
                    "user",
                    "nullIf",
                    (Column(None, None, "user"), Literal(None, "")),
                ),
            ]
        )
    )
    discover_mappers = discover_mappers.concat(
        TranslationMappers(
            subscriptables=[
                SubscriptableMapper(None, "tags", None, "tags"),
                SubscriptableMapper(None, "contexts", None, "contexts"),
            ],
        )
    )
    discover_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=discover_storage,
            mappers=discover_mappers,
        )
    )

    def selector_func(_query: Query) -> Tuple[str, List[str]]:
        """Pick "events", adding "discover" for a random share of queries."""
        # Draw first, then read the config, matching the original order.
        roll = random.random()
        percentage = float(state.get_config("discover_query_percentage", 0))
        secondary: List[str] = ["discover"] if roll < percentage else []
        return "events", secondary

    abstract_columns = (
        self.__common_columns
        + self.__events_columns
        + self.__transactions_columns
    )

    super().__init__(
        storages=[events_storage, discover_storage],
        query_pipeline_builder=PipelineDelegator(
            query_pipeline_builders={
                "events": events_pipeline_builder,
                "discover": discover_pipeline_builder,
            },
            selector_func=selector_func,
            callback_func=partial(callback_func, "discover"),
        ),
        abstract_column_set=abstract_columns,
        join_relationships={},
        writable_storage=None,
    )
pytest.param( IPv4(None), IPv4(), IPv4(Modifier(nullable=True)), "IPv4", id="IPs", ), pytest.param( IPv6(None), IPv6(), IPv6(Modifier(nullable=True)), "IPv6", id="IPs", ), pytest.param( FixedString(32, Modifier(nullable=True)), FixedString(32), FixedString(64, Modifier(nullable=True)), "Nullable(FixedString(32))", id="fixed strings", ), pytest.param( UInt(8, Modifier(nullable=True)), UInt(8), UInt(16, Modifier(nullable=True)), "Nullable(UInt8)", id="integers", ), pytest.param( Float(64, Modifier(nullable=True)), Float(64),
def __init__(self) -> None:
    """Set up the abstract columns and an events-storage query plan builder.

    The common columns are declared here; the events and transactions
    column sets come from ``EVENTS_COLUMNS`` and ``TRANSACTIONS_COLUMNS``.
    Queries are planned through an ``EventsQueryStorageSelector`` that uses
    the shared translation mappers plus the tag-backed column mappers below.
    """
    self.__common_columns = ColumnSet(
        [
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", String(Modifiers(nullable=True))),
            ("timestamp", DateTime()),
            *[
                (column_name, String(Modifiers(nullable=True)))
                for column_name in (
                    "platform",
                    "environment",
                    "release",
                    "dist",
                    "user",
                    "transaction",
                    "message",
                    "title",
                    # User
                    "user_id",
                    "username",
                    "email",
                    "ip_address",
                    # SDK
                    "sdk_name",
                    "sdk_version",
                    # geo location context
                    "geo_country_code",
                    "geo_region",
                    "geo_city",
                    "http_method",
                    "http_referer",
                )
            ],
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
        ]
    )
    self.__events_columns = EVENTS_COLUMNS
    self.__transactions_columns = TRANSACTIONS_COLUMNS

    events_storage = get_storage(StorageKey.EVENTS)

    events_mappers = events_translation_mappers.concat(
        transaction_translation_mappers
    ).concat(null_function_translation_mappers)
    events_mappers = events_mappers.concat(
        TranslationMappers(
            # XXX: Remove once we are using errors
            columns=[
                ColumnToMapping(None, "release", None, "tags", "sentry:release"),
                ColumnToMapping(None, "dist", None, "tags", "sentry:dist"),
                ColumnToMapping(None, "user", None, "tags", "sentry:user"),
            ],
            subscriptables=[
                SubscriptableMapper(None, "tags", None, "tags"),
                SubscriptableMapper(None, "contexts", None, "contexts"),
            ],
        )
    )
    plan_builder = SelectedStorageQueryPlanBuilder(
        selector=EventsQueryStorageSelector(mappers=events_mappers)
    )

    abstract_columns = (
        self.__common_columns
        + self.__events_columns
        + self.__transactions_columns
    )

    super().__init__(
        storages=[events_storage],
        query_plan_builder=plan_builder,
        abstract_column_set=abstract_columns,
        writable_storage=None,
    )
def __init__(self) -> None:
    """Set up the columns and pick the pipeline from ``ERRORS_ROLLOUT_ALL``.

    Pipeline builders are created for both the ``EVENTS`` and ``DISCOVER``
    storages. When ``settings.ERRORS_ROLLOUT_ALL`` is truthy, the discover
    storage is used through a ``PipelineDelegator`` holding a "primary" and
    a "sampler" builder; otherwise the plain events pipeline is used.
    """
    self.__common_columns = ColumnSet(
        [
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", String(Modifiers(nullable=True))),
            ("timestamp", DateTime()),
            *[
                (column_name, String(Modifiers(nullable=True)))
                for column_name in (
                    "platform",
                    "environment",
                    "release",
                    "dist",
                    "user",
                    "transaction",
                    "message",
                    "title",
                    # User
                    "user_id",
                    "username",
                    "email",
                    "ip_address",
                    # SDK
                    "sdk_name",
                    "sdk_version",
                    # geo location context
                    "geo_country_code",
                    "geo_region",
                    "geo_city",
                    "http_method",
                    "http_referer",
                )
            ],
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
            ("trace_id", String(Modifiers(nullable=True))),
        ]
    )
    self.__events_columns = EVENTS_COLUMNS
    self.__transactions_columns = TRANSACTIONS_COLUMNS

    # --- pipeline over the events storage --------------------------------
    events_storage = get_storage(StorageKey.EVENTS)
    events_mappers = events_translation_mappers.concat(
        transaction_translation_mappers
    ).concat(null_function_translation_mappers)
    events_mappers = events_mappers.concat(
        TranslationMappers(
            # XXX: Remove once we are using errors
            columns=[
                ColumnToMapping(None, "release", None, "tags", "sentry:release"),
                ColumnToMapping(None, "dist", None, "tags", "sentry:dist"),
                ColumnToMapping(None, "user", None, "tags", "sentry:user"),
            ],
            subscriptables=[
                SubscriptableMapper(None, "tags", None, "tags"),
                SubscriptableMapper(None, "contexts", None, "contexts"),
            ],
        )
    )
    events_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=SelectedStorageQueryPlanBuilder(
            selector=EventsQueryStorageSelector(mappers=events_mappers)
        ),
    )

    # --- pipeline over the discover storage ------------------------------
    discover_storage = get_storage(StorageKey.DISCOVER)
    discover_mappers = events_translation_mappers.concat(
        transaction_translation_mappers
    ).concat(null_function_translation_mappers)
    discover_mappers = discover_mappers.concat(
        TranslationMappers(
            columns=[
                # ip_address is the first non-null of the v4/v6 string forms.
                ColumnToFunction(
                    None,
                    "ip_address",
                    "coalesce",
                    (
                        FunctionCall(
                            None,
                            "IPv4NumToString",
                            (Column(None, None, "ip_address_v4"),),
                        ),
                        FunctionCall(
                            None,
                            "IPv6NumToString",
                            (Column(None, None, "ip_address_v6"),),
                        ),
                    ),
                ),
                ColumnToColumn(None, "transaction", None, "transaction_name"),
                ColumnToColumn(None, "username", None, "user_name"),
                ColumnToColumn(None, "email", None, "user_email"),
                ColumnToMapping(
                    None,
                    "geo_country_code",
                    None,
                    "contexts",
                    "geo.country_code",
                    nullable=True,
                ),
                ColumnToMapping(
                    None,
                    "geo_region",
                    None,
                    "contexts",
                    "geo.region",
                    nullable=True,
                ),
                ColumnToMapping(
                    None,
                    "geo_city",
                    None,
                    "contexts",
                    "geo.city",
                    nullable=True,
                ),
                # An empty "user" value is turned into NULL.
                ColumnToFunction(
                    None,
                    "user",
                    "nullIf",
                    (Column(None, None, "user"), Literal(None, "")),
                ),
            ]
        )
    )
    discover_mappers = discover_mappers.concat(
        TranslationMappers(
            subscriptables=[
                SubscriptableMapper(None, "tags", None, "tags"),
                SubscriptableMapper(None, "contexts", None, "contexts"),
            ],
        )
    )
    discover_storage_plan_builder = SingleStorageQueryPlanBuilder(
        storage=discover_storage,
        mappers=discover_mappers,
    )
    discover_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=discover_storage_plan_builder
    )

    # --- choose the storage/pipeline pair --------------------------------
    pipeline_builder: Union[PipelineDelegator, SimplePipelineBuilder]
    if not settings.ERRORS_ROLLOUT_ALL:
        storage = events_storage
        pipeline_builder = events_pipeline_builder
    else:
        storage = discover_storage
        sampled_pipeline_builder = SampledSimplePipelineBuilder(
            query_plan_builder=discover_storage_plan_builder
        )
        pipeline_builder = PipelineDelegator(
            query_pipeline_builders={
                "primary": discover_pipeline_builder,
                "sampler": sampled_pipeline_builder,
            },
            selector_func=sampling_selector_func,
            callback_func=sampling_callback_func,
        )

    abstract_columns = (
        self.__common_columns
        + self.__events_columns
        + self.__transactions_columns
    )

    super().__init__(
        storages=[storage],
        query_pipeline_builder=pipeline_builder,
        abstract_column_set=abstract_columns,
        join_relationships={},
        writable_storage=None,
        validators=[EntityRequiredColumnValidator({"project_id"})],
        required_time_column="timestamp",
    )
def __init__(self) -> None:
    """Set up the columns and a rollout-aware events/discover delegator.

    Pipeline builders over the ``EVENTS`` and ``DISCOVER`` storages are both
    registered with a ``PipelineDelegator``. The nested selector decides per
    query, in this order: killswitch config, per-referrer rollout, global
    rollout, and finally a random share of queries that also go to
    "discover" as the secondary pipeline.
    """
    self.__common_columns = ColumnSet(
        [
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", String(Modifiers(nullable=True))),
            ("timestamp", DateTime()),
            *[
                (column_name, String(Modifiers(nullable=True)))
                for column_name in (
                    "platform",
                    "environment",
                    "release",
                    "dist",
                    "user",
                    "transaction",
                    "message",
                    "title",
                    # User
                    "user_id",
                    "username",
                    "email",
                    "ip_address",
                    # SDK
                    "sdk_name",
                    "sdk_version",
                    # geo location context
                    "geo_country_code",
                    "geo_region",
                    "geo_city",
                    "http_method",
                    "http_referer",
                )
            ],
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
        ]
    )
    self.__events_columns = EVENTS_COLUMNS
    self.__transactions_columns = TRANSACTIONS_COLUMNS

    # --- pipeline over the events storage --------------------------------
    events_storage = get_storage(StorageKey.EVENTS)
    events_mappers = events_translation_mappers.concat(
        transaction_translation_mappers
    ).concat(null_function_translation_mappers)
    events_mappers = events_mappers.concat(
        TranslationMappers(
            # XXX: Remove once we are using errors
            columns=[
                ColumnToMapping(None, "release", None, "tags", "sentry:release"),
                ColumnToMapping(None, "dist", None, "tags", "sentry:dist"),
                ColumnToMapping(None, "user", None, "tags", "sentry:user"),
            ],
            subscriptables=[
                SubscriptableMapper(None, "tags", None, "tags"),
                SubscriptableMapper(None, "contexts", None, "contexts"),
            ],
        )
    )
    events_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=SelectedStorageQueryPlanBuilder(
            selector=EventsQueryStorageSelector(mappers=events_mappers)
        ),
    )

    # --- pipeline over the discover storage ------------------------------
    discover_storage = get_storage(StorageKey.DISCOVER)
    discover_mappers = events_translation_mappers.concat(
        transaction_translation_mappers
    ).concat(null_function_translation_mappers)
    discover_mappers = discover_mappers.concat(
        TranslationMappers(
            columns=[
                # ip_address is the first non-null of the v4/v6 string forms.
                ColumnToFunction(
                    None,
                    "ip_address",
                    "coalesce",
                    (
                        FunctionCall(
                            None,
                            "IPv4NumToString",
                            (Column(None, None, "ip_address_v4"),),
                        ),
                        FunctionCall(
                            None,
                            "IPv6NumToString",
                            (Column(None, None, "ip_address_v6"),),
                        ),
                    ),
                ),
                ColumnToColumn(None, "transaction", None, "transaction_name"),
                ColumnToColumn(None, "username", None, "user_name"),
                ColumnToColumn(None, "email", None, "user_email"),
                ColumnToMapping(
                    None,
                    "geo_country_code",
                    None,
                    "contexts",
                    "geo.country_code",
                    nullable=True,
                ),
                ColumnToMapping(
                    None,
                    "geo_region",
                    None,
                    "contexts",
                    "geo.region",
                    nullable=True,
                ),
                ColumnToMapping(
                    None,
                    "geo_city",
                    None,
                    "contexts",
                    "geo.city",
                    nullable=True,
                ),
                # An empty "user" value is turned into NULL.
                ColumnToFunction(
                    None,
                    "user",
                    "nullIf",
                    (Column(None, None, "user"), Literal(None, "")),
                ),
            ]
        )
    )
    discover_mappers = discover_mappers.concat(
        TranslationMappers(
            subscriptables=[
                SubscriptableMapper(None, "tags", None, "tags"),
                SubscriptableMapper(None, "contexts", None, "contexts"),
            ],
        )
    )
    discover_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=discover_storage,
            mappers=discover_mappers,
        )
    )

    def selector_func(_query: Query, referrer: str) -> Tuple[str, List[str]]:
        """Return the primary pipeline name and the list of secondary ones."""
        # In case something goes wrong, set this to 1 to revert to the
        # events storage.
        kill_rollout = state.get_config("errors_rollout_killswitch", 0)
        assert isinstance(kill_rollout, (int, str))
        if int(kill_rollout):
            return "events", []

        # Rolled out either for this referrer specifically or for everyone.
        if (
            referrer in settings.ERRORS_ROLLOUT_BY_REFERRER
            or settings.ERRORS_ROLLOUT_ALL
        ):
            return "discover", []

        default_threshold = state.get_config("discover_query_percentage", 0)
        assert isinstance(default_threshold, (float, int, str))
        # A per-referrer percentage overrides the runtime-config default.
        threshold = settings.ERRORS_QUERY_PERCENTAGE_BY_REFERRER.get(
            referrer, default_threshold
        )
        roll = random.random()
        secondary: List[str] = ["discover"] if roll < float(threshold) else []
        return "events", secondary

    abstract_columns = (
        self.__common_columns
        + self.__events_columns
        + self.__transactions_columns
    )

    super().__init__(
        storages=[events_storage, discover_storage],
        query_pipeline_builder=PipelineDelegator(
            query_pipeline_builders={
                "events": events_pipeline_builder,
                "discover": discover_pipeline_builder,
            },
            selector_func=selector_func,
            callback_func=partial(callback_func, "discover"),
        ),
        abstract_column_set=abstract_columns,
        join_relationships={},
        writable_storage=None,
        required_filter_columns=["project_id"],
        required_time_column="timestamp",
    )
("tags", Nested([("key", String()), ("value", String())])), ("_tags_flattened", String()), ("_tags_hash_map", Materialized(Array(UInt(64)), TAGS_HASH_MAP_COLUMN)), ("contexts", Nested([("key", String()), ("value", String())])), ("_contexts_flattened", String()), ("transaction_name", WithDefault(LowCardinality(String()), "''")), ("transaction_hash", Materialized(UInt(64), "cityHash64(transaction_name)"),), ("span_id", Nullable(UInt(64))), ("trace_id", Nullable(UUID())), ("partition", UInt(16)), ("offset", WithCodecs(UInt(64), ["DoubleDelta", "LZ4"])), ("message_timestamp", DateTime()), ("retention_days", UInt(16)), ("deleted", UInt(8)), ("group_id", UInt(64)), ("primary_hash", FixedString(32)), ("primary_hash_hex", Materialized(UInt(64), "hex(primary_hash)")), ("event_string", WithCodecs(String(), ["NONE"])), ("received", DateTime()), ("message", String()), ("title", String()), ("culprit", String()), ("level", LowCardinality(String())), ("location", Nullable(String())), ("version", LowCardinality(Nullable(String()))), ("type", LowCardinality(String())), ( "exception_stacks", Nested( [ ("type", Nullable(String())),
def __init__(self):
    """Declare the events table columns and wire up schema, writer and tag processing.

    The full column list is assembled from several groups (required,
    general event fields, stream metadata, promoted contexts, promoted
    tags, promoted context tags, and trailing interface/late columns) and
    handed to a ``ReplacingMergeTreeSchema`` over ``sentry_local`` /
    ``sentry_dist``. The same schema is used for reads and writes.
    """
    mandatory_columns = ColumnSet([
        ('event_id', FixedString(32)),
        ('project_id', UInt(64)),
        ('group_id', UInt(64)),
        ('timestamp', DateTime()),
        ('deleted', UInt(8)),
        ('retention_days', UInt(16)),
    ])

    general_event_fields = [
        # required for non-deleted
        ('platform', Nullable(String())),
        ('message', Nullable(String())),
        ('primary_hash', Nullable(FixedString(32))),
        ('received', Nullable(DateTime())),
        ('search_message', Nullable(String())),
        ('title', Nullable(String())),
        ('location', Nullable(String())),

        # optional user
        ('user_id', Nullable(String())),
        ('username', Nullable(String())),
        ('email', Nullable(String())),
        ('ip_address', Nullable(String())),

        # optional geo
        ('geo_country_code', Nullable(String())),
        ('geo_region', Nullable(String())),
        ('geo_city', Nullable(String())),

        ('sdk_name', Nullable(String())),
        ('sdk_version', Nullable(String())),
        ('type', Nullable(String())),
        ('version', Nullable(String())),
    ]

    # optional stream related data
    stream_columns = ColumnSet([
        ('offset', Nullable(UInt(64))),
        ('partition', Nullable(UInt(16))),
    ])

    context_columns = ColumnSet([
        ('os_build', Nullable(String())),
        ('os_kernel_version', Nullable(String())),
        ('device_name', Nullable(String())),
        ('device_brand', Nullable(String())),
        ('device_locale', Nullable(String())),
        ('device_uuid', Nullable(String())),
        ('device_model_id', Nullable(String())),
        ('device_arch', Nullable(String())),
        ('device_battery_level', Nullable(Float(32))),
        ('device_orientation', Nullable(String())),
        ('device_simulator', Nullable(UInt(8))),
        ('device_online', Nullable(UInt(8))),
        ('device_charging', Nullable(UInt(8))),
    ])

    # These are the classic tags, they are saved in Snuba exactly as they
    # appear in the event body.
    tag_columns = ColumnSet([
        ('level', Nullable(String())),
        ('logger', Nullable(String())),
        ('server_name', Nullable(String())),  # future name: device_id?
        ('transaction', Nullable(String())),
        ('environment', Nullable(String())),
        ('sentry:release', Nullable(String())),
        ('sentry:dist', Nullable(String())),
        ('sentry:user', Nullable(String())),
        ('site', Nullable(String())),
        ('url', Nullable(String())),
    ])

    # These are promoted tags that come in in `tags`, but are more closely
    # related to contexts. To avoid naming confusion with Clickhouse nested
    # columns, they are stored in the database with s/./_/
    context_tag_columns = ColumnSet([
        ('app_device', Nullable(String())),
        ('device', Nullable(String())),
        ('device_family', Nullable(String())),
        ('runtime', Nullable(String())),
        ('runtime_name', Nullable(String())),
        ('browser', Nullable(String())),
        ('browser_name', Nullable(String())),
        ('os', Nullable(String())),
        ('os_name', Nullable(String())),
        ('os_rooted', Nullable(UInt(8))),
    ])

    exception_stack_fields = [
        ('type', Nullable(String())),
        ('value', Nullable(String())),
        ('mechanism_type', Nullable(String())),
        ('mechanism_handled', Nullable(UInt(8))),
    ]
    exception_frame_fields = [
        ('abs_path', Nullable(String())),
        ('filename', Nullable(String())),
        ('package', Nullable(String())),
        ('module', Nullable(String())),
        ('function', Nullable(String())),
        ('in_app', Nullable(UInt(8))),
        ('colno', Nullable(UInt(32))),
        ('lineno', Nullable(UInt(32))),
        ('stack_level', UInt(16)),
    ]

    trailing_fields = [
        # other tags
        ('tags', Nested([
            ('key', String()),
            ('value', String()),
        ])),

        # other context
        ('contexts', Nested([
            ('key', String()),
            ('value', String()),
        ])),

        # http interface
        ('http_method', Nullable(String())),
        ('http_referer', Nullable(String())),

        # exception interface
        ('exception_stacks', Nested(exception_stack_fields)),
        ('exception_frames', Nested(exception_frame_fields)),

        # These are columns we added later in the life of the (current) production
        # database. They don't necessarily belong here in a logical/readability sense
        # but they are here to match the order of columns in production because
        # `insert_distributed_sync` is very sensitive to column existence and ordering.
        ('culprit', Nullable(String())),
        ('sdk_integrations', Array(String())),
        ('modules', Nested([
            ('name', String()),
            ('version', String()),
        ])),
    ]

    # Concatenation order defines the column order of the table; keep it.
    full_column_set = (
        mandatory_columns
        + general_event_fields
        + stream_columns
        + context_columns
        + tag_columns
        + context_tag_columns
        + trailing_fields
    )

    sample_expr = 'cityHash64(toString(event_id))'
    ordering = '(project_id, toStartOfDay(timestamp), %s)' % sample_expr
    merge_tree_schema = ReplacingMergeTreeSchema(
        columns=full_column_set,
        local_table_name='sentry_local',
        dist_table_name='sentry_dist',
        mandatory_conditions=[('deleted', '=', 0)],
        order_by=ordering,
        partition_by='(toMonday(timestamp), if(equals(retention_days, 30), 30, 90))',
        version_column='deleted',
        sample_expr=sample_expr,
        migration_function=events_migrations,
    )

    # One schema serves both the read and the write path.
    schemas = DatasetSchemas(
        read_schema=merge_tree_schema,
        write_schema=merge_tree_schema,
    )

    kafka_loader = KafkaStreamLoader(
        processor=EventsProcessor(tag_columns),
        default_topic="events",
        replacement_topic="event-replacements",
        commit_log_topic="snuba-commit-log",
    )
    writer = TableWriter(
        write_schema=merge_tree_schema,
        stream_loader=kafka_loader,
    )

    super(EventsDataset, self).__init__(
        dataset_schemas=schemas,
        table_writer=writer,
        time_group_columns={
            'time': 'timestamp',
            'rtime': 'received'
        },
        time_parse_columns=('timestamp', 'received'),
    )

    # The column groups are stored before the tag processor is built,
    # since it is constructed from helper methods called on self.
    self.__metadata_columns = stream_columns
    self.__promoted_tag_columns = tag_columns
    self.__promoted_context_tag_columns = context_tag_columns
    self.__promoted_context_columns = context_columns
    self.__required_columns = mandatory_columns

    self.__tags_processor = TagColumnProcessor(
        columns=full_column_set,
        promoted_columns=self._get_promoted_columns(),
        column_tag_map=self._get_column_tag_map(),
    )
def visit_fixedstring(
    self, node: Node, visited_children: Iterable[Any]
) -> ColumnType:
    """Build a ``FixedString`` column type from a parsed fixed-string node.

    The length is taken from the text of the node's fourth child
    (``node.children[3]``) and converted with ``int``. ``visited_children``
    is accepted but not used.
    """
    length_token = node.children[3]
    return FixedString(int(length_token.text))
def __init__(self) -> None:
    """Declare the errors table columns and wire up schema, writer and tag processing.

    Builds the column set for ``errors_local`` / ``errors_dist``, records
    which tags are promoted to dedicated columns, creates a
    ``ReplacingMergeTreeSchema`` used for both reads and writes, attaches a
    Kafka-backed ``TableWriter`` reading from the ``events`` topic, and
    finally sets up the ``TagColumnProcessor``.
    """
    all_columns = ColumnSet([
        ("org_id", UInt(64)),
        ("project_id", UInt(64)),
        ("timestamp", DateTime()),
        ("event_id", WithCodecs(UUID(), ["NONE"])),
        (
            "event_hash",
            WithCodecs(
                Materialized(UInt(64), "cityHash64(toString(event_id))"),
                ["NONE"],
            ),
        ),
        ("platform", LowCardinality(String())),
        ("environment", LowCardinality(Nullable(String()))),
        ("release", LowCardinality(Nullable(String()))),
        ("dist", LowCardinality(Nullable(String()))),
        ("ip_address_v4", Nullable(IPv4())),
        ("ip_address_v6", Nullable(IPv6())),
        ("user", WithDefault(String(), "''")),
        ("user_hash", Materialized(UInt(64), "cityHash64(user)")),
        ("user_id", Nullable(String())),
        ("user_name", Nullable(String())),
        ("user_email", Nullable(String())),
        ("sdk_name", LowCardinality(Nullable(String()))),
        ("sdk_version", LowCardinality(Nullable(String()))),
        ("tags", Nested([("key", String()), ("value", String())])),
        ("_tags_flattened", String()),
        ("contexts", Nested([("key", String()), ("value", String())])),
        ("_contexts_flattened", String()),
        ("transaction_name", WithDefault(LowCardinality(String()), "''")),
        (
            "transaction_hash",
            Materialized(UInt(64), "cityHash64(transaction_name)"),
        ),
        ("span_id", Nullable(UInt(64))),
        ("trace_id", Nullable(UUID())),
        ("partition", UInt(16)),
        ("offset", WithCodecs(UInt(64), ["DoubleDelta", "LZ4"])),
        ("retention_days", UInt(16)),
        ("deleted", UInt(8)),
        ("group_id", UInt(64)),
        ("primary_hash", FixedString(32)),
        # NOTE(review): declared as UInt(64) while the expression is
        # hex(primary_hash) -- confirm this matches the production table.
        ("primary_hash_hex", Materialized(UInt(64), "hex(primary_hash)")),
        ("event_string", WithCodecs(String(), ["NONE"])),
        ("received", DateTime()),
        ("message", String()),
        ("title", String()),
        ("culprit", String()),
        ("level", LowCardinality(String())),
        ("location", Nullable(String())),
        ("version", LowCardinality(Nullable(String()))),
        ("type", LowCardinality(String())),
        (
            "exception_stacks",
            Nested([
                ("type", Nullable(String())),
                ("value", Nullable(String())),
                ("mechanism_type", Nullable(String())),
                ("mechanism_handled", Nullable(UInt(8))),
            ]),
        ),
        (
            "exception_frames",
            Nested([
                ("abs_path", Nullable(String())),
                ("colno", Nullable(UInt(32))),
                ("filename", Nullable(String())),
                ("function", Nullable(String())),
                ("lineno", Nullable(UInt(32))),
                ("in_app", Nullable(UInt(8))),
                ("package", Nullable(String())),
                ("module", Nullable(String())),
                ("stack_level", Nullable(UInt(16))),
            ]),
        ),
        ("sdk_integrations", Array(String())),
        ("modules", Nested([("name", String()), ("version", String())])),
    ])

    # Tag name -> name of the dedicated column declared in all_columns.
    # Every value here must be a column that exists above; "sentry:user"
    # therefore points at the "user" column.
    self.__promoted_tag_columns = {
        "environment": "environment",
        "sentry:release": "release",
        "sentry:dist": "dist",
        "sentry:user": "user",
        "transaction": "transaction_name",
        "level": "level",
    }

    schema = ReplacingMergeTreeSchema(
        columns=all_columns,
        local_table_name="errors_local",
        dist_table_name="errors_dist",
        mandatory_conditions=[("deleted", "=", 0)],
        prewhere_candidates=[
            "event_id",
            "group_id",
            "tags[sentry:release]",
            "message",
            "environment",
            "project_id",
        ],
        order_by="(org_id, project_id, toStartOfDay(timestamp), primary_hash_hex, event_hash)",
        partition_by="(toMonday(timestamp), if(retention_days = 30, 30, 90))",
        version_column="deleted",
        sample_expr="event_hash",
        ttl_expr="timestamp + toIntervalDay(retention_days)",
        settings={"index_granularity": "8192"},
    )

    # The same schema object serves both the read and the write path.
    dataset_schemas = DatasetSchemas(
        read_schema=schema,
        write_schema=schema,
    )

    table_writer = TableWriter(
        write_schema=schema,
        stream_loader=KafkaStreamLoader(
            processor=ErrorsProcessor(self.__promoted_tag_columns),
            default_topic="events",
        ),
    )

    super().__init__(
        dataset_schemas=dataset_schemas,
        table_writer=table_writer,
        time_group_columns={
            "time": "timestamp",
            "rtime": "received"
        },
        time_parse_columns=("timestamp", "received"),
    )

    # Built last: it is constructed from helper methods called on self,
    # which run after the promoted-tag map above has been assigned.
    self.__tags_processor = TagColumnProcessor(
        columns=all_columns,
        promoted_columns=self._get_promoted_columns(),
        column_tag_map=self._get_column_tag_map(),
    )