def test_entity_validation(key: EntityKey, condition: Optional[Expression]) -> None:
    """Run the required-column validator on a query against ``key``.

    A logical query is built over the entity identified by ``key``. It
    selects a single ``time`` expression and uses ``condition`` unchanged.
    The query is then handed to an ``EntityRequiredColumnValidator`` that
    requires ``project_id``; anything ``validate`` raises propagates to the
    caller.
    """
    from_clause = QueryEntity(key, get_entity(key).get_data_model())
    time_selection = SelectedExpression(
        "time", Column("_snuba_timestamp", None, "timestamp")
    )
    logical_query = LogicalQuery(
        from_clause,
        selected_columns=[time_selection],
        condition=condition,
    )

    required_columns_check = EntityRequiredColumnValidator({"project_id"})
    required_columns_check.validate(logical_query)
def __init__(
    self,
    writable_storage_key: StorageKey,
    readable_storage_key: StorageKey,
    value_schema: Sequence[Column[SchemaModifiers]],
    mappers: TranslationMappers,
) -> None:
    """Set up an entity backed by one writable and one readable storage.

    Args:
        writable_storage_key: Resolved through ``get_writable_storage``; the
            resulting storage is passed on as the writable storage.
        readable_storage_key: Resolved through ``get_storage``; the query
            plan builder is created for this storage.
        value_schema: Columns appended after the fixed columns of the
            abstract column set.
        mappers: Translation mappers concatenated after the built-in
            ``tags`` subscriptable mapper.
    """
    write_storage = get_writable_storage(writable_storage_key)
    read_storage = get_storage(readable_storage_key)

    # The built-in tags mapper comes first; caller-supplied mappers follow.
    tags_mappers = TranslationMappers(
        subscriptables=[SubscriptableMapper(None, "tags", None, "tags")],
    )
    pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            read_storage,
            mappers=tags_mappers.concat(mappers),
        )
    )

    # Fixed columns first, then whatever value columns the caller provided.
    columns = ColumnSet(
        [
            Column("org_id", UInt(64)),
            Column("project_id", UInt(64)),
            Column("metric_id", UInt(64)),
            Column("timestamp", DateTime()),
            Column("tags", Nested([("key", UInt(64)), ("value", UInt(64))])),
            *value_schema,
        ]
    )

    super().__init__(
        storages=[write_storage, read_storage],
        query_pipeline_builder=pipeline,
        abstract_column_set=columns,
        join_relationships={},
        writable_storage=write_storage,
        validators=[EntityRequiredColumnValidator({"org_id", "project_id"})],
        required_time_column="timestamp",
    )
def __init__(self) -> None:
    """Set up the entity over the raw and hourly outcomes storages.

    Writes target the ``OUTCOMES_RAW`` storage. The query plan builder and
    the abstract column set both come from the ``OUTCOMES_HOURLY`` storage.
    Queries must reference ``org_id`` and use ``timestamp`` as the time
    column.
    """
    # The raw table we write onto, and that potentially we could
    # query.
    raw_storage = get_writable_storage(StorageKey.OUTCOMES_RAW)
    # The materialized view we query aggregate data from.
    hourly_storage = get_storage(StorageKey.OUTCOMES_HOURLY)
    hourly_schema = hourly_storage.get_schema()

    # TODO: Once we are ready to expose the raw data model and select whether to use
    # materialized storage or the raw one here, replace this with a custom storage
    # selector that decides when to use the materialized data.
    plan_builder = SingleStorageQueryPlanBuilder(storage=hourly_storage)
    pipeline = SimplePipelineBuilder(query_plan_builder=plan_builder)

    super().__init__(
        storages=[raw_storage, hourly_storage],
        query_pipeline_builder=pipeline,
        abstract_column_set=hourly_schema.get_columns(),
        join_relationships={},
        writable_storage=raw_storage,
        validators=[EntityRequiredColumnValidator({"org_id"})],
        required_time_column="timestamp",
    )
def __init__(self, custom_mappers: Optional[TranslationMappers] = None) -> None:
    """Set up the entity over the writable ``TRANSACTIONS`` storage.

    Args:
        custom_mappers: Optional mappers concatenated after
            ``transaction_translator``. When ``None``,
            ``transaction_translator`` is used on its own.
    """
    storage = get_writable_storage(StorageKey.TRANSACTIONS)
    schema = storage.get_table_writer().get_schema()

    if custom_mappers is None:
        mappers = transaction_translator
    else:
        mappers = transaction_translator.concat(custom_mappers)

    pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=storage,
            mappers=mappers,
        ),
    )
    columns = schema.get_columns()

    # Inner join towards the spans entity, keyed on project and span id.
    contains_spans = JoinRelationship(
        rhs_entity=EntityKey.SPANS,
        columns=[
            ("project_id", "project_id"),
            ("span_id", "transaction_span_id"),
        ],
        join_type=JoinType.INNER,
        equivalences=[
            ColumnEquivalence("event_id", "transaction_id"),
            ColumnEquivalence("transaction_name", "transaction_name"),
            ColumnEquivalence("trace_id", "trace_id"),
        ],
    )

    super().__init__(
        storages=[storage],
        query_pipeline_builder=pipeline,
        abstract_column_set=columns,
        join_relationships={"contains": contains_spans},
        writable_storage=storage,
        validators=[EntityRequiredColumnValidator({"project_id"})],
        required_time_column="finish_ts",
    )
def __init__(self) -> None:
    """Set up the entity over the writable ``SPANS`` storage.

    The abstract column set is declared inline here; it is not read from the
    storage schema. A ``contained`` inner join points to the transactions
    entity. Queries must reference ``project_id``; no required time column
    is configured.
    """
    storage = get_writable_storage(StorageKey.SPANS)

    tags_mappers = TranslationMappers(
        subscriptables=[SubscriptableMapper(None, "tags", None, "tags")],
    )
    pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=storage,
            mappers=tags_mappers,
        ),
    )

    columns = ColumnSet(
        [
            ("project_id", UInt(64)),
            ("transaction_id", UUID()),
            ("trace_id", UUID()),
            ("transaction_span_id", UInt(64)),
            ("span_id", UInt(64)),
            ("parent_span_id", UInt(64, Modifiers(nullable=True))),
            ("transaction_name", String()),
            ("op", String()),
            ("status", UInt(8)),
            ("start_ts", DateTime()),
            ("start_ns", UInt(32)),
            ("finish_ts", DateTime()),
            ("finish_ns", UInt(32)),
            ("duration_ms", UInt(32)),
            ("tags", Nested([("key", String()), ("value", String())])),
        ]
    )

    # Inner join towards the transactions entity, keyed on project and the
    # span id of the enclosing transaction.
    contained_in_transaction = JoinRelationship(
        rhs_entity=EntityKey.TRANSACTIONS,
        columns=[
            ("project_id", "project_id"),
            ("transaction_span_id", "span_id"),
        ],
        join_type=JoinType.INNER,
        equivalences=[
            ColumnEquivalence("transaction_id", "event_id"),
            ColumnEquivalence("transaction_name", "transaction_name"),
            ColumnEquivalence("trace_id", "trace_id"),
        ],
    )

    super().__init__(
        storages=[storage],
        query_pipeline_builder=pipeline,
        abstract_column_set=columns,
        join_relationships={"contained": contained_in_transaction},
        writable_storage=storage,
        validators=[EntityRequiredColumnValidator({"project_id"})],
        required_time_column=None,
    )
def __init__(self) -> None:
    """Set up a read-only entity over the ``OUTCOMES_RAW`` storage.

    The same storage provides the query plan and the abstract column set.
    No writable storage is registered. Queries must reference ``org_id``
    and use ``timestamp`` as the time column.
    """
    raw_storage = get_storage(StorageKey.OUTCOMES_RAW)

    plan_builder = SingleStorageQueryPlanBuilder(storage=raw_storage)
    pipeline = SimplePipelineBuilder(query_plan_builder=plan_builder)

    super().__init__(
        storages=[raw_storage],
        query_pipeline_builder=pipeline,
        abstract_column_set=raw_storage.get_schema().get_columns(),
        join_relationships={},
        writable_storage=None,
        validators=[EntityRequiredColumnValidator({"org_id"})],
        required_time_column="timestamp",
    )
def __init__(
    self,
    writable_storage_key: Optional[StorageKey],
    readable_storage_key: StorageKey,
    value_schema: Sequence[Column[SchemaModifiers]],
    mappers: TranslationMappers,
    abstract_column_set: Optional[ColumnSet] = None,
    validators: Optional[Sequence[QueryValidator]] = None,
) -> None:
    """Set up an entity with a readable and an optional writable storage.

    Args:
        writable_storage_key: When truthy, resolved through
            ``get_writable_storage`` and registered as the writable storage;
            otherwise the entity has no writable storage.
        readable_storage_key: Resolved through ``get_storage``; the query
            plan builder is created for this storage.
        value_schema: Columns appended to the default abstract column set.
            Only consulted when ``abstract_column_set`` is ``None``.
        mappers: Translation mappers concatenated after the built-in
            ``tags`` subscriptable mapper.
        abstract_column_set: Full replacement for the default column set.
        validators: Full replacement for the default validators (required
            ``org_id``/``project_id`` columns plus a granularity check with
            ``minimum=10``).
    """
    writable_storage = None
    if writable_storage_key:
        writable_storage = get_writable_storage(writable_storage_key)
    readable_storage = get_storage(readable_storage_key)

    # The readable storage is always listed first; the writable one is only
    # added when it was actually resolved.
    storages = [readable_storage]
    if writable_storage:
        storages.append(writable_storage)

    if abstract_column_set is not None:
        columns = abstract_column_set
    else:
        columns = ColumnSet(
            [
                Column("org_id", UInt(64)),
                Column("project_id", UInt(64)),
                Column("metric_id", UInt(64)),
                Column("timestamp", DateTime()),
                Column("bucketed_time", DateTime()),
                Column("tags", Nested([("key", UInt(64)), ("value", UInt(64))])),
                *value_schema,
            ]
        )

    if validators is not None:
        query_validators = validators
    else:
        query_validators = [
            EntityRequiredColumnValidator({"org_id", "project_id"}),
            GranularityValidator(minimum=10),
        ]

    # The built-in tags mapper comes first; caller-supplied mappers follow.
    tags_mappers = TranslationMappers(
        subscriptables=[SubscriptableMapper(None, "tags", None, "tags")],
    )
    pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            readable_storage,
            mappers=tags_mappers.concat(mappers),
        )
    )

    super().__init__(
        storages=storages,
        query_pipeline_builder=pipeline,
        abstract_column_set=columns,
        join_relationships={},
        writable_storage=writable_storage,
        validators=query_validators,
        required_time_column="timestamp",
    )
def __init__(self) -> None:
    """Set up the entity over the writable ``PROFILES`` storage.

    The single storage serves both queries and writes. The abstract column
    set is ``profile_columns``. Queries must reference ``organization_id``
    and ``project_id`` and use ``received`` as the time column.
    """
    profiles_storage = get_writable_storage(StorageKey.PROFILES)

    plan_builder = SingleStorageQueryPlanBuilder(profiles_storage)
    pipeline = SimplePipelineBuilder(query_plan_builder=plan_builder)

    required_columns = EntityRequiredColumnValidator(
        {"organization_id", "project_id"}
    )

    super().__init__(
        storages=[profiles_storage],
        query_pipeline_builder=pipeline,
        abstract_column_set=profile_columns,
        join_relationships={},
        writable_storage=profiles_storage,
        validators=[required_columns],
        required_time_column="received",
    )
def __init__(self, custom_mappers: Optional[TranslationMappers] = None) -> None:
    """Set up the entity over either the errors or the events storage.

    ``settings.ERRORS_ROLLOUT_ALL`` selects the backing storage and the
    matching storage selector and base translators:

    * truthy: ``ERRORS`` storage, ``ErrorsQueryStorageSelector``,
      ``errors_translators``;
    * falsy: ``EVENTS`` storage, ``EventsQueryStorageSelector``,
      ``event_translator``.

    Args:
        custom_mappers: Optional mappers concatenated after the base
            translators of the selected branch.
    """

    def with_custom(base: TranslationMappers) -> TranslationMappers:
        # Append the caller's mappers only when some were provided.
        if custom_mappers is None:
            return base
        return base.concat(custom_mappers)

    if settings.ERRORS_ROLLOUT_ALL:
        events_storage = get_writable_storage(StorageKey.ERRORS)
        plan_builder = SelectedStorageQueryPlanBuilder(
            selector=ErrorsQueryStorageSelector(
                mappers=with_custom(errors_translators)
            ),
        )
    else:
        events_storage = get_writable_storage(StorageKey.EVENTS)
        plan_builder = SelectedStorageQueryPlanBuilder(
            selector=EventsQueryStorageSelector(
                mappers=with_custom(event_translator)
            ),
        )
    pipeline_builder = SimplePipelineBuilder(query_plan_builder=plan_builder)

    columns = events_storage.get_table_writer().get_schema().get_columns()

    # Both joins are inner joins keyed on the project and the group.
    grouped_join = JoinRelationship(
        rhs_entity=EntityKey.GROUPEDMESSAGES,
        columns=[("project_id", "project_id"), ("group_id", "id")],
        join_type=JoinType.INNER,
        equivalences=[],
    )
    assigned_join = JoinRelationship(
        rhs_entity=EntityKey.GROUPASSIGNEE,
        columns=[("project_id", "project_id"), ("group_id", "group_id")],
        join_type=JoinType.INNER,
        equivalences=[],
    )

    super().__init__(
        storages=[events_storage],
        query_pipeline_builder=pipeline_builder,
        abstract_column_set=columns,
        join_relationships={
            "grouped": grouped_join,
            "assigned": assigned_join,
        },
        writable_storage=events_storage,
        validators=[EntityRequiredColumnValidator({"project_id"})],
        required_time_column="timestamp",
    )
def __init__(self) -> None:
    """Set up the entity over the raw and hourly sessions storages.

    Writes target the ``SESSIONS_RAW`` storage. The abstract column set is
    taken from the ``SESSIONS_HOURLY`` schema. Storage choice at query time
    is delegated to ``SessionsQueryStorageSelector``. Queries must reference
    ``org_id`` and ``project_id`` and use ``started`` as the time column.
    """
    raw_storage = get_writable_storage(StorageKey.SESSIONS_RAW)
    hourly_storage = get_storage(StorageKey.SESSIONS_HOURLY)
    hourly_schema = hourly_storage.get_schema()

    plan_builder = SelectedStorageQueryPlanBuilder(
        selector=SessionsQueryStorageSelector()
    )
    pipeline = SimplePipelineBuilder(query_plan_builder=plan_builder)

    super().__init__(
        storages=[raw_storage, hourly_storage],
        query_pipeline_builder=pipeline,
        abstract_column_set=hourly_schema.get_columns(),
        join_relationships={},
        writable_storage=raw_storage,
        validators=[EntityRequiredColumnValidator({"org_id", "project_id"})],
        required_time_column="started",
    )
def __init__(self, custom_mappers: Optional[TranslationMappers] = None) -> None:
    """Set up the transactions entity with a v1/v2 pipeline delegator.

    Two pipeline builders are created with the same mappers:

    * ``transactions_v1`` plans through ``TransactionsQueryStorageSelector``;
    * ``transactions_v2`` plans against the ``TRANSACTIONS_V2`` storage.

    A ``PipelineDelegator`` wraps both, with ``v2_selector_function`` as its
    selector function.

    Args:
        custom_mappers: Optional mappers concatenated after
            ``transaction_translator``.
    """
    storage = get_writable_storage(StorageKey.TRANSACTIONS)
    schema = storage.get_table_writer().get_schema()

    if custom_mappers is None:
        mappers = transaction_translator
    else:
        mappers = transaction_translator.concat(custom_mappers)

    v1_plan_builder = SelectedStorageQueryPlanBuilder(
        selector=TransactionsQueryStorageSelector(mappers=mappers)
    )
    v2_plan_builder = SingleStorageQueryPlanBuilder(
        storage=get_storage(StorageKey.TRANSACTIONS_V2),
        mappers=mappers,
    )
    builders_by_name = {
        "transactions_v1": SimplePipelineBuilder(query_plan_builder=v1_plan_builder),
        "transactions_v2": SimplePipelineBuilder(query_plan_builder=v2_plan_builder),
    }

    delegator: QueryPipelineBuilder[ClickhouseQueryPlan] = PipelineDelegator(
        query_pipeline_builders=builders_by_name,
        selector_func=v2_selector_function,
        split_rate_limiter=True,
        ignore_secondary_exceptions=True,
    )

    super().__init__(
        storages=[storage],
        query_pipeline_builder=delegator,
        abstract_column_set=schema.get_columns(),
        join_relationships={},
        writable_storage=storage,
        validators=[EntityRequiredColumnValidator({"project_id"})],
        required_time_column="finish_ts",
    )
def __init__(self) -> None:
    """Set up the read-only entity over the ``DISCOVER`` storage.

    The abstract column set is the sum of the common columns declared here,
    ``EVENTS_COLUMNS`` and ``TRANSACTIONS_COLUMNS``. Queries are planned
    against the single discover storage with a chain of translation mappers
    built below. Queries must reference ``project_id`` and use ``timestamp``
    as the time column.
    """
    # Every name listed here becomes a nullable String column, in this order.
    nullable_string_names = (
        "platform",
        "environment",
        "release",
        "dist",
        "user",
        "transaction",
        "message",
        "title",
        # User
        "user_id",
        "username",
        "email",
        "ip_address",
        # SDK
        "sdk_name",
        "sdk_version",
        # geo location context
        "geo_country_code",
        "geo_region",
        "geo_city",
        "http_method",
        "http_referer",
    )
    self.__common_columns = ColumnSet(
        [
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", String(Modifiers(nullable=True))),
            ("timestamp", DateTime()),
            # A fresh type instance is created for each column.
            *(
                (name, String(Modifiers(nullable=True)))
                for name in nullable_string_names
            ),
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
            ("trace_id", String(Modifiers(nullable=True))),
            ("span_id", UInt(64, Modifiers(nullable=True))),
        ]
    )
    self.__events_columns = EVENTS_COLUMNS
    self.__transactions_columns = TRANSACTIONS_COLUMNS

    discover_storage = get_storage(StorageKey.DISCOVER)

    # Mapper chain, concatenated in this order: events, transactions,
    # null-function, discover-specific column mappers, tags/contexts
    # subscriptables.
    mappers = events_translation_mappers.concat(transaction_translation_mappers)
    mappers = mappers.concat(null_function_translation_mappers)
    mappers = mappers.concat(
        TranslationMappers(
            columns=[
                ColumnToFunction(
                    None,
                    "ip_address",
                    "coalesce",
                    (
                        FunctionCall(
                            None,
                            "IPv4NumToString",
                            (Column(None, None, "ip_address_v4"),),
                        ),
                        FunctionCall(
                            None,
                            "IPv6NumToString",
                            (Column(None, None, "ip_address_v6"),),
                        ),
                    ),
                ),
                ColumnToColumn(None, "transaction", None, "transaction_name"),
                ColumnToColumn(None, "username", None, "user_name"),
                ColumnToColumn(None, "email", None, "user_email"),
                ColumnToMapping(
                    None,
                    "geo_country_code",
                    None,
                    "contexts",
                    "geo.country_code",
                    nullable=True,
                ),
                ColumnToMapping(
                    None,
                    "geo_region",
                    None,
                    "contexts",
                    "geo.region",
                    nullable=True,
                ),
                ColumnToMapping(
                    None,
                    "geo_city",
                    None,
                    "contexts",
                    "geo.city",
                    nullable=True,
                ),
                ColumnToFunction(
                    None,
                    "user",
                    "nullIf",
                    (Column(None, None, "user"), Literal(None, "")),
                ),
            ]
        )
    )
    mappers = mappers.concat(
        TranslationMappers(
            subscriptables=[
                SubscriptableMapper(None, "tags", None, "tags"),
                SubscriptableMapper(None, "contexts", None, "contexts"),
            ],
        )
    )

    discover_storage_plan_builder = SingleStorageQueryPlanBuilder(
        storage=discover_storage,
        mappers=mappers,
    )
    discover_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=discover_storage_plan_builder
    )

    all_columns = (
        self.__common_columns
        + self.__events_columns
        + self.__transactions_columns
    )

    super().__init__(
        storages=[discover_storage],
        query_pipeline_builder=discover_pipeline_builder,
        abstract_column_set=all_columns,
        join_relationships={},
        writable_storage=None,
        validators=[EntityRequiredColumnValidator({"project_id"})],
        required_time_column="timestamp",
    )
def __init__(self) -> None:
    """Set up the read-only discover entity with an events fallback.

    Pipeline builders for the ``EVENTS`` and ``DISCOVER`` storages are both
    constructed. ``settings.ERRORS_ROLLOUT_ALL`` then decides which is used:

    * falsy: the events storage with the events pipeline builder;
    * truthy: the discover storage with a ``PipelineDelegator`` holding a
      ``primary`` (simple) and a ``sampler`` (sampled) pipeline builder.

    The abstract column set is the sum of the common columns declared here,
    ``EVENTS_COLUMNS`` and ``TRANSACTIONS_COLUMNS``. Queries must reference
    ``project_id`` and use ``timestamp`` as the time column.
    """

    def shared_mappers() -> TranslationMappers:
        # Rebuilt on every call so each storage gets its own concat chain.
        base = events_translation_mappers.concat(transaction_translation_mappers)
        return base.concat(null_function_translation_mappers)

    # Every name listed here becomes a nullable String column, in this order.
    nullable_string_names = (
        "platform",
        "environment",
        "release",
        "dist",
        "user",
        "transaction",
        "message",
        "title",
        # User
        "user_id",
        "username",
        "email",
        "ip_address",
        # SDK
        "sdk_name",
        "sdk_version",
        # geo location context
        "geo_country_code",
        "geo_region",
        "geo_city",
        "http_method",
        "http_referer",
    )
    self.__common_columns = ColumnSet(
        [
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", String(Modifiers(nullable=True))),
            ("timestamp", DateTime()),
            # A fresh type instance is created for each column.
            *(
                (name, String(Modifiers(nullable=True)))
                for name in nullable_string_names
            ),
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
            ("trace_id", String(Modifiers(nullable=True))),
        ]
    )
    self.__events_columns = EVENTS_COLUMNS
    self.__transactions_columns = TRANSACTIONS_COLUMNS

    # --- events storage pipeline -------------------------------------
    events_storage = get_storage(StorageKey.EVENTS)
    events_mappers = shared_mappers().concat(
        TranslationMappers(
            # XXX: Remove once we are using errors
            columns=[
                ColumnToMapping(None, "release", None, "tags", "sentry:release"),
                ColumnToMapping(None, "dist", None, "tags", "sentry:dist"),
                ColumnToMapping(None, "user", None, "tags", "sentry:user"),
            ],
            subscriptables=[
                SubscriptableMapper(None, "tags", None, "tags"),
                SubscriptableMapper(None, "contexts", None, "contexts"),
            ],
        )
    )
    events_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=SelectedStorageQueryPlanBuilder(
            selector=EventsQueryStorageSelector(mappers=events_mappers)
        ),
    )

    # --- discover storage pipeline -----------------------------------
    discover_storage = get_storage(StorageKey.DISCOVER)
    discover_mappers = shared_mappers().concat(
        TranslationMappers(
            columns=[
                ColumnToFunction(
                    None,
                    "ip_address",
                    "coalesce",
                    (
                        FunctionCall(
                            None,
                            "IPv4NumToString",
                            (Column(None, None, "ip_address_v4"),),
                        ),
                        FunctionCall(
                            None,
                            "IPv6NumToString",
                            (Column(None, None, "ip_address_v6"),),
                        ),
                    ),
                ),
                ColumnToColumn(None, "transaction", None, "transaction_name"),
                ColumnToColumn(None, "username", None, "user_name"),
                ColumnToColumn(None, "email", None, "user_email"),
                ColumnToMapping(
                    None,
                    "geo_country_code",
                    None,
                    "contexts",
                    "geo.country_code",
                    nullable=True,
                ),
                ColumnToMapping(
                    None,
                    "geo_region",
                    None,
                    "contexts",
                    "geo.region",
                    nullable=True,
                ),
                ColumnToMapping(
                    None,
                    "geo_city",
                    None,
                    "contexts",
                    "geo.city",
                    nullable=True,
                ),
                ColumnToFunction(
                    None,
                    "user",
                    "nullIf",
                    (Column(None, None, "user"), Literal(None, "")),
                ),
            ]
        )
    )
    discover_mappers = discover_mappers.concat(
        TranslationMappers(
            subscriptables=[
                SubscriptableMapper(None, "tags", None, "tags"),
                SubscriptableMapper(None, "contexts", None, "contexts"),
            ],
        )
    )
    discover_storage_plan_builder = SingleStorageQueryPlanBuilder(
        storage=discover_storage,
        mappers=discover_mappers,
    )
    discover_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=discover_storage_plan_builder
    )

    # --- pick the active storage and pipeline ------------------------
    pipeline_builder: Union[PipelineDelegator, SimplePipelineBuilder]
    if not settings.ERRORS_ROLLOUT_ALL:
        storage = events_storage
        pipeline_builder = events_pipeline_builder
    else:
        storage = discover_storage
        sampled_pipeline_builder = SampledSimplePipelineBuilder(
            query_plan_builder=discover_storage_plan_builder
        )
        pipeline_builder = PipelineDelegator(
            query_pipeline_builders={
                "primary": discover_pipeline_builder,
                "sampler": sampled_pipeline_builder,
            },
            selector_func=sampling_selector_func,
            callback_func=sampling_callback_func,
        )

    all_columns = (
        self.__common_columns
        + self.__events_columns
        + self.__transactions_columns
    )

    super().__init__(
        storages=[storage],
        query_pipeline_builder=pipeline_builder,
        abstract_column_set=all_columns,
        join_relationships={},
        writable_storage=None,
        validators=[EntityRequiredColumnValidator({"project_id"})],
        required_time_column="timestamp",
    )