def test() -> None:
    """Check that PipelineDelegator runs both pipelines and reports to the callback.

    The selector returns "events" as the primary pipeline and "errors" as the
    only secondary one, so the query runner is expected to be invoked twice
    and the callback to be handed one result per pipeline.
    """
    cv = threading.Condition()
    query_result = QueryResult({}, {"stats": {}, "sql": ""})
    mock_query_runner = Mock(return_value=query_result)

    def callback_func(*args: object, **kwargs: object) -> None:
        # Mock forwards its call arguments to ``side_effect``. The callback is
        # asserted below to be called with four positional arguments, so a
        # one-parameter signature raises TypeError before notify() and the
        # test only proceeds once the wait below times out.
        with cv:
            cv.notify()

    mock_callback = Mock(side_effect=callback_func)

    query_body = {
        "selected_columns": ["type", "project_id"],
    }

    events = get_dataset("events")
    query = parse_query(query_body, events)

    events_pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=get_storage(StorageKey.EVENTS)
        ),
    )

    errors_pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=get_storage(StorageKey.ERRORS)
        ),
    )

    delegator = PipelineDelegator(
        query_pipeline_builders={
            "events": events_pipeline,
            "errors": errors_pipeline,
        },
        selector_func=lambda query, referrer: ("events", ["errors"]),
        callback_func=mock_callback,
    )

    # Hold the condition while executing so the callback cannot notify
    # before this thread has started waiting.
    with cv:
        request_settings = HTTPRequestSettings()
        delegator.build_execution_pipeline(
            Request("", query_body, query, request_settings, "ref"),
            mock_query_runner,
        ).execute()
        cv.wait(timeout=5)

    assert mock_query_runner.call_count == 2

    assert mock_callback.call_args == call(
        query,
        request_settings,
        "ref",
        [
            Result("events", query_result, ANY),
            Result("errors", query_result, ANY),
        ],
    )
def __init__(
    self,
    writable_storage_key: StorageKey,
    readable_storage_key: StorageKey,
    value_schema: Sequence[Column[SchemaModifiers]],
    mappers: TranslationMappers,
) -> None:
    """Set up the entity over one writable and one readable storage.

    Args:
        writable_storage_key: key of the storage registered as writable.
        readable_storage_key: key of the storage queries are planned against.
        value_schema: extra columns appended after the fixed columns below.
        mappers: mappers concatenated after the ``tags`` subscript mapper.
    """
    writable_storage = get_writable_storage(writable_storage_key)
    readable_storage = get_storage(readable_storage_key)

    tags_mappers = TranslationMappers(
        subscriptables=[
            SubscriptableMapper(None, "tags", None, "tags"),
        ],
    )
    plan_builder = SingleStorageQueryPlanBuilder(
        readable_storage,
        mappers=tags_mappers.concat(mappers),
    )

    fixed_columns = [
        Column("org_id", UInt(64)),
        Column("project_id", UInt(64)),
        Column("metric_id", UInt(64)),
        Column("timestamp", DateTime()),
        Column("tags", Nested([("key", UInt(64)), ("value", UInt(64))])),
    ]

    super().__init__(
        storages=[writable_storage, readable_storage],
        query_pipeline_builder=SimplePipelineBuilder(query_plan_builder=plan_builder),
        abstract_column_set=ColumnSet([*fixed_columns, *value_schema]),
        join_relationships={},
        writable_storage=writable_storage,
        validators=[EntityRequiredColumnValidator({"org_id", "project_id"})],
        required_time_column="timestamp",
    )
def __init__(self) -> None:
    """Register the raw outcomes storage for writes and the hourly one for reads."""
    # Raw table: the write target, which could potentially be queried too.
    raw_storage = get_writable_storage(StorageKey.OUTCOMES_RAW)
    # Materialized view: the storage aggregate data is queried from.
    hourly_storage = get_storage(StorageKey.OUTCOMES_HOURLY)
    hourly_schema = hourly_storage.get_schema()

    # TODO: Once we are ready to expose the raw data model and select whether
    # to use the materialized storage or the raw one, replace this with a
    # custom storage selector that decides when to use the materialized data.
    plan_builder = SingleStorageQueryPlanBuilder(storage=hourly_storage)

    super().__init__(
        storages=[raw_storage, hourly_storage],
        query_pipeline_builder=SimplePipelineBuilder(query_plan_builder=plan_builder),
        abstract_column_set=hourly_schema.get_columns(),
        join_relationships={},
        writable_storage=raw_storage,
        validators=[EntityRequiredColumnValidator({"org_id"})],
        required_time_column="timestamp",
    )
def __init__(self, custom_mappers: Optional[TranslationMappers] = None) -> None:
    """Set up the entity on the transactions storage with a join to spans.

    Args:
        custom_mappers: optional mappers concatenated after
            ``transaction_translator``.
    """
    storage = get_writable_storage(StorageKey.TRANSACTIONS)
    schema = storage.get_table_writer().get_schema()

    mappers = transaction_translator
    if custom_mappers is not None:
        mappers = transaction_translator.concat(custom_mappers)

    pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=storage,
            mappers=mappers,
        ),
    )
    columns = schema.get_columns()

    # "contains": inner join from this entity to the spans entity.
    spans_join = JoinRelationship(
        rhs_entity=EntityKey.SPANS,
        columns=[
            ("project_id", "project_id"),
            ("span_id", "transaction_span_id"),
        ],
        join_type=JoinType.INNER,
        equivalences=[
            ColumnEquivalence("event_id", "transaction_id"),
            ColumnEquivalence("transaction_name", "transaction_name"),
            ColumnEquivalence("trace_id", "trace_id"),
        ],
    )

    super().__init__(
        storages=[storage],
        query_pipeline_builder=pipeline_builder,
        abstract_column_set=columns,
        join_relationships={"contains": spans_join},
        writable_storage=storage,
    )
def __init__(self, custom_mappers: Optional[TranslationMappers] = None) -> None:
    """Set up the entity on the errors or the events storage.

    ``settings.ERRORS_ROLLOUT_ALL`` decides which writable storage, storage
    selector and default mappers are used.

    Args:
        custom_mappers: optional mappers concatenated after the default
            mappers of the chosen branch.
    """
    if settings.ERRORS_ROLLOUT_ALL:
        events_storage = get_writable_storage(StorageKey.ERRORS)
        mappers = errors_translators
        if custom_mappers is not None:
            mappers = errors_translators.concat(custom_mappers)
        pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=SelectedStorageQueryPlanBuilder(
                selector=ErrorsQueryStorageSelector(mappers=mappers)
            ),
        )
    else:
        events_storage = get_writable_storage(StorageKey.EVENTS)
        mappers = event_translator
        if custom_mappers is not None:
            mappers = event_translator.concat(custom_mappers)
        pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=SelectedStorageQueryPlanBuilder(
                selector=EventsQueryStorageSelector(mappers=mappers)
            ),
        )

    columns = events_storage.get_table_writer().get_schema().get_columns()

    # Both relationships are inner joins keyed on project and group.
    joins = {
        "grouped": JoinRelationship(
            rhs_entity=EntityKey.GROUPEDMESSAGES,
            columns=[("project_id", "project_id"), ("group_id", "id")],
            join_type=JoinType.INNER,
            equivalences=[],
        ),
        "assigned": JoinRelationship(
            rhs_entity=EntityKey.GROUPASSIGNEE,
            columns=[("project_id", "project_id"), ("group_id", "group_id")],
            join_type=JoinType.INNER,
            equivalences=[],
        ),
    }

    super().__init__(
        storages=[events_storage],
        query_pipeline_builder=pipeline_builder,
        abstract_column_set=columns,
        join_relationships=joins,
        writable_storage=events_storage,
        validators=[EntityRequiredColumnValidator({"project_id"})],
        required_time_column="timestamp",
    )
def __init__(self) -> None:
    """Set up the entity on the spans storage with a join back to transactions."""
    storage = get_writable_storage(StorageKey.SPANS)

    tags_mappers = TranslationMappers(
        subscriptables=[SubscriptableMapper(None, "tags", None, "tags")],
    )
    pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=storage,
            mappers=tags_mappers,
        ),
    )

    columns = ColumnSet(
        [
            ("project_id", UInt(64)),
            ("transaction_id", UUID()),
            ("trace_id", UUID()),
            ("transaction_span_id", UInt(64)),
            ("span_id", UInt(64)),
            ("parent_span_id", UInt(64, Modifiers(nullable=True))),
            ("transaction_name", String()),
            ("op", String()),
            ("status", UInt(8)),
            ("start_ts", DateTime()),
            ("start_ns", UInt(32)),
            ("finish_ts", DateTime()),
            ("finish_ns", UInt(32)),
            ("duration_ms", UInt(32)),
            ("tags", Nested([("key", String()), ("value", String())])),
        ]
    )

    # "contained": inner join from this entity to the transactions entity.
    transactions_join = JoinRelationship(
        rhs_entity=EntityKey.TRANSACTIONS,
        columns=[
            ("project_id", "project_id"),
            ("transaction_span_id", "span_id"),
        ],
        join_type=JoinType.INNER,
        equivalences=[
            ColumnEquivalence("transaction_id", "event_id"),
            ColumnEquivalence("transaction_name", "transaction_name"),
            ColumnEquivalence("trace_id", "trace_id"),
        ],
    )

    super().__init__(
        storages=[storage],
        query_pipeline_builder=pipeline_builder,
        abstract_column_set=columns,
        join_relationships={"contained": transactions_join},
        writable_storage=storage,
        validators=[EntityRequiredColumnValidator({"project_id"})],
        required_time_column=None,
    )
def __init__(self) -> None:
    """Set up a read-only entity on the raw outcomes storage."""
    raw_storage = get_storage(StorageKey.OUTCOMES_RAW)
    plan_builder = SingleStorageQueryPlanBuilder(storage=raw_storage)

    super().__init__(
        storages=[raw_storage],
        query_pipeline_builder=SimplePipelineBuilder(query_plan_builder=plan_builder),
        abstract_column_set=raw_storage.get_schema().get_columns(),
        join_relationships={},
        # No writable storage is registered for this entity.
        writable_storage=None,
    )
def __init__(self, custom_mappers: Optional[TranslationMappers] = None) -> None:
    """Set up the entity with an events pipeline and an optional errors one.

    A PipelineDelegator always runs the "events" pipeline as primary and
    additionally runs "errors" for a random share of queries.

    Args:
        custom_mappers: optional mappers concatenated after the default
            mappers of each pipeline.
    """
    storage = get_writable_storage(StorageKey.EVENTS)
    columns = storage.get_table_writer().get_schema().get_columns()

    events_mappers = event_translator
    if custom_mappers is not None:
        events_mappers = event_translator.concat(custom_mappers)
    events_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=SelectedStorageQueryPlanBuilder(
            selector=EventsQueryStorageSelector(mappers=events_mappers)
        ),
    )

    errors_mappers = errors_translators
    if custom_mappers is not None:
        errors_mappers = errors_translators.concat(custom_mappers)
    errors_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=SelectedStorageQueryPlanBuilder(
            selector=ErrorsQueryStorageSelector(mappers=errors_mappers)
        ),
    )

    def selector_func(_query: Query) -> Tuple[str, List[str]]:
        # Draw first, then read the configured share of queries that should
        # also be sent to the errors pipeline.
        draw = random.random()
        share = float(state.get_config("errors_query_percentage", 0))
        secondaries: List[str] = ["errors"] if draw < share else []
        return "events", secondaries

    delegator = PipelineDelegator(
        query_pipeline_builders={
            "events": events_pipeline_builder,
            "errors": errors_pipeline_builder,
        },
        selector_func=selector_func,
        callback_func=partial(callback_func, "errors"),
    )

    super().__init__(
        storages=[storage],
        query_pipeline_builder=delegator,
        abstract_column_set=columns,
        join_relationships={},
        writable_storage=storage,
    )
def __init__(self) -> None:
    """Set up the entity on the grouped messages CDC storage."""
    cdc_storage = get_cdc_storage(StorageKey.GROUPEDMESSAGES)
    writer_schema = cdc_storage.get_table_writer().get_schema()
    plan_builder = SingleStorageQueryPlanBuilder(storage=cdc_storage)

    super().__init__(
        storages=[cdc_storage],
        query_pipeline_builder=SimplePipelineBuilder(query_plan_builder=plan_builder),
        abstract_column_set=writer_schema.get_columns(),
        join_relationships={},
        writable_storage=cdc_storage,
    )
def __init__(self) -> None:
    """Set up the entity on the errors storage using ``errors_translators``."""
    errors_storage = get_writable_storage(StorageKey.ERRORS)
    columns = errors_storage.get_table_writer().get_schema().get_columns()
    plan_builder = SingleStorageQueryPlanBuilder(
        storage=errors_storage,
        mappers=errors_translators,
    )

    super().__init__(
        storages=[errors_storage],
        query_pipeline_builder=SimplePipelineBuilder(query_plan_builder=plan_builder),
        abstract_column_set=columns,
        join_relationships={},
        writable_storage=errors_storage,
    )
def __init__(self) -> None:
    """Set up a read-only entity on the raw outcomes storage.

    Registers an ``org_id`` required-column validator and ``timestamp`` as
    the required time column.
    """
    raw_storage = get_storage(StorageKey.OUTCOMES_RAW)
    plan_builder = SingleStorageQueryPlanBuilder(storage=raw_storage)

    super().__init__(
        storages=[raw_storage],
        query_pipeline_builder=SimplePipelineBuilder(query_plan_builder=plan_builder),
        abstract_column_set=raw_storage.get_schema().get_columns(),
        join_relationships={},
        # No writable storage is registered for this entity.
        writable_storage=None,
        validators=[EntityRequiredColumnValidator({"org_id"})],
        required_time_column="timestamp",
    )
def __init__(self, custom_mappers: Optional[TranslationMappers] = None) -> None:
    """Set up the entity with "transactions_v1" and "transactions_v2" pipelines.

    Both pipelines share the same mappers; ``v2_selector_function`` chooses
    between them through a PipelineDelegator.

    Args:
        custom_mappers: optional mappers concatenated after
            ``transaction_translator``.
    """
    storage = get_writable_storage(StorageKey.TRANSACTIONS)
    schema = storage.get_table_writer().get_schema()

    if custom_mappers is None:
        mappers = transaction_translator
    else:
        mappers = transaction_translator.concat(custom_mappers)

    v1_plan_builder = SelectedStorageQueryPlanBuilder(
        selector=TransactionsQueryStorageSelector(mappers=mappers)
    )
    v1_pipeline_builder = SimplePipelineBuilder(query_plan_builder=v1_plan_builder)

    v2_plan_builder = SingleStorageQueryPlanBuilder(
        storage=get_storage(StorageKey.TRANSACTIONS_V2),
        mappers=mappers,
    )
    v2_pipeline_builder = SimplePipelineBuilder(query_plan_builder=v2_plan_builder)

    pipeline_builder: QueryPipelineBuilder[ClickhouseQueryPlan] = PipelineDelegator(
        query_pipeline_builders={
            "transactions_v1": v1_pipeline_builder,
            "transactions_v2": v2_pipeline_builder,
        },
        selector_func=v2_selector_function,
        split_rate_limiter=True,
        ignore_secondary_exceptions=True,
    )

    super().__init__(
        storages=[storage],
        query_pipeline_builder=pipeline_builder,
        abstract_column_set=schema.get_columns(),
        join_relationships={},
        writable_storage=storage,
        validators=[EntityRequiredColumnValidator({"project_id"})],
        required_time_column="finish_ts",
    )
def __init__(
    self,
    writable_storage_key: Optional[StorageKey],
    readable_storage_key: StorageKey,
    value_schema: Sequence[Column[SchemaModifiers]],
    mappers: TranslationMappers,
    abstract_column_set: Optional[ColumnSet] = None,
    validators: Optional[Sequence[QueryValidator]] = None,
) -> None:
    """Set up the entity over a readable storage and an optional writable one.

    Args:
        writable_storage_key: key of the writable storage; when falsy the
            entity has no writable storage.
        readable_storage_key: key of the storage queries are planned against.
        value_schema: extra columns appended to the default column set.
        mappers: mappers concatenated after the ``tags`` subscript mapper.
        abstract_column_set: column set to use instead of the default one.
        validators: validators to use instead of the default pair.
    """
    writable_storage = None
    if writable_storage_key:
        writable_storage = get_writable_storage(writable_storage_key)
    readable_storage = get_storage(readable_storage_key)

    # The readable storage always comes first; the writable one is optional.
    storages = [readable_storage]
    if writable_storage:
        storages.append(writable_storage)

    if abstract_column_set is None:
        default_columns = [
            Column("org_id", UInt(64)),
            Column("project_id", UInt(64)),
            Column("metric_id", UInt(64)),
            Column("timestamp", DateTime()),
            Column("bucketed_time", DateTime()),
            Column("tags", Nested([("key", UInt(64)), ("value", UInt(64))])),
        ]
        abstract_column_set = ColumnSet([*default_columns, *value_schema])

    if validators is None:
        validators = [
            EntityRequiredColumnValidator({"org_id", "project_id"}),
            GranularityValidator(minimum=10),
        ]

    tags_mappers = TranslationMappers(
        subscriptables=[
            SubscriptableMapper(None, "tags", None, "tags"),
        ],
    )
    plan_builder = SingleStorageQueryPlanBuilder(
        readable_storage,
        mappers=tags_mappers.concat(mappers),
    )

    super().__init__(
        storages=storages,
        query_pipeline_builder=SimplePipelineBuilder(query_plan_builder=plan_builder),
        abstract_column_set=abstract_column_set,
        join_relationships={},
        writable_storage=writable_storage,
        validators=validators,
        required_time_column="timestamp",
    )
def __init__(self) -> None:
    """Set up the entity on the profiles storage with ``profile_columns``."""
    profiles_storage = get_writable_storage(StorageKey.PROFILES)
    plan_builder = SingleStorageQueryPlanBuilder(profiles_storage)
    required_columns = EntityRequiredColumnValidator(
        {"organization_id", "project_id"}
    )

    super().__init__(
        storages=[profiles_storage],
        query_pipeline_builder=SimplePipelineBuilder(query_plan_builder=plan_builder),
        abstract_column_set=profile_columns,
        join_relationships={},
        writable_storage=profiles_storage,
        validators=[required_columns],
        required_time_column="received",
    )
def __init__(self) -> None:
    """Set up the entity over the raw and hourly sessions storages.

    The storage for each query is picked by ``SessionsQueryStorageSelector``;
    the column set comes from the hourly storage schema.
    """
    raw_storage = get_writable_storage(StorageKey.SESSIONS_RAW)
    hourly_storage = get_storage(StorageKey.SESSIONS_HOURLY)
    hourly_schema = hourly_storage.get_schema()

    plan_builder = SelectedStorageQueryPlanBuilder(
        selector=SessionsQueryStorageSelector()
    )

    super().__init__(
        storages=[raw_storage, hourly_storage],
        query_pipeline_builder=SimplePipelineBuilder(query_plan_builder=plan_builder),
        abstract_column_set=hourly_schema.get_columns(),
        join_relationships={},
        writable_storage=raw_storage,
        required_filter_columns=["org_id", "project_id"],
        required_time_column="started",
    )
def __init__(self) -> None:
    """Set up a read-only entity on the org sessions storage."""
    org_sessions_storage = get_storage(StorageKey.ORG_SESSIONS)
    plan_builder = SingleStorageQueryPlanBuilder(storage=org_sessions_storage)

    columns = ColumnSet(
        [
            ("org_id", UInt(64)),
            ("project_id", UInt(64)),
            ("started", DateTime()),
        ]
    )

    super().__init__(
        storages=[org_sessions_storage],
        query_pipeline_builder=SimplePipelineBuilder(query_plan_builder=plan_builder),
        abstract_column_set=columns,
        join_relationships={},
        writable_storage=None,
        validators=None,
        required_time_column="started",
    )
def __init__(self) -> None:
    """Set up the entity writing to raw sessions and querying hourly sessions."""
    raw_storage = get_writable_storage(StorageKey.SESSIONS_RAW)
    hourly_storage = get_storage(StorageKey.SESSIONS_HOURLY)
    hourly_schema = hourly_storage.get_schema()

    # TODO: Once we are ready to expose the raw data model and select whether
    # to use the materialized storage or the raw one, replace this with a
    # custom storage selector that decides when to use the materialized data.
    plan_builder = SingleStorageQueryPlanBuilder(
        storage=hourly_storage,
        mappers=sessions_translators,
    )

    super().__init__(
        storages=[raw_storage, hourly_storage],
        query_pipeline_builder=SimplePipelineBuilder(query_plan_builder=plan_builder),
        abstract_column_set=hourly_schema.get_columns(),
        join_relationships={},
        writable_storage=raw_storage,
    )
def __init__(self) -> None:
    """Set up the entity over the raw and hourly sessions storages.

    The column set is the hourly storage schema plus a ``bucketed_started``
    column, and data-model validation runs in WARN mode.
    """
    raw_storage = get_writable_storage(StorageKey.SESSIONS_RAW)
    hourly_storage = get_storage(StorageKey.SESSIONS_HOURLY)

    hourly_columns = hourly_storage.get_schema().get_columns()
    extra_time_columns = ColumnSet([("bucketed_started", DateTime())])

    plan_builder = SelectedStorageQueryPlanBuilder(
        selector=SessionsQueryStorageSelector()
    )

    super().__init__(
        storages=[raw_storage, hourly_storage],
        query_pipeline_builder=SimplePipelineBuilder(query_plan_builder=plan_builder),
        abstract_column_set=hourly_columns + extra_time_columns,
        join_relationships={},
        writable_storage=raw_storage,
        validators=[EntityRequiredColumnValidator({"org_id", "project_id"})],
        required_time_column="started",
        validate_data_model=ColumnValidationMode.WARN,
    )
def __init__(self) -> None:
    """Set up the entity on the group assignees CDC storage with a join to events."""
    cdc_storage = get_cdc_storage(StorageKey.GROUPASSIGNEES)
    writer_schema = cdc_storage.get_table_writer().get_schema()
    plan_builder = SingleStorageQueryPlanBuilder(storage=cdc_storage)

    # "owns": left join from this entity to the events entity.
    events_join = JoinRelationship(
        rhs_entity=EntityKey.EVENTS,
        columns=[("project_id", "project_id"), ("group_id", "group_id")],
        join_type=JoinType.LEFT,
        equivalences=[],
    )

    super().__init__(
        storages=[cdc_storage],
        query_pipeline_builder=SimplePipelineBuilder(query_plan_builder=plan_builder),
        abstract_column_set=writer_schema.get_columns(),
        join_relationships={"owns": events_join},
        writable_storage=cdc_storage,
        required_filter_columns=None,
        required_time_column=None,
    )
def __init__(self) -> None:
    """Set up the discover entity on the single discover storage.

    The abstract column set is the common columns declared here followed by
    ``EVENTS_COLUMNS`` and ``TRANSACTIONS_COLUMNS``.
    """
    self.__common_columns = ColumnSet(
        [
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", String(Modifiers(nullable=True))),
            ("timestamp", DateTime()),
            ("platform", String(Modifiers(nullable=True))),
            ("environment", String(Modifiers(nullable=True))),
            ("release", String(Modifiers(nullable=True))),
            ("dist", String(Modifiers(nullable=True))),
            ("user", String(Modifiers(nullable=True))),
            ("transaction", String(Modifiers(nullable=True))),
            ("message", String(Modifiers(nullable=True))),
            ("title", String(Modifiers(nullable=True))),
            # User
            ("user_id", String(Modifiers(nullable=True))),
            ("username", String(Modifiers(nullable=True))),
            ("email", String(Modifiers(nullable=True))),
            ("ip_address", String(Modifiers(nullable=True))),
            # SDK
            ("sdk_name", String(Modifiers(nullable=True))),
            ("sdk_version", String(Modifiers(nullable=True))),
            # geo location context
            ("geo_country_code", String(Modifiers(nullable=True))),
            ("geo_region", String(Modifiers(nullable=True))),
            ("geo_city", String(Modifiers(nullable=True))),
            ("http_method", String(Modifiers(nullable=True))),
            ("http_referer", String(Modifiers(nullable=True))),
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
            ("trace_id", String(Modifiers(nullable=True))),
            ("span_id", UInt(64, Modifiers(nullable=True))),
        ]
    )
    self.__events_columns = EVENTS_COLUMNS
    self.__transactions_columns = TRANSACTIONS_COLUMNS

    discover_storage = get_storage(StorageKey.DISCOVER)

    # Build the mappers one concatenation at a time, in the original order.
    mappers = events_translation_mappers.concat(transaction_translation_mappers)
    mappers = mappers.concat(null_function_translation_mappers)
    mappers = mappers.concat(
        TranslationMappers(
            columns=[
                ColumnToFunction(
                    None,
                    "ip_address",
                    "coalesce",
                    (
                        FunctionCall(
                            None,
                            "IPv4NumToString",
                            (Column(None, None, "ip_address_v4"),),
                        ),
                        FunctionCall(
                            None,
                            "IPv6NumToString",
                            (Column(None, None, "ip_address_v6"),),
                        ),
                    ),
                ),
                ColumnToColumn(None, "transaction", None, "transaction_name"),
                ColumnToColumn(None, "username", None, "user_name"),
                ColumnToColumn(None, "email", None, "user_email"),
                ColumnToMapping(
                    None,
                    "geo_country_code",
                    None,
                    "contexts",
                    "geo.country_code",
                    nullable=True,
                ),
                ColumnToMapping(
                    None,
                    "geo_region",
                    None,
                    "contexts",
                    "geo.region",
                    nullable=True,
                ),
                ColumnToMapping(
                    None,
                    "geo_city",
                    None,
                    "contexts",
                    "geo.city",
                    nullable=True,
                ),
                ColumnToFunction(
                    None,
                    "user",
                    "nullIf",
                    (Column(None, None, "user"), Literal(None, "")),
                ),
            ]
        )
    )
    mappers = mappers.concat(
        TranslationMappers(
            subscriptables=[
                SubscriptableMapper(None, "tags", None, "tags"),
                SubscriptableMapper(None, "contexts", None, "contexts"),
            ],
        )
    )

    discover_storage_plan_builder = SingleStorageQueryPlanBuilder(
        storage=discover_storage,
        mappers=mappers,
    )
    discover_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=discover_storage_plan_builder
    )

    super().__init__(
        storages=[discover_storage],
        query_pipeline_builder=discover_pipeline_builder,
        abstract_column_set=(
            self.__common_columns
            + self.__events_columns
            + self.__transactions_columns
        ),
        join_relationships={},
        writable_storage=None,
        validators=[EntityRequiredColumnValidator({"project_id"})],
        required_time_column="timestamp",
    )
def __init__(self) -> None:
    """Set up the discover entity over the events and discover storages.

    A PipelineDelegator holds an "events" and a "discover" pipeline; the
    nested ``selector_func`` decides per query which one is primary and
    whether the other runs as a secondary.
    """
    self.__common_columns = ColumnSet(
        [
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", String(Modifiers(nullable=True))),
            ("timestamp", DateTime()),
            ("platform", String(Modifiers(nullable=True))),
            ("environment", String(Modifiers(nullable=True))),
            ("release", String(Modifiers(nullable=True))),
            ("dist", String(Modifiers(nullable=True))),
            ("user", String(Modifiers(nullable=True))),
            ("transaction", String(Modifiers(nullable=True))),
            ("message", String(Modifiers(nullable=True))),
            ("title", String(Modifiers(nullable=True))),
            # User
            ("user_id", String(Modifiers(nullable=True))),
            ("username", String(Modifiers(nullable=True))),
            ("email", String(Modifiers(nullable=True))),
            ("ip_address", String(Modifiers(nullable=True))),
            # SDK
            ("sdk_name", String(Modifiers(nullable=True))),
            ("sdk_version", String(Modifiers(nullable=True))),
            # geo location context
            ("geo_country_code", String(Modifiers(nullable=True))),
            ("geo_region", String(Modifiers(nullable=True))),
            ("geo_city", String(Modifiers(nullable=True))),
            ("http_method", String(Modifiers(nullable=True))),
            ("http_referer", String(Modifiers(nullable=True))),
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
        ]
    )
    self.__events_columns = EVENTS_COLUMNS
    self.__transactions_columns = TRANSACTIONS_COLUMNS

    events_storage = get_storage(StorageKey.EVENTS)

    # Mappers for the events pipeline, built one concatenation at a time.
    events_mappers = events_translation_mappers.concat(
        transaction_translation_mappers
    )
    events_mappers = events_mappers.concat(null_function_translation_mappers)
    events_mappers = events_mappers.concat(
        TranslationMappers(
            # XXX: Remove once we are using errors
            columns=[
                ColumnToMapping(None, "release", None, "tags", "sentry:release"),
                ColumnToMapping(None, "dist", None, "tags", "sentry:dist"),
                ColumnToMapping(None, "user", None, "tags", "sentry:user"),
            ],
            subscriptables=[
                SubscriptableMapper(None, "tags", None, "tags"),
                SubscriptableMapper(None, "contexts", None, "contexts"),
            ],
        )
    )
    events_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=SelectedStorageQueryPlanBuilder(
            selector=EventsQueryStorageSelector(mappers=events_mappers)
        ),
    )

    discover_storage = get_storage(StorageKey.DISCOVER)

    # Mappers for the discover pipeline, built the same way.
    discover_mappers = events_translation_mappers.concat(
        transaction_translation_mappers
    )
    discover_mappers = discover_mappers.concat(null_function_translation_mappers)
    discover_mappers = discover_mappers.concat(
        TranslationMappers(
            columns=[
                ColumnToFunction(
                    None,
                    "ip_address",
                    "coalesce",
                    (
                        FunctionCall(
                            None,
                            "IPv4NumToString",
                            (Column(None, None, "ip_address_v4"),),
                        ),
                        FunctionCall(
                            None,
                            "IPv6NumToString",
                            (Column(None, None, "ip_address_v6"),),
                        ),
                    ),
                ),
                ColumnToColumn(None, "transaction", None, "transaction_name"),
                ColumnToColumn(None, "username", None, "user_name"),
                ColumnToColumn(None, "email", None, "user_email"),
                ColumnToMapping(
                    None,
                    "geo_country_code",
                    None,
                    "contexts",
                    "geo.country_code",
                    nullable=True,
                ),
                ColumnToMapping(
                    None,
                    "geo_region",
                    None,
                    "contexts",
                    "geo.region",
                    nullable=True,
                ),
                ColumnToMapping(
                    None,
                    "geo_city",
                    None,
                    "contexts",
                    "geo.city",
                    nullable=True,
                ),
                ColumnToFunction(
                    None,
                    "user",
                    "nullIf",
                    (Column(None, None, "user"), Literal(None, "")),
                ),
            ]
        )
    )
    discover_mappers = discover_mappers.concat(
        TranslationMappers(
            subscriptables=[
                SubscriptableMapper(None, "tags", None, "tags"),
                SubscriptableMapper(None, "contexts", None, "contexts"),
            ],
        )
    )
    discover_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=discover_storage,
            mappers=discover_mappers,
        )
    )

    def selector_func(_query: Query, referrer: str) -> Tuple[str, List[str]]:
        # In case something goes wrong, set this to 1 to revert to the events storage.
        kill_rollout = state.get_config("errors_rollout_killswitch", 0)
        assert isinstance(kill_rollout, (int, str))
        if int(kill_rollout):
            return "events", []

        # Rolled out for this referrer, or for everyone: discover only.
        if (
            referrer in settings.ERRORS_ROLLOUT_BY_REFERRER
            or settings.ERRORS_ROLLOUT_ALL
        ):
            return "discover", []

        default_threshold = state.get_config("discover_query_percentage", 0)
        assert isinstance(default_threshold, (float, int, str))
        threshold = settings.ERRORS_QUERY_PERCENTAGE_BY_REFERRER.get(
            referrer, default_threshold
        )

        # Otherwise events stays primary and discover runs for a random share.
        secondaries: List[str] = (
            ["discover"] if random.random() < float(threshold) else []
        )
        return "events", secondaries

    delegator = PipelineDelegator(
        query_pipeline_builders={
            "events": events_pipeline_builder,
            "discover": discover_pipeline_builder,
        },
        selector_func=selector_func,
        callback_func=partial(callback_func, "discover"),
    )

    super().__init__(
        storages=[events_storage, discover_storage],
        query_pipeline_builder=delegator,
        abstract_column_set=(
            self.__common_columns
            + self.__events_columns
            + self.__transactions_columns
        ),
        join_relationships={},
        writable_storage=None,
        required_filter_columns=["project_id"],
        required_time_column="timestamp",
    )
def test() -> None:
    """Check that PipelineDelegator runs primary and secondary pipelines.

    The selector returns "errors" as primary and "errors_ro" as secondary.
    The test expects two query-runner invocations, each with its own
    settings object, and a callback call carrying both results.
    """
    cv = threading.Condition()
    query_result = QueryResult({}, {"stats": {}, "sql": "", "experiments": {}})

    def callback_func(*args: object, **kwargs: object) -> None:
        # Mock forwards its call arguments to ``side_effect``. The callback is
        # asserted below to be called with five positional arguments, so a
        # two-parameter signature raises TypeError before notify() and the
        # test only proceeds once the wait below times out.
        with cv:
            cv.notify()

    mock_callback = Mock(side_effect=callback_func)

    query_body = {
        "query": """ MATCH (events) SELECT type, project_id WHERE project_id = 1 AND timestamp >= toDateTime('2020-01-01 12:00:00') AND timestamp < toDateTime('2020-01-02 12:00:00') """,
        "dataset": "events",
    }

    events = get_dataset("events")
    query, _ = parse_snql_query(query_body["query"], events)

    errors_pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=get_storage(StorageKey.ERRORS)
        ),
    )

    errors_ro_pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=get_storage(StorageKey.ERRORS_RO)
        ),
    )

    delegator = PipelineDelegator(
        query_pipeline_builders={
            "errors": errors_pipeline,
            "errors_ro": errors_ro_pipeline,
        },
        selector_func=lambda query, referrer: ("errors", ["errors_ro"]),
        split_rate_limiter=True,
        ignore_secondary_exceptions=True,
        callback_func=mock_callback,
    )

    runner_call_count = 0
    runner_settings: MutableSequence[QuerySettings] = []

    def query_runner(
        query: Union[Query, CompositeQuery[Table]],
        settings: QuerySettings,
        reader: Reader,
    ) -> QueryResult:
        # Only the counter is rebound; the list is mutated in place and
        # therefore needs no ``nonlocal`` declaration.
        nonlocal runner_call_count
        runner_call_count += 1
        runner_settings.append(settings)
        return query_result

    set_config("pipeline_split_rate_limiter", 1)

    # Hold the condition while executing so the callback cannot notify
    # before this thread has started waiting.
    with cv:
        query_settings = HTTPQuerySettings(referrer="ref")
        delegator.build_execution_pipeline(
            Request(
                id="asd",
                original_body=query_body,
                query=query,
                snql_anonymized="",
                query_settings=query_settings,
                attribution_info=AttributionInfo(
                    get_app_id("ref"), "ref", None, None, None
                ),
            ),
            query_runner,
        ).execute()
        cv.wait(timeout=5)

    assert runner_call_count == 2
    assert len(runner_settings) == 2
    settings, settings_ro = runner_settings
    # Validate that settings have been duplicated
    assert settings is not settings_ro

    assert mock_callback.call_args == call(
        query,
        query_settings,
        "ref",
        Result("errors", query_result, ANY),
        [Result("errors_ro", query_result, ANY)],
    )
def __init__(self) -> None:
    """Set up the discover entity over the events and discover storages.

    A PipelineDelegator always runs the "events" pipeline as primary and
    additionally runs "discover" for a random share of queries.
    """
    self.__common_columns = ColumnSet(
        [
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", String(Modifiers(nullable=True))),
            ("timestamp", DateTime()),
            ("platform", String(Modifiers(nullable=True))),
            ("environment", String(Modifiers(nullable=True))),
            ("release", String(Modifiers(nullable=True))),
            ("dist", String(Modifiers(nullable=True))),
            ("user", String(Modifiers(nullable=True))),
            ("transaction", String(Modifiers(nullable=True))),
            ("message", String(Modifiers(nullable=True))),
            ("title", String(Modifiers(nullable=True))),
            # User
            ("user_id", String(Modifiers(nullable=True))),
            ("username", String(Modifiers(nullable=True))),
            ("email", String(Modifiers(nullable=True))),
            ("ip_address", String(Modifiers(nullable=True))),
            # SDK
            ("sdk_name", String(Modifiers(nullable=True))),
            ("sdk_version", String(Modifiers(nullable=True))),
            # geo location context
            ("geo_country_code", String(Modifiers(nullable=True))),
            ("geo_region", String(Modifiers(nullable=True))),
            ("geo_city", String(Modifiers(nullable=True))),
            ("http_method", String(Modifiers(nullable=True))),
            ("http_referer", String(Modifiers(nullable=True))),
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
        ]
    )
    self.__events_columns = EVENTS_COLUMNS
    self.__transactions_columns = TRANSACTIONS_COLUMNS

    events_storage = get_storage(StorageKey.EVENTS)

    # Mappers for the events pipeline, built one concatenation at a time.
    events_mappers = events_translation_mappers.concat(
        transaction_translation_mappers
    )
    events_mappers = events_mappers.concat(null_function_translation_mappers)
    events_mappers = events_mappers.concat(
        TranslationMappers(
            # XXX: Remove once we are using errors
            columns=[
                ColumnToMapping(None, "release", None, "tags", "sentry:release"),
                ColumnToMapping(None, "dist", None, "tags", "sentry:dist"),
                ColumnToMapping(None, "user", None, "tags", "sentry:user"),
            ],
            subscriptables=[
                SubscriptableMapper(None, "tags", None, "tags"),
                SubscriptableMapper(None, "contexts", None, "contexts"),
            ],
        )
    )
    events_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=SelectedStorageQueryPlanBuilder(
            selector=EventsQueryStorageSelector(mappers=events_mappers)
        ),
    )

    discover_storage = get_storage(StorageKey.DISCOVER)

    # Mappers for the discover pipeline, built the same way.
    discover_mappers = events_translation_mappers.concat(
        transaction_translation_mappers
    )
    discover_mappers = discover_mappers.concat(null_function_translation_mappers)
    discover_mappers = discover_mappers.concat(
        TranslationMappers(
            columns=[
                ColumnToFunction(
                    None,
                    "ip_address",
                    "coalesce",
                    (
                        FunctionCall(
                            None,
                            "IPv4NumToString",
                            (Column(None, None, "ip_address_v4"),),
                        ),
                        FunctionCall(
                            None,
                            "IPv6NumToString",
                            (Column(None, None, "ip_address_v6"),),
                        ),
                    ),
                ),
                ColumnToColumn(None, "transaction", None, "transaction_name"),
                ColumnToColumn(None, "username", None, "user_name"),
                ColumnToColumn(None, "email", None, "user_email"),
                ColumnToMapping(
                    None,
                    "geo_country_code",
                    None,
                    "contexts",
                    "geo.country_code",
                ),
                ColumnToMapping(None, "geo_region", None, "contexts", "geo.region"),
                ColumnToMapping(None, "geo_city", None, "contexts", "geo.city"),
                ColumnToFunction(
                    None,
                    "user",
                    "nullIf",
                    (Column(None, None, "user"), Literal(None, "")),
                ),
            ]
        )
    )
    discover_mappers = discover_mappers.concat(
        TranslationMappers(
            subscriptables=[
                SubscriptableMapper(None, "tags", None, "tags"),
                SubscriptableMapper(None, "contexts", None, "contexts"),
            ],
        )
    )
    discover_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=discover_storage,
            mappers=discover_mappers,
        )
    )

    def selector_func(_query: Query) -> Tuple[str, List[str]]:
        # Draw first, then read the configured share of queries that should
        # also be sent to the discover pipeline.
        draw = random.random()
        share = float(state.get_config("discover_query_percentage", 0))
        secondaries: List[str] = ["discover"] if draw < share else []
        return "events", secondaries

    delegator = PipelineDelegator(
        query_pipeline_builders={
            "events": events_pipeline_builder,
            "discover": discover_pipeline_builder,
        },
        selector_func=selector_func,
        callback_func=partial(callback_func, "discover"),
    )

    super().__init__(
        storages=[events_storage, discover_storage],
        query_pipeline_builder=delegator,
        abstract_column_set=(
            self.__common_columns
            + self.__events_columns
            + self.__transactions_columns
        ),
        join_relationships={},
        writable_storage=None,
    )
def __init__(self) -> None:
    """Set up the discover entity on the events or the discover storage.

    With ``settings.ERRORS_ROLLOUT_ALL`` set, queries go to the discover
    storage through a PipelineDelegator with a "primary" and a "sampler"
    pipeline; otherwise the plain events pipeline is used.
    """
    self.__common_columns = ColumnSet(
        [
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", String(Modifiers(nullable=True))),
            ("timestamp", DateTime()),
            ("platform", String(Modifiers(nullable=True))),
            ("environment", String(Modifiers(nullable=True))),
            ("release", String(Modifiers(nullable=True))),
            ("dist", String(Modifiers(nullable=True))),
            ("user", String(Modifiers(nullable=True))),
            ("transaction", String(Modifiers(nullable=True))),
            ("message", String(Modifiers(nullable=True))),
            ("title", String(Modifiers(nullable=True))),
            # User
            ("user_id", String(Modifiers(nullable=True))),
            ("username", String(Modifiers(nullable=True))),
            ("email", String(Modifiers(nullable=True))),
            ("ip_address", String(Modifiers(nullable=True))),
            # SDK
            ("sdk_name", String(Modifiers(nullable=True))),
            ("sdk_version", String(Modifiers(nullable=True))),
            # geo location context
            ("geo_country_code", String(Modifiers(nullable=True))),
            ("geo_region", String(Modifiers(nullable=True))),
            ("geo_city", String(Modifiers(nullable=True))),
            ("http_method", String(Modifiers(nullable=True))),
            ("http_referer", String(Modifiers(nullable=True))),
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
            ("trace_id", String(Modifiers(nullable=True))),
        ]
    )
    self.__events_columns = EVENTS_COLUMNS
    self.__transactions_columns = TRANSACTIONS_COLUMNS

    events_storage = get_storage(StorageKey.EVENTS)

    # Mappers for the events pipeline, built one concatenation at a time.
    events_mappers = events_translation_mappers.concat(
        transaction_translation_mappers
    )
    events_mappers = events_mappers.concat(null_function_translation_mappers)
    events_mappers = events_mappers.concat(
        TranslationMappers(
            # XXX: Remove once we are using errors
            columns=[
                ColumnToMapping(None, "release", None, "tags", "sentry:release"),
                ColumnToMapping(None, "dist", None, "tags", "sentry:dist"),
                ColumnToMapping(None, "user", None, "tags", "sentry:user"),
            ],
            subscriptables=[
                SubscriptableMapper(None, "tags", None, "tags"),
                SubscriptableMapper(None, "contexts", None, "contexts"),
            ],
        )
    )
    events_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=SelectedStorageQueryPlanBuilder(
            selector=EventsQueryStorageSelector(mappers=events_mappers)
        ),
    )

    discover_storage = get_storage(StorageKey.DISCOVER)

    # Mappers for the discover plan builder, built the same way.
    discover_mappers = events_translation_mappers.concat(
        transaction_translation_mappers
    )
    discover_mappers = discover_mappers.concat(null_function_translation_mappers)
    discover_mappers = discover_mappers.concat(
        TranslationMappers(
            columns=[
                ColumnToFunction(
                    None,
                    "ip_address",
                    "coalesce",
                    (
                        FunctionCall(
                            None,
                            "IPv4NumToString",
                            (Column(None, None, "ip_address_v4"),),
                        ),
                        FunctionCall(
                            None,
                            "IPv6NumToString",
                            (Column(None, None, "ip_address_v6"),),
                        ),
                    ),
                ),
                ColumnToColumn(None, "transaction", None, "transaction_name"),
                ColumnToColumn(None, "username", None, "user_name"),
                ColumnToColumn(None, "email", None, "user_email"),
                ColumnToMapping(
                    None,
                    "geo_country_code",
                    None,
                    "contexts",
                    "geo.country_code",
                    nullable=True,
                ),
                ColumnToMapping(
                    None,
                    "geo_region",
                    None,
                    "contexts",
                    "geo.region",
                    nullable=True,
                ),
                ColumnToMapping(
                    None,
                    "geo_city",
                    None,
                    "contexts",
                    "geo.city",
                    nullable=True,
                ),
                ColumnToFunction(
                    None,
                    "user",
                    "nullIf",
                    (Column(None, None, "user"), Literal(None, "")),
                ),
            ]
        )
    )
    discover_mappers = discover_mappers.concat(
        TranslationMappers(
            subscriptables=[
                SubscriptableMapper(None, "tags", None, "tags"),
                SubscriptableMapper(None, "contexts", None, "contexts"),
            ],
        )
    )
    discover_storage_plan_builder = SingleStorageQueryPlanBuilder(
        storage=discover_storage,
        mappers=discover_mappers,
    )
    discover_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=discover_storage_plan_builder
    )

    pipeline_builder: Union[PipelineDelegator, SimplePipelineBuilder]
    if not settings.ERRORS_ROLLOUT_ALL:
        # Not rolled out: plain events pipeline on the events storage.
        storage = events_storage
        pipeline_builder = events_pipeline_builder
    else:
        # Rolled out: discover storage, with a sampled pipeline next to the
        # primary one, both sharing the same plan builder.
        storage = discover_storage
        sampled_pipeline_builder = SampledSimplePipelineBuilder(
            query_plan_builder=discover_storage_plan_builder
        )
        pipeline_builder = PipelineDelegator(
            query_pipeline_builders={
                "primary": discover_pipeline_builder,
                "sampler": sampled_pipeline_builder,
            },
            selector_func=sampling_selector_func,
            callback_func=sampling_callback_func,
        )

    super().__init__(
        storages=[storage],
        query_pipeline_builder=pipeline_builder,
        abstract_column_set=(
            self.__common_columns
            + self.__events_columns
            + self.__transactions_columns
        ),
        join_relationships={},
        writable_storage=None,
        validators=[EntityRequiredColumnValidator({"project_id"})],
        required_time_column="timestamp",
    )
def __init__(self, custom_mappers: Optional[TranslationMappers] = None) -> None:
    """
    Set up the entity on top of the events and errors writable storages.

    Two pipelines are registered with a PipelineDelegator under the names
    "events" and "errors"; a per-query selector function (defined below)
    names which of them to use.

    :param custom_mappers: Optional extra translation mappers. When given,
        they are concatenated onto the default events and errors
        translators; when None, the default translators are used as is.
    """

    def with_custom_mappers(base: TranslationMappers) -> TranslationMappers:
        # Only concat when the caller actually supplied extra mappers.
        if custom_mappers is None:
            return base
        return base.concat(custom_mappers)

    events_storage = get_writable_storage(StorageKey.EVENTS)
    errors_storage = get_writable_storage(StorageKey.ERRORS)
    # The abstract column set is read off the events table writer schema.
    abstract_columns = events_storage.get_table_writer().get_schema().get_columns()

    events_selector = EventsQueryStorageSelector(
        mappers=with_custom_mappers(event_translator)
    )
    events_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=SelectedStorageQueryPlanBuilder(selector=events_selector),
    )

    errors_selector = ErrorsQueryStorageSelector(
        mappers=with_custom_mappers(errors_translators)
    )
    errors_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=SelectedStorageQueryPlanBuilder(selector=errors_selector),
    )

    def selector_func(_query: Query, referrer: str) -> Tuple[str, List[str]]:
        """
        Pick the pipeline names for a query based on rollout configuration.

        Returns a pipeline name plus a list of further pipeline names
        (presumably the primary pipeline and the ones run alongside it;
        confirm against PipelineDelegator).
        """
        # Kill switch: in case something goes wrong, set this to 1 to
        # revert to the events storage.
        killswitch = state.get_config("errors_rollout_killswitch", 0)
        assert isinstance(killswitch, (int, str))
        if int(killswitch):
            return "events", []

        # Either a per-referrer rollout or the global rollout sends the
        # query to errors only.
        if (
            referrer in settings.ERRORS_ROLLOUT_BY_REFERRER
            or settings.ERRORS_ROLLOUT_ALL
        ):
            return "errors", []

        # A per-referrer percentage takes precedence over the runtime
        # configured default.
        fallback_percentage = state.get_config("errors_query_percentage", 0)
        assert isinstance(fallback_percentage, (float, int, str))
        percentage = settings.ERRORS_QUERY_PERCENTAGE_BY_REFERRER.get(
            referrer, fallback_percentage
        )

        # Randomly add "errors" next to "events" for a share of queries.
        extra_pipelines = ["errors"] if random.random() < float(percentage) else []
        return "events", extra_pipelines

    delegator = PipelineDelegator(
        query_pipeline_builders={
            "events": events_pipeline_builder,
            "errors": errors_pipeline_builder,
        },
        selector_func=selector_func,
        callback_func=partial(callback_func, "errors"),
    )

    joins = {
        "grouped": JoinRelationship(
            rhs_entity=EntityKey.GROUPEDMESSAGES,
            columns=[("project_id", "project_id"), ("group_id", "id")],
            join_type=JoinType.INNER,
            equivalences=[],
        ),
        "assigned": JoinRelationship(
            rhs_entity=EntityKey.GROUPASSIGNEE,
            columns=[("project_id", "project_id"), ("group_id", "group_id")],
            join_type=JoinType.INNER,
            equivalences=[],
        ),
    }

    # The writable storage is errors when the rollout setting is enabled,
    # events otherwise.
    write_target = get_writable_storage(
        StorageKey.ERRORS
        if settings.ERRORS_ROLLOUT_WRITABLE_STORAGE
        else StorageKey.EVENTS
    )

    super().__init__(
        storages=[events_storage, errors_storage],
        query_pipeline_builder=delegator,
        abstract_column_set=abstract_columns,
        join_relationships=joins,
        writable_storage=write_target,
        required_filter_columns=["project_id"],
        required_time_column="timestamp",
    )