Ejemplo n.º 1
0
def test() -> None:
    cv = threading.Condition()
    query_result = QueryResult({}, {"stats": {}, "sql": ""})
    mock_query_runner = Mock(return_value=query_result)

    def callback_func(args: List[Tuple[str, QueryResult]]) -> None:
        with cv:
            cv.notify()

    mock_callback = Mock(side_effect=callback_func)

    query_body = {
        "selected_columns": ["type", "project_id"],
    }

    events = get_dataset("events")
    query = parse_query(query_body, events)

    events_pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=get_storage(StorageKey.EVENTS)), )

    errors_pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=get_storage(StorageKey.ERRORS)), )

    delegator = PipelineDelegator(
        query_pipeline_builders={
            "events": events_pipeline,
            "errors": errors_pipeline
        },
        selector_func=lambda query, referrer: ("events", ["errors"]),
        callback_func=mock_callback,
    )

    with cv:
        request_settings = HTTPRequestSettings()
        delegator.build_execution_pipeline(
            Request(
                "",
                query_body,
                query,
                request_settings,
                "ref",
            ),
            mock_query_runner,
        ).execute()
        cv.wait(timeout=5)

    assert mock_query_runner.call_count == 2

    assert mock_callback.call_args == call(
        query,
        request_settings,
        "ref",
        [
            Result("events", query_result, ANY),
            Result("errors", query_result, ANY)
        ],
    )
Ejemplo n.º 2
0
    def __init__(
        self,
        writable_storage_key: StorageKey,
        readable_storage_key: StorageKey,
        value_schema: Sequence[Column[SchemaModifiers]],
        mappers: TranslationMappers,
    ) -> None:
        writable_storage = get_writable_storage(writable_storage_key)
        readable_storage = get_storage(readable_storage_key)

        super().__init__(
            storages=[writable_storage, readable_storage],
            query_pipeline_builder=SimplePipelineBuilder(
                query_plan_builder=SingleStorageQueryPlanBuilder(
                    readable_storage,
                    mappers=TranslationMappers(subscriptables=[
                        SubscriptableMapper(None, "tags", None, "tags"),
                    ], ).concat(mappers),
                )),
            abstract_column_set=ColumnSet([
                Column("org_id", UInt(64)),
                Column("project_id", UInt(64)),
                Column("metric_id", UInt(64)),
                Column("timestamp", DateTime()),
                Column("tags", Nested([("key", UInt(64)),
                                       ("value", UInt(64))])),
                *value_schema,
            ]),
            join_relationships={},
            writable_storage=writable_storage,
            validators=[
                EntityRequiredColumnValidator({"org_id", "project_id"})
            ],
            required_time_column="timestamp",
        )
Ejemplo n.º 3
0
    def __init__(self) -> None:

        # The raw table we write onto, and that potentially we could
        # query.
        writable_storage = get_writable_storage(StorageKey.OUTCOMES_RAW)

        # The materialized view we query aggregate data from.
        materialized_storage = get_storage(StorageKey.OUTCOMES_HOURLY)
        read_schema = materialized_storage.get_schema()
        super().__init__(
            storages=[writable_storage, materialized_storage],
            query_pipeline_builder=SimplePipelineBuilder(
                query_plan_builder=SingleStorageQueryPlanBuilder(
                    # TODO: Once we are ready to expose the raw data model and select whether to use
                    # materialized storage or the raw one here, replace this with a custom storage
                    # selector that decides when to use the materialized data.
                    storage=materialized_storage,
                ),
            ),
            abstract_column_set=read_schema.get_columns(),
            join_relationships={},
            writable_storage=writable_storage,
            validators=[EntityRequiredColumnValidator({"org_id"})],
            required_time_column="timestamp",
        )
Ejemplo n.º 4
0
    def __init__(self,
                 custom_mappers: Optional[TranslationMappers] = None) -> None:
        storage = get_writable_storage(StorageKey.TRANSACTIONS)
        schema = storage.get_table_writer().get_schema()

        super().__init__(
            storages=[storage],
            query_pipeline_builder=SimplePipelineBuilder(
                query_plan_builder=SingleStorageQueryPlanBuilder(
                    storage=storage,
                    mappers=transaction_translator if custom_mappers is None
                    else transaction_translator.concat(custom_mappers),
                ), ),
            abstract_column_set=schema.get_columns(),
            join_relationships={
                "contains":
                JoinRelationship(
                    rhs_entity=EntityKey.SPANS,
                    columns=[
                        ("project_id", "project_id"),
                        ("span_id", "transaction_span_id"),
                    ],
                    join_type=JoinType.INNER,
                    equivalences=[
                        ColumnEquivalence("event_id", "transaction_id"),
                        ColumnEquivalence("transaction_name",
                                          "transaction_name"),
                        ColumnEquivalence("trace_id", "trace_id"),
                    ],
                )
            },
            writable_storage=storage,
        )
Ejemplo n.º 5
0
    def __init__(self,
                 custom_mappers: Optional[TranslationMappers] = None) -> None:
        if settings.ERRORS_ROLLOUT_ALL:
            events_storage = get_writable_storage(StorageKey.ERRORS)
            pipeline_builder = SimplePipelineBuilder(
                query_plan_builder=SelectedStorageQueryPlanBuilder(
                    selector=ErrorsQueryStorageSelector(
                        mappers=errors_translators if custom_mappers is None
                        else errors_translators.concat(custom_mappers))), )
        else:
            events_storage = get_writable_storage(StorageKey.EVENTS)
            pipeline_builder = SimplePipelineBuilder(
                query_plan_builder=SelectedStorageQueryPlanBuilder(
                    selector=EventsQueryStorageSelector(
                        mappers=event_translator if custom_mappers is None else
                        event_translator.concat(custom_mappers))), )

        schema = events_storage.get_table_writer().get_schema()
        columns = schema.get_columns()

        super().__init__(
            storages=[events_storage],
            query_pipeline_builder=pipeline_builder,
            abstract_column_set=columns,
            join_relationships={
                "grouped":
                JoinRelationship(
                    rhs_entity=EntityKey.GROUPEDMESSAGES,
                    columns=[("project_id", "project_id"), ("group_id", "id")],
                    join_type=JoinType.INNER,
                    equivalences=[],
                ),
                "assigned":
                JoinRelationship(
                    rhs_entity=EntityKey.GROUPASSIGNEE,
                    columns=[("project_id", "project_id"),
                             ("group_id", "group_id")],
                    join_type=JoinType.INNER,
                    equivalences=[],
                ),
            },
            writable_storage=events_storage,
            validators=[EntityRequiredColumnValidator({"project_id"})],
            required_time_column="timestamp",
        )
Ejemplo n.º 6
0
    def __init__(self) -> None:
        storage = get_writable_storage(StorageKey.SPANS)

        super().__init__(
            storages=[storage],
            query_pipeline_builder=SimplePipelineBuilder(
                query_plan_builder=SingleStorageQueryPlanBuilder(
                    storage=storage,
                    mappers=TranslationMappers(
                        subscriptables=[
                            SubscriptableMapper(None, "tags", None, "tags")
                        ],
                    ),
                ),
            ),
            abstract_column_set=ColumnSet(
                [
                    ("project_id", UInt(64)),
                    ("transaction_id", UUID()),
                    ("trace_id", UUID()),
                    ("transaction_span_id", UInt(64)),
                    ("span_id", UInt(64)),
                    ("parent_span_id", UInt(64, Modifiers(nullable=True))),
                    ("transaction_name", String()),
                    ("op", String()),
                    ("status", UInt(8)),
                    ("start_ts", DateTime()),
                    ("start_ns", UInt(32)),
                    ("finish_ts", DateTime()),
                    ("finish_ns", UInt(32)),
                    ("duration_ms", UInt(32)),
                    ("tags", Nested([("key", String()), ("value", String())])),
                ]
            ),
            join_relationships={
                "contained": JoinRelationship(
                    rhs_entity=EntityKey.TRANSACTIONS,
                    columns=[
                        ("project_id", "project_id"),
                        ("transaction_span_id", "span_id"),
                    ],
                    join_type=JoinType.INNER,
                    equivalences=[
                        ColumnEquivalence("transaction_id", "event_id"),
                        ColumnEquivalence("transaction_name", "transaction_name"),
                        ColumnEquivalence("trace_id", "trace_id"),
                    ],
                )
            },
            writable_storage=storage,
            validators=[EntityRequiredColumnValidator({"project_id"})],
            required_time_column=None,
        )
Ejemplo n.º 7
0
    def __init__(self) -> None:
        storage = get_storage(StorageKey.OUTCOMES_RAW)

        super().__init__(
            storages=[storage],
            query_pipeline_builder=SimplePipelineBuilder(
                query_plan_builder=SingleStorageQueryPlanBuilder(
                    storage=storage), ),
            abstract_column_set=storage.get_schema().get_columns(),
            join_relationships={},
            writable_storage=None,
        )
Ejemplo n.º 8
0
    def __init__(self,
                 custom_mappers: Optional[TranslationMappers] = None) -> None:
        storage = get_writable_storage(StorageKey.EVENTS)
        schema = storage.get_table_writer().get_schema()
        columns = schema.get_columns()

        events_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=SelectedStorageQueryPlanBuilder(
                selector=EventsQueryStorageSelector(
                    mappers=event_translator if custom_mappers is None else
                    event_translator.concat(custom_mappers))), )

        errors_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=SelectedStorageQueryPlanBuilder(
                selector=ErrorsQueryStorageSelector(
                    mappers=errors_translators if custom_mappers is None else
                    errors_translators.concat(custom_mappers))), )

        def selector_func(_query: Query) -> Tuple[str, List[str]]:
            if random.random() < float(
                    state.get_config("errors_query_percentage", 0)):
                return "events", ["errors"]

            return "events", []

        super().__init__(
            storages=[storage],
            query_pipeline_builder=PipelineDelegator(
                query_pipeline_builders={
                    "events": events_pipeline_builder,
                    "errors": errors_pipeline_builder,
                },
                selector_func=selector_func,
                callback_func=partial(callback_func, "errors"),
            ),
            abstract_column_set=columns,
            join_relationships={},
            writable_storage=storage,
        )
Ejemplo n.º 9
0
    def __init__(self) -> None:
        storage = get_cdc_storage(StorageKey.GROUPEDMESSAGES)
        schema = storage.get_table_writer().get_schema()

        super().__init__(
            storages=[storage],
            query_pipeline_builder=SimplePipelineBuilder(
                query_plan_builder=SingleStorageQueryPlanBuilder(
                    storage=storage), ),
            abstract_column_set=schema.get_columns(),
            join_relationships={},
            writable_storage=storage,
        )
Ejemplo n.º 10
0
    def __init__(self) -> None:
        storage = get_writable_storage(StorageKey.ERRORS)
        schema = storage.get_table_writer().get_schema()
        columns = schema.get_columns()

        super().__init__(
            storages=[storage],
            query_pipeline_builder=SimplePipelineBuilder(
                query_plan_builder=SingleStorageQueryPlanBuilder(
                    storage=storage, mappers=errors_translators), ),
            abstract_column_set=columns,
            join_relationships={},
            writable_storage=storage,
        )
Ejemplo n.º 11
0
    def __init__(self) -> None:
        storage = get_storage(StorageKey.OUTCOMES_RAW)

        super().__init__(
            storages=[storage],
            query_pipeline_builder=SimplePipelineBuilder(
                query_plan_builder=SingleStorageQueryPlanBuilder(storage=storage),
            ),
            abstract_column_set=storage.get_schema().get_columns(),
            join_relationships={},
            writable_storage=None,
            validators=[EntityRequiredColumnValidator({"org_id"})],
            required_time_column="timestamp",
        )
Ejemplo n.º 12
0
    def __init__(self,
                 custom_mappers: Optional[TranslationMappers] = None) -> None:
        storage = get_writable_storage(StorageKey.TRANSACTIONS)
        schema = storage.get_table_writer().get_schema()
        mappers = (transaction_translator if custom_mappers is None else
                   transaction_translator.concat(custom_mappers))

        v1_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=SelectedStorageQueryPlanBuilder(
                selector=TransactionsQueryStorageSelector(mappers=mappers)), )

        v2_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=SingleStorageQueryPlanBuilder(
                storage=get_storage(StorageKey.TRANSACTIONS_V2),
                mappers=mappers,
            ))

        pipeline_builder: QueryPipelineBuilder[
            ClickhouseQueryPlan] = PipelineDelegator(
                query_pipeline_builders={
                    "transactions_v1": v1_pipeline_builder,
                    "transactions_v2": v2_pipeline_builder,
                },
                selector_func=v2_selector_function,
                split_rate_limiter=True,
                ignore_secondary_exceptions=True,
            )

        super().__init__(
            storages=[storage],
            query_pipeline_builder=pipeline_builder,
            abstract_column_set=schema.get_columns(),
            join_relationships={},
            writable_storage=storage,
            validators=[EntityRequiredColumnValidator({"project_id"})],
            required_time_column="finish_ts",
        )
Ejemplo n.º 13
0
    def __init__(
        self,
        writable_storage_key: Optional[StorageKey],
        readable_storage_key: StorageKey,
        value_schema: Sequence[Column[SchemaModifiers]],
        mappers: TranslationMappers,
        abstract_column_set: Optional[ColumnSet] = None,
        validators: Optional[Sequence[QueryValidator]] = None,
    ) -> None:
        writable_storage = (get_writable_storage(writable_storage_key)
                            if writable_storage_key else None)
        readable_storage = get_storage(readable_storage_key)
        storages = [readable_storage]
        if writable_storage:
            storages.append(writable_storage)

        if abstract_column_set is None:
            abstract_column_set = ColumnSet([
                Column("org_id", UInt(64)),
                Column("project_id", UInt(64)),
                Column("metric_id", UInt(64)),
                Column("timestamp", DateTime()),
                Column("bucketed_time", DateTime()),
                Column("tags", Nested([("key", UInt(64)),
                                       ("value", UInt(64))])),
                *value_schema,
            ])

        if validators is None:
            validators = [
                EntityRequiredColumnValidator({"org_id", "project_id"}),
                GranularityValidator(minimum=10),
            ]

        super().__init__(
            storages=storages,
            query_pipeline_builder=SimplePipelineBuilder(
                query_plan_builder=SingleStorageQueryPlanBuilder(
                    readable_storage,
                    mappers=TranslationMappers(subscriptables=[
                        SubscriptableMapper(None, "tags", None, "tags"),
                    ], ).concat(mappers),
                )),
            abstract_column_set=abstract_column_set,
            join_relationships={},
            writable_storage=writable_storage,
            validators=validators,
            required_time_column="timestamp",
        )
Ejemplo n.º 14
0
    def __init__(self, ) -> None:
        writable_storage = get_writable_storage(StorageKey.PROFILES)

        super().__init__(
            storages=[writable_storage],
            query_pipeline_builder=SimplePipelineBuilder(
                query_plan_builder=SingleStorageQueryPlanBuilder(
                    writable_storage)),
            abstract_column_set=profile_columns,
            join_relationships={},
            writable_storage=writable_storage,
            validators=[
                EntityRequiredColumnValidator(
                    {"organization_id", "project_id"}),
            ],
            required_time_column="received",
        )
Ejemplo n.º 15
0
    def __init__(self) -> None:
        writable_storage = get_writable_storage(StorageKey.SESSIONS_RAW)
        materialized_storage = get_storage(StorageKey.SESSIONS_HOURLY)
        read_schema = materialized_storage.get_schema()

        super().__init__(
            storages=[writable_storage, materialized_storage],
            query_pipeline_builder=SimplePipelineBuilder(
                query_plan_builder=SelectedStorageQueryPlanBuilder(
                    selector=SessionsQueryStorageSelector()
                ),
            ),
            abstract_column_set=read_schema.get_columns(),
            join_relationships={},
            writable_storage=writable_storage,
            required_filter_columns=["org_id", "project_id"],
            required_time_column="started",
        )
Ejemplo n.º 16
0
    def __init__(self) -> None:
        storage = get_storage(StorageKey.ORG_SESSIONS)

        super().__init__(
            storages=[storage],
            query_pipeline_builder=SimplePipelineBuilder(
                query_plan_builder=SingleStorageQueryPlanBuilder(
                    storage=storage)),
            abstract_column_set=ColumnSet([
                ("org_id", UInt(64)),
                ("project_id", UInt(64)),
                ("started", DateTime()),
            ]),
            join_relationships={},
            writable_storage=None,
            validators=None,
            required_time_column="started",
        )
Ejemplo n.º 17
0
    def __init__(self) -> None:
        writable_storage = get_writable_storage(StorageKey.SESSIONS_RAW)
        materialized_storage = get_storage(StorageKey.SESSIONS_HOURLY)
        read_schema = materialized_storage.get_schema()

        super().__init__(
            storages=[writable_storage, materialized_storage],
            # TODO: Once we are ready to expose the raw data model and select whether to use
            # materialized storage or the raw one here, replace this with a custom storage
            # selector that decides when to use the materialized data.
            query_pipeline_builder=SimplePipelineBuilder(
                query_plan_builder=SingleStorageQueryPlanBuilder(
                    storage=materialized_storage,
                    mappers=sessions_translators,
                ), ),
            abstract_column_set=read_schema.get_columns(),
            join_relationships={},
            writable_storage=writable_storage,
        )
Ejemplo n.º 18
0
    def __init__(self) -> None:
        writable_storage = get_writable_storage(StorageKey.SESSIONS_RAW)
        materialized_storage = get_storage(StorageKey.SESSIONS_HOURLY)
        read_schema = materialized_storage.get_schema()

        read_columns = read_schema.get_columns()
        time_columns = ColumnSet([("bucketed_started", DateTime())])
        super().__init__(
            storages=[writable_storage, materialized_storage],
            query_pipeline_builder=SimplePipelineBuilder(
                query_plan_builder=SelectedStorageQueryPlanBuilder(
                    selector=SessionsQueryStorageSelector()), ),
            abstract_column_set=read_columns + time_columns,
            join_relationships={},
            writable_storage=writable_storage,
            validators=[
                EntityRequiredColumnValidator({"org_id", "project_id"})
            ],
            required_time_column="started",
            validate_data_model=ColumnValidationMode.WARN,
        )
Ejemplo n.º 19
0
    def __init__(self) -> None:
        storage = get_cdc_storage(StorageKey.GROUPASSIGNEES)
        schema = storage.get_table_writer().get_schema()

        super().__init__(
            storages=[storage],
            query_pipeline_builder=SimplePipelineBuilder(
                query_plan_builder=SingleStorageQueryPlanBuilder(storage=storage),
            ),
            abstract_column_set=schema.get_columns(),
            join_relationships={
                "owns": JoinRelationship(
                    rhs_entity=EntityKey.EVENTS,
                    columns=[("project_id", "project_id"), ("group_id", "group_id")],
                    join_type=JoinType.LEFT,
                    equivalences=[],
                )
            },
            writable_storage=storage,
            required_filter_columns=None,
            required_time_column=None,
        )
Ejemplo n.º 20
0
    def __init__(self) -> None:
        self.__common_columns = ColumnSet([
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", String(Modifiers(nullable=True))),
            ("timestamp", DateTime()),
            ("platform", String(Modifiers(nullable=True))),
            ("environment", String(Modifiers(nullable=True))),
            ("release", String(Modifiers(nullable=True))),
            ("dist", String(Modifiers(nullable=True))),
            ("user", String(Modifiers(nullable=True))),
            ("transaction", String(Modifiers(nullable=True))),
            ("message", String(Modifiers(nullable=True))),
            ("title", String(Modifiers(nullable=True))),
            # User
            ("user_id", String(Modifiers(nullable=True))),
            ("username", String(Modifiers(nullable=True))),
            ("email", String(Modifiers(nullable=True))),
            ("ip_address", String(Modifiers(nullable=True))),
            # SDK
            ("sdk_name", String(Modifiers(nullable=True))),
            ("sdk_version", String(Modifiers(nullable=True))),
            # geo location context
            ("geo_country_code", String(Modifiers(nullable=True))),
            ("geo_region", String(Modifiers(nullable=True))),
            ("geo_city", String(Modifiers(nullable=True))),
            ("http_method", String(Modifiers(nullable=True))),
            ("http_referer", String(Modifiers(nullable=True))),
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
            ("trace_id", String(Modifiers(nullable=True))),
            ("span_id", UInt(64, Modifiers(nullable=True))),
        ])
        self.__events_columns = EVENTS_COLUMNS
        self.__transactions_columns = TRANSACTIONS_COLUMNS

        discover_storage = get_storage(StorageKey.DISCOVER)
        discover_storage_plan_builder = SingleStorageQueryPlanBuilder(
            storage=discover_storage,
            mappers=events_translation_mappers.
            concat(transaction_translation_mappers).concat(
                null_function_translation_mappers).concat(
                    TranslationMappers(columns=[
                        ColumnToFunction(
                            None,
                            "ip_address",
                            "coalesce",
                            (
                                FunctionCall(
                                    None,
                                    "IPv4NumToString",
                                    (Column(None, None, "ip_address_v4"), ),
                                ),
                                FunctionCall(
                                    None,
                                    "IPv6NumToString",
                                    (Column(None, None, "ip_address_v6"), ),
                                ),
                            ),
                        ),
                        ColumnToColumn(None, "transaction", None,
                                       "transaction_name"),
                        ColumnToColumn(None, "username", None, "user_name"),
                        ColumnToColumn(None, "email", None, "user_email"),
                        ColumnToMapping(
                            None,
                            "geo_country_code",
                            None,
                            "contexts",
                            "geo.country_code",
                            nullable=True,
                        ),
                        ColumnToMapping(
                            None,
                            "geo_region",
                            None,
                            "contexts",
                            "geo.region",
                            nullable=True,
                        ),
                        ColumnToMapping(
                            None,
                            "geo_city",
                            None,
                            "contexts",
                            "geo.city",
                            nullable=True,
                        ),
                        ColumnToFunction(
                            None,
                            "user",
                            "nullIf",
                            (Column(None, None, "user"), Literal(None, "")),
                        ),
                    ])).concat(
                        TranslationMappers(subscriptables=[
                            SubscriptableMapper(None, "tags", None, "tags"),
                            SubscriptableMapper(None, "contexts", None,
                                                "contexts"),
                        ], )),
        )
        discover_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=discover_storage_plan_builder)

        super().__init__(
            storages=[discover_storage],
            query_pipeline_builder=discover_pipeline_builder,
            abstract_column_set=(self.__common_columns +
                                 self.__events_columns +
                                 self.__transactions_columns),
            join_relationships={},
            writable_storage=None,
            validators=[EntityRequiredColumnValidator({"project_id"})],
            required_time_column="timestamp",
        )
Ejemplo n.º 21
0
    def __init__(self) -> None:
        self.__common_columns = ColumnSet([
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", String(Modifiers(nullable=True))),
            ("timestamp", DateTime()),
            ("platform", String(Modifiers(nullable=True))),
            ("environment", String(Modifiers(nullable=True))),
            ("release", String(Modifiers(nullable=True))),
            ("dist", String(Modifiers(nullable=True))),
            ("user", String(Modifiers(nullable=True))),
            ("transaction", String(Modifiers(nullable=True))),
            ("message", String(Modifiers(nullable=True))),
            ("title", String(Modifiers(nullable=True))),
            # User
            ("user_id", String(Modifiers(nullable=True))),
            ("username", String(Modifiers(nullable=True))),
            ("email", String(Modifiers(nullable=True))),
            ("ip_address", String(Modifiers(nullable=True))),
            # SDK
            ("sdk_name", String(Modifiers(nullable=True))),
            ("sdk_version", String(Modifiers(nullable=True))),
            # geo location context
            ("geo_country_code", String(Modifiers(nullable=True))),
            ("geo_region", String(Modifiers(nullable=True))),
            ("geo_city", String(Modifiers(nullable=True))),
            ("http_method", String(Modifiers(nullable=True))),
            ("http_referer", String(Modifiers(nullable=True))),
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
        ])
        self.__events_columns = EVENTS_COLUMNS
        self.__transactions_columns = TRANSACTIONS_COLUMNS

        events_storage = get_storage(StorageKey.EVENTS)

        events_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=SelectedStorageQueryPlanBuilder(
                selector=EventsQueryStorageSelector(
                    mappers=events_translation_mappers.
                    concat(transaction_translation_mappers).concat(
                        null_function_translation_mappers).concat(
                            TranslationMappers(
                                # XXX: Remove once we are using errors
                                columns=[
                                    ColumnToMapping(None, "release", None,
                                                    "tags", "sentry:release"),
                                    ColumnToMapping(None, "dist", None, "tags",
                                                    "sentry:dist"),
                                    ColumnToMapping(None, "user", None, "tags",
                                                    "sentry:user"),
                                ],
                                subscriptables=[
                                    SubscriptableMapper(
                                        None, "tags", None, "tags"),
                                    SubscriptableMapper(
                                        None, "contexts", None, "contexts"),
                                ],
                            )))), )

        discover_storage = get_storage(StorageKey.DISCOVER)

        discover_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=SingleStorageQueryPlanBuilder(
                storage=discover_storage,
                mappers=events_translation_mappers.concat(
                    transaction_translation_mappers).
                concat(null_function_translation_mappers).concat(
                    TranslationMappers(columns=[
                        ColumnToFunction(
                            None,
                            "ip_address",
                            "coalesce",
                            (
                                FunctionCall(
                                    None,
                                    "IPv4NumToString",
                                    (Column(None, None, "ip_address_v4"), ),
                                ),
                                FunctionCall(
                                    None,
                                    "IPv6NumToString",
                                    (Column(None, None, "ip_address_v6"), ),
                                ),
                            ),
                        ),
                        ColumnToColumn(None, "transaction", None,
                                       "transaction_name"),
                        ColumnToColumn(None, "username", None, "user_name"),
                        ColumnToColumn(None, "email", None, "user_email"),
                        ColumnToMapping(
                            None,
                            "geo_country_code",
                            None,
                            "contexts",
                            "geo.country_code",
                            nullable=True,
                        ),
                        ColumnToMapping(
                            None,
                            "geo_region",
                            None,
                            "contexts",
                            "geo.region",
                            nullable=True,
                        ),
                        ColumnToMapping(
                            None,
                            "geo_city",
                            None,
                            "contexts",
                            "geo.city",
                            nullable=True,
                        ),
                        ColumnToFunction(
                            None,
                            "user",
                            "nullIf",
                            (Column(None, None, "user"), Literal(None, "")),
                        ),
                    ])).concat(
                        TranslationMappers(subscriptables=[
                            SubscriptableMapper(None, "tags", None, "tags"),
                            SubscriptableMapper(None, "contexts", None,
                                                "contexts"),
                        ], )),
            ))

        def selector_func(_query: Query,
                          referrer: str) -> Tuple[str, List[str]]:
            # In case something goes wrong, set this to 1 to revert to the events storage.
            kill_rollout = state.get_config("errors_rollout_killswitch", 0)
            assert isinstance(kill_rollout, (int, str))
            if int(kill_rollout):
                return "events", []

            if referrer in settings.ERRORS_ROLLOUT_BY_REFERRER:
                return "discover", []

            if settings.ERRORS_ROLLOUT_ALL:
                return "discover", []

            default_threshold = state.get_config("discover_query_percentage",
                                                 0)
            assert isinstance(default_threshold, (float, int, str))

            threshold = settings.ERRORS_QUERY_PERCENTAGE_BY_REFERRER.get(
                referrer, default_threshold)

            if random.random() < float(threshold):
                return "events", ["discover"]

            return "events", []

        super().__init__(
            storages=[events_storage, discover_storage],
            query_pipeline_builder=PipelineDelegator(
                query_pipeline_builders={
                    "events": events_pipeline_builder,
                    "discover": discover_pipeline_builder,
                },
                selector_func=selector_func,
                callback_func=partial(callback_func, "discover"),
            ),
            abstract_column_set=(self.__common_columns +
                                 self.__events_columns +
                                 self.__transactions_columns),
            join_relationships={},
            writable_storage=None,
            required_filter_columns=["project_id"],
            required_time_column="timestamp",
        )
Ejemplo n.º 22
0
def test() -> None:
    cv = threading.Condition()
    query_result = QueryResult({}, {"stats": {}, "sql": "", "experiments": {}})

    def callback_func(primary: Optional[Tuple[str, QueryResult]],
                      other: List[Tuple[str, QueryResult]]) -> None:
        with cv:
            cv.notify()

    mock_callback = Mock(side_effect=callback_func)

    query_body = {
        "query": """
        MATCH (events)
        SELECT type, project_id
        WHERE project_id = 1
        AND timestamp >= toDateTime('2020-01-01 12:00:00')
        AND timestamp < toDateTime('2020-01-02 12:00:00')
        """,
        "dataset": "events",
    }

    events = get_dataset("events")
    query, _ = parse_snql_query(query_body["query"], events)

    errors_pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=get_storage(StorageKey.ERRORS)), )

    errors_ro_pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=get_storage(StorageKey.ERRORS_RO)), )

    delegator = PipelineDelegator(
        query_pipeline_builders={
            "errors": errors_pipeline,
            "errors_ro": errors_ro_pipeline,
        },
        selector_func=lambda query, referrer: ("errors", ["errors_ro"]),
        split_rate_limiter=True,
        ignore_secondary_exceptions=True,
        callback_func=mock_callback,
    )

    runner_call_count = 0
    runner_settings: MutableSequence[QuerySettings] = []

    def query_runner(
        query: Union[Query, CompositeQuery[Table]],
        settings: QuerySettings,
        reader: Reader,
    ) -> QueryResult:
        nonlocal runner_call_count
        nonlocal runner_settings

        runner_call_count += 1
        runner_settings.append(settings)
        return query_result

    set_config("pipeline_split_rate_limiter", 1)

    with cv:
        query_settings = HTTPQuerySettings(referrer="ref")
        delegator.build_execution_pipeline(
            Request(
                id="asd",
                original_body=query_body,
                query=query,
                snql_anonymized="",
                query_settings=query_settings,
                attribution_info=AttributionInfo(get_app_id("ref"), "ref",
                                                 None, None, None),
            ),
            query_runner,
        ).execute()
        cv.wait(timeout=5)

    assert runner_call_count == 2
    assert len(runner_settings) == 2
    settings, settings_ro = runner_settings
    # Validate that settings have been duplicated
    assert id(settings) != id(settings_ro)

    assert mock_callback.call_args == call(
        query,
        query_settings,
        "ref",
        Result("errors", query_result, ANY),
        [Result("errors_ro", query_result, ANY)],
    )
Ejemplo n.º 23
0
    def __init__(self) -> None:
        self.__common_columns = ColumnSet([
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", String(Modifiers(nullable=True))),
            ("timestamp", DateTime()),
            ("platform", String(Modifiers(nullable=True))),
            ("environment", String(Modifiers(nullable=True))),
            ("release", String(Modifiers(nullable=True))),
            ("dist", String(Modifiers(nullable=True))),
            ("user", String(Modifiers(nullable=True))),
            ("transaction", String(Modifiers(nullable=True))),
            ("message", String(Modifiers(nullable=True))),
            ("title", String(Modifiers(nullable=True))),
            # User
            ("user_id", String(Modifiers(nullable=True))),
            ("username", String(Modifiers(nullable=True))),
            ("email", String(Modifiers(nullable=True))),
            ("ip_address", String(Modifiers(nullable=True))),
            # SDK
            ("sdk_name", String(Modifiers(nullable=True))),
            ("sdk_version", String(Modifiers(nullable=True))),
            # geo location context
            ("geo_country_code", String(Modifiers(nullable=True))),
            ("geo_region", String(Modifiers(nullable=True))),
            ("geo_city", String(Modifiers(nullable=True))),
            ("http_method", String(Modifiers(nullable=True))),
            ("http_referer", String(Modifiers(nullable=True))),
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
        ])
        self.__events_columns = EVENTS_COLUMNS
        self.__transactions_columns = TRANSACTIONS_COLUMNS

        events_storage = get_storage(StorageKey.EVENTS)

        events_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=SelectedStorageQueryPlanBuilder(
                selector=EventsQueryStorageSelector(
                    mappers=events_translation_mappers.
                    concat(transaction_translation_mappers).concat(
                        null_function_translation_mappers).concat(
                            TranslationMappers(
                                # XXX: Remove once we are using errors
                                columns=[
                                    ColumnToMapping(None, "release", None,
                                                    "tags", "sentry:release"),
                                    ColumnToMapping(None, "dist", None, "tags",
                                                    "sentry:dist"),
                                    ColumnToMapping(None, "user", None, "tags",
                                                    "sentry:user"),
                                ],
                                subscriptables=[
                                    SubscriptableMapper(
                                        None, "tags", None, "tags"),
                                    SubscriptableMapper(
                                        None, "contexts", None, "contexts"),
                                ],
                            )))), )

        discover_storage = get_storage(StorageKey.DISCOVER)

        discover_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=SingleStorageQueryPlanBuilder(
                storage=discover_storage,
                mappers=events_translation_mappers.concat(
                    transaction_translation_mappers).
                concat(null_function_translation_mappers).concat(
                    TranslationMappers(columns=[
                        ColumnToFunction(
                            None,
                            "ip_address",
                            "coalesce",
                            (
                                FunctionCall(
                                    None,
                                    "IPv4NumToString",
                                    (Column(None, None, "ip_address_v4"), ),
                                ),
                                FunctionCall(
                                    None,
                                    "IPv6NumToString",
                                    (Column(None, None, "ip_address_v6"), ),
                                ),
                            ),
                        ),
                        ColumnToColumn(None, "transaction", None,
                                       "transaction_name"),
                        ColumnToColumn(None, "username", None, "user_name"),
                        ColumnToColumn(None, "email", None, "user_email"),
                        ColumnToMapping(
                            None,
                            "geo_country_code",
                            None,
                            "contexts",
                            "geo.country_code",
                        ),
                        ColumnToMapping(None, "geo_region", None, "contexts",
                                        "geo.region"),
                        ColumnToMapping(None, "geo_city", None, "contexts",
                                        "geo.city"),
                        ColumnToFunction(
                            None,
                            "user",
                            "nullIf",
                            (Column(None, None, "user"), Literal(None, "")),
                        ),
                    ])).concat(
                        TranslationMappers(subscriptables=[
                            SubscriptableMapper(None, "tags", None, "tags"),
                            SubscriptableMapper(None, "contexts", None,
                                                "contexts"),
                        ], )),
            ))

        def selector_func(_query: Query) -> Tuple[str, List[str]]:
            if random.random() < float(
                    state.get_config("discover_query_percentage", 0)):
                return "events", ["discover"]

            return "events", []

        super().__init__(
            storages=[events_storage, discover_storage],
            query_pipeline_builder=PipelineDelegator(
                query_pipeline_builders={
                    "events": events_pipeline_builder,
                    "discover": discover_pipeline_builder,
                },
                selector_func=selector_func,
                callback_func=partial(callback_func, "discover"),
            ),
            abstract_column_set=(self.__common_columns +
                                 self.__events_columns +
                                 self.__transactions_columns),
            join_relationships={},
            writable_storage=None,
        )
Ejemplo n.º 24
0
    def __init__(self) -> None:
        self.__common_columns = ColumnSet([
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", String(Modifiers(nullable=True))),
            ("timestamp", DateTime()),
            ("platform", String(Modifiers(nullable=True))),
            ("environment", String(Modifiers(nullable=True))),
            ("release", String(Modifiers(nullable=True))),
            ("dist", String(Modifiers(nullable=True))),
            ("user", String(Modifiers(nullable=True))),
            ("transaction", String(Modifiers(nullable=True))),
            ("message", String(Modifiers(nullable=True))),
            ("title", String(Modifiers(nullable=True))),
            # User
            ("user_id", String(Modifiers(nullable=True))),
            ("username", String(Modifiers(nullable=True))),
            ("email", String(Modifiers(nullable=True))),
            ("ip_address", String(Modifiers(nullable=True))),
            # SDK
            ("sdk_name", String(Modifiers(nullable=True))),
            ("sdk_version", String(Modifiers(nullable=True))),
            # geo location context
            ("geo_country_code", String(Modifiers(nullable=True))),
            ("geo_region", String(Modifiers(nullable=True))),
            ("geo_city", String(Modifiers(nullable=True))),
            ("http_method", String(Modifiers(nullable=True))),
            ("http_referer", String(Modifiers(nullable=True))),
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
            ("trace_id", String(Modifiers(nullable=True))),
        ])
        self.__events_columns = EVENTS_COLUMNS
        self.__transactions_columns = TRANSACTIONS_COLUMNS

        events_storage = get_storage(StorageKey.EVENTS)

        events_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=SelectedStorageQueryPlanBuilder(
                selector=EventsQueryStorageSelector(
                    mappers=events_translation_mappers.
                    concat(transaction_translation_mappers).concat(
                        null_function_translation_mappers).concat(
                            TranslationMappers(
                                # XXX: Remove once we are using errors
                                columns=[
                                    ColumnToMapping(None, "release", None,
                                                    "tags", "sentry:release"),
                                    ColumnToMapping(None, "dist", None, "tags",
                                                    "sentry:dist"),
                                    ColumnToMapping(None, "user", None, "tags",
                                                    "sentry:user"),
                                ],
                                subscriptables=[
                                    SubscriptableMapper(
                                        None, "tags", None, "tags"),
                                    SubscriptableMapper(
                                        None, "contexts", None, "contexts"),
                                ],
                            )))), )

        discover_storage = get_storage(StorageKey.DISCOVER)
        discover_storage_plan_builder = SingleStorageQueryPlanBuilder(
            storage=discover_storage,
            mappers=events_translation_mappers.
            concat(transaction_translation_mappers).concat(
                null_function_translation_mappers).concat(
                    TranslationMappers(columns=[
                        ColumnToFunction(
                            None,
                            "ip_address",
                            "coalesce",
                            (
                                FunctionCall(
                                    None,
                                    "IPv4NumToString",
                                    (Column(None, None, "ip_address_v4"), ),
                                ),
                                FunctionCall(
                                    None,
                                    "IPv6NumToString",
                                    (Column(None, None, "ip_address_v6"), ),
                                ),
                            ),
                        ),
                        ColumnToColumn(None, "transaction", None,
                                       "transaction_name"),
                        ColumnToColumn(None, "username", None, "user_name"),
                        ColumnToColumn(None, "email", None, "user_email"),
                        ColumnToMapping(
                            None,
                            "geo_country_code",
                            None,
                            "contexts",
                            "geo.country_code",
                            nullable=True,
                        ),
                        ColumnToMapping(
                            None,
                            "geo_region",
                            None,
                            "contexts",
                            "geo.region",
                            nullable=True,
                        ),
                        ColumnToMapping(
                            None,
                            "geo_city",
                            None,
                            "contexts",
                            "geo.city",
                            nullable=True,
                        ),
                        ColumnToFunction(
                            None,
                            "user",
                            "nullIf",
                            (Column(None, None, "user"), Literal(None, "")),
                        ),
                    ])).concat(
                        TranslationMappers(subscriptables=[
                            SubscriptableMapper(None, "tags", None, "tags"),
                            SubscriptableMapper(None, "contexts", None,
                                                "contexts"),
                        ], )),
        )
        discover_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=discover_storage_plan_builder)

        pipeline_builder: Union[PipelineDelegator, SimplePipelineBuilder]
        if settings.ERRORS_ROLLOUT_ALL:
            storage = discover_storage
            sampled_pipeline_builder = SampledSimplePipelineBuilder(
                query_plan_builder=discover_storage_plan_builder)

            pipeline_builder = PipelineDelegator(
                query_pipeline_builders={
                    "primary": discover_pipeline_builder,
                    "sampler": sampled_pipeline_builder,
                },
                selector_func=sampling_selector_func,
                callback_func=sampling_callback_func,
            )
        else:
            storage = events_storage
            pipeline_builder = events_pipeline_builder

        super().__init__(
            storages=[storage],
            query_pipeline_builder=pipeline_builder,
            abstract_column_set=(self.__common_columns +
                                 self.__events_columns +
                                 self.__transactions_columns),
            join_relationships={},
            writable_storage=None,
            validators=[EntityRequiredColumnValidator({"project_id"})],
            required_time_column="timestamp",
        )
Ejemplo n.º 25
0
    def __init__(self, custom_mappers: Optional[TranslationMappers] = None) -> None:
        events_storage = get_writable_storage(StorageKey.EVENTS)
        errors_storage = get_writable_storage(StorageKey.ERRORS)
        schema = events_storage.get_table_writer().get_schema()
        columns = schema.get_columns()

        events_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=SelectedStorageQueryPlanBuilder(
                selector=EventsQueryStorageSelector(
                    mappers=event_translator
                    if custom_mappers is None
                    else event_translator.concat(custom_mappers)
                )
            ),
        )

        errors_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=SelectedStorageQueryPlanBuilder(
                selector=ErrorsQueryStorageSelector(
                    mappers=errors_translators
                    if custom_mappers is None
                    else errors_translators.concat(custom_mappers)
                )
            ),
        )

        def selector_func(_query: Query, referrer: str) -> Tuple[str, List[str]]:
            # In case something goes wrong, set this to 1 to revert to the events storage.
            kill_rollout = state.get_config("errors_rollout_killswitch", 0)
            assert isinstance(kill_rollout, (int, str))
            if int(kill_rollout):
                return "events", []

            if referrer in settings.ERRORS_ROLLOUT_BY_REFERRER:
                return "errors", []

            if settings.ERRORS_ROLLOUT_ALL:
                return "errors", []

            default_threshold = state.get_config("errors_query_percentage", 0)
            assert isinstance(default_threshold, (float, int, str))
            threshold = settings.ERRORS_QUERY_PERCENTAGE_BY_REFERRER.get(
                referrer, default_threshold
            )

            if random.random() < float(threshold):
                return "events", ["errors"]

            return "events", []

        def writable_storage() -> WritableTableStorage:
            if settings.ERRORS_ROLLOUT_WRITABLE_STORAGE:
                return get_writable_storage(StorageKey.ERRORS)
            else:
                return get_writable_storage(StorageKey.EVENTS)

        super().__init__(
            storages=[events_storage, errors_storage],
            query_pipeline_builder=PipelineDelegator(
                query_pipeline_builders={
                    "events": events_pipeline_builder,
                    "errors": errors_pipeline_builder,
                },
                selector_func=selector_func,
                callback_func=partial(callback_func, "errors"),
            ),
            abstract_column_set=columns,
            join_relationships={
                "grouped": JoinRelationship(
                    rhs_entity=EntityKey.GROUPEDMESSAGES,
                    columns=[("project_id", "project_id"), ("group_id", "id")],
                    join_type=JoinType.INNER,
                    equivalences=[],
                ),
                "assigned": JoinRelationship(
                    rhs_entity=EntityKey.GROUPASSIGNEE,
                    columns=[("project_id", "project_id"), ("group_id", "group_id")],
                    join_type=JoinType.INNER,
                    equivalences=[],
                ),
            },
            writable_storage=writable_storage(),
            required_filter_columns=["project_id"],
            required_time_column="timestamp",
        )