def test_entity_validation(key: EntityKey,
                           condition: Optional[Expression]) -> None:
    query = LogicalQuery(
        QueryEntity(key,
                    get_entity(key).get_data_model()),
        selected_columns=[
            SelectedExpression("time",
                               Column("_snuba_timestamp", None, "timestamp")),
        ],
        condition=condition,
    )

    validator = EntityRequiredColumnValidator({"project_id"})
    validator.validate(query)
Esempio n. 2
0
    def __init__(
        self,
        writable_storage_key: StorageKey,
        readable_storage_key: StorageKey,
        value_schema: Sequence[Column[SchemaModifiers]],
        mappers: TranslationMappers,
    ) -> None:
        writable_storage = get_writable_storage(writable_storage_key)
        readable_storage = get_storage(readable_storage_key)

        super().__init__(
            storages=[writable_storage, readable_storage],
            query_pipeline_builder=SimplePipelineBuilder(
                query_plan_builder=SingleStorageQueryPlanBuilder(
                    readable_storage,
                    mappers=TranslationMappers(subscriptables=[
                        SubscriptableMapper(None, "tags", None, "tags"),
                    ], ).concat(mappers),
                )),
            abstract_column_set=ColumnSet([
                Column("org_id", UInt(64)),
                Column("project_id", UInt(64)),
                Column("metric_id", UInt(64)),
                Column("timestamp", DateTime()),
                Column("tags", Nested([("key", UInt(64)),
                                       ("value", UInt(64))])),
                *value_schema,
            ]),
            join_relationships={},
            writable_storage=writable_storage,
            validators=[
                EntityRequiredColumnValidator({"org_id", "project_id"})
            ],
            required_time_column="timestamp",
        )
Esempio n. 3
0
    def __init__(self) -> None:

        # The raw table we write onto, and that potentially we could
        # query.
        writable_storage = get_writable_storage(StorageKey.OUTCOMES_RAW)

        # The materialized view we query aggregate data from.
        materialized_storage = get_storage(StorageKey.OUTCOMES_HOURLY)
        read_schema = materialized_storage.get_schema()
        super().__init__(
            storages=[writable_storage, materialized_storage],
            query_pipeline_builder=SimplePipelineBuilder(
                query_plan_builder=SingleStorageQueryPlanBuilder(
                    # TODO: Once we are ready to expose the raw data model and select whether to use
                    # materialized storage or the raw one here, replace this with a custom storage
                    # selector that decides when to use the materialized data.
                    storage=materialized_storage,
                ),
            ),
            abstract_column_set=read_schema.get_columns(),
            join_relationships={},
            writable_storage=writable_storage,
            validators=[EntityRequiredColumnValidator({"org_id"})],
            required_time_column="timestamp",
        )
Esempio n. 4
0
    def __init__(self,
                 custom_mappers: Optional[TranslationMappers] = None) -> None:
        storage = get_writable_storage(StorageKey.TRANSACTIONS)
        schema = storage.get_table_writer().get_schema()

        super().__init__(
            storages=[storage],
            query_pipeline_builder=SimplePipelineBuilder(
                query_plan_builder=SingleStorageQueryPlanBuilder(
                    storage=storage,
                    mappers=transaction_translator if custom_mappers is None
                    else transaction_translator.concat(custom_mappers),
                ), ),
            abstract_column_set=schema.get_columns(),
            join_relationships={
                "contains":
                JoinRelationship(
                    rhs_entity=EntityKey.SPANS,
                    columns=[
                        ("project_id", "project_id"),
                        ("span_id", "transaction_span_id"),
                    ],
                    join_type=JoinType.INNER,
                    equivalences=[
                        ColumnEquivalence("event_id", "transaction_id"),
                        ColumnEquivalence("transaction_name",
                                          "transaction_name"),
                        ColumnEquivalence("trace_id", "trace_id"),
                    ],
                )
            },
            writable_storage=storage,
            validators=[EntityRequiredColumnValidator({"project_id"})],
            required_time_column="finish_ts",
        )
Esempio n. 5
0
    def __init__(self) -> None:
        storage = get_writable_storage(StorageKey.SPANS)

        super().__init__(
            storages=[storage],
            query_pipeline_builder=SimplePipelineBuilder(
                query_plan_builder=SingleStorageQueryPlanBuilder(
                    storage=storage,
                    mappers=TranslationMappers(
                        subscriptables=[
                            SubscriptableMapper(None, "tags", None, "tags")
                        ],
                    ),
                ),
            ),
            abstract_column_set=ColumnSet(
                [
                    ("project_id", UInt(64)),
                    ("transaction_id", UUID()),
                    ("trace_id", UUID()),
                    ("transaction_span_id", UInt(64)),
                    ("span_id", UInt(64)),
                    ("parent_span_id", UInt(64, Modifiers(nullable=True))),
                    ("transaction_name", String()),
                    ("op", String()),
                    ("status", UInt(8)),
                    ("start_ts", DateTime()),
                    ("start_ns", UInt(32)),
                    ("finish_ts", DateTime()),
                    ("finish_ns", UInt(32)),
                    ("duration_ms", UInt(32)),
                    ("tags", Nested([("key", String()), ("value", String())])),
                ]
            ),
            join_relationships={
                "contained": JoinRelationship(
                    rhs_entity=EntityKey.TRANSACTIONS,
                    columns=[
                        ("project_id", "project_id"),
                        ("transaction_span_id", "span_id"),
                    ],
                    join_type=JoinType.INNER,
                    equivalences=[
                        ColumnEquivalence("transaction_id", "event_id"),
                        ColumnEquivalence("transaction_name", "transaction_name"),
                        ColumnEquivalence("trace_id", "trace_id"),
                    ],
                )
            },
            writable_storage=storage,
            validators=[EntityRequiredColumnValidator({"project_id"})],
            required_time_column=None,
        )
Esempio n. 6
0
    def __init__(self) -> None:
        storage = get_storage(StorageKey.OUTCOMES_RAW)

        super().__init__(
            storages=[storage],
            query_pipeline_builder=SimplePipelineBuilder(
                query_plan_builder=SingleStorageQueryPlanBuilder(storage=storage),
            ),
            abstract_column_set=storage.get_schema().get_columns(),
            join_relationships={},
            writable_storage=None,
            validators=[EntityRequiredColumnValidator({"org_id"})],
            required_time_column="timestamp",
        )
Esempio n. 7
0
    def __init__(
        self,
        writable_storage_key: Optional[StorageKey],
        readable_storage_key: StorageKey,
        value_schema: Sequence[Column[SchemaModifiers]],
        mappers: TranslationMappers,
        abstract_column_set: Optional[ColumnSet] = None,
        validators: Optional[Sequence[QueryValidator]] = None,
    ) -> None:
        writable_storage = (get_writable_storage(writable_storage_key)
                            if writable_storage_key else None)
        readable_storage = get_storage(readable_storage_key)
        storages = [readable_storage]
        if writable_storage:
            storages.append(writable_storage)

        if abstract_column_set is None:
            abstract_column_set = ColumnSet([
                Column("org_id", UInt(64)),
                Column("project_id", UInt(64)),
                Column("metric_id", UInt(64)),
                Column("timestamp", DateTime()),
                Column("bucketed_time", DateTime()),
                Column("tags", Nested([("key", UInt(64)),
                                       ("value", UInt(64))])),
                *value_schema,
            ])

        if validators is None:
            validators = [
                EntityRequiredColumnValidator({"org_id", "project_id"}),
                GranularityValidator(minimum=10),
            ]

        super().__init__(
            storages=storages,
            query_pipeline_builder=SimplePipelineBuilder(
                query_plan_builder=SingleStorageQueryPlanBuilder(
                    readable_storage,
                    mappers=TranslationMappers(subscriptables=[
                        SubscriptableMapper(None, "tags", None, "tags"),
                    ], ).concat(mappers),
                )),
            abstract_column_set=abstract_column_set,
            join_relationships={},
            writable_storage=writable_storage,
            validators=validators,
            required_time_column="timestamp",
        )
Esempio n. 8
0
    def __init__(self, ) -> None:
        writable_storage = get_writable_storage(StorageKey.PROFILES)

        super().__init__(
            storages=[writable_storage],
            query_pipeline_builder=SimplePipelineBuilder(
                query_plan_builder=SingleStorageQueryPlanBuilder(
                    writable_storage)),
            abstract_column_set=profile_columns,
            join_relationships={},
            writable_storage=writable_storage,
            validators=[
                EntityRequiredColumnValidator(
                    {"organization_id", "project_id"}),
            ],
            required_time_column="received",
        )
Esempio n. 9
0
    def __init__(self,
                 custom_mappers: Optional[TranslationMappers] = None) -> None:
        if settings.ERRORS_ROLLOUT_ALL:
            events_storage = get_writable_storage(StorageKey.ERRORS)
            pipeline_builder = SimplePipelineBuilder(
                query_plan_builder=SelectedStorageQueryPlanBuilder(
                    selector=ErrorsQueryStorageSelector(
                        mappers=errors_translators if custom_mappers is None
                        else errors_translators.concat(custom_mappers))), )
        else:
            events_storage = get_writable_storage(StorageKey.EVENTS)
            pipeline_builder = SimplePipelineBuilder(
                query_plan_builder=SelectedStorageQueryPlanBuilder(
                    selector=EventsQueryStorageSelector(
                        mappers=event_translator if custom_mappers is None else
                        event_translator.concat(custom_mappers))), )

        schema = events_storage.get_table_writer().get_schema()
        columns = schema.get_columns()

        super().__init__(
            storages=[events_storage],
            query_pipeline_builder=pipeline_builder,
            abstract_column_set=columns,
            join_relationships={
                "grouped":
                JoinRelationship(
                    rhs_entity=EntityKey.GROUPEDMESSAGES,
                    columns=[("project_id", "project_id"), ("group_id", "id")],
                    join_type=JoinType.INNER,
                    equivalences=[],
                ),
                "assigned":
                JoinRelationship(
                    rhs_entity=EntityKey.GROUPASSIGNEE,
                    columns=[("project_id", "project_id"),
                             ("group_id", "group_id")],
                    join_type=JoinType.INNER,
                    equivalences=[],
                ),
            },
            writable_storage=events_storage,
            validators=[EntityRequiredColumnValidator({"project_id"})],
            required_time_column="timestamp",
        )
Esempio n. 10
0
    def __init__(self) -> None:
        writable_storage = get_writable_storage(StorageKey.SESSIONS_RAW)
        materialized_storage = get_storage(StorageKey.SESSIONS_HOURLY)
        read_schema = materialized_storage.get_schema()

        super().__init__(
            storages=[writable_storage, materialized_storage],
            query_pipeline_builder=SimplePipelineBuilder(
                query_plan_builder=SelectedStorageQueryPlanBuilder(
                    selector=SessionsQueryStorageSelector()), ),
            abstract_column_set=read_schema.get_columns(),
            join_relationships={},
            writable_storage=writable_storage,
            validators=[
                EntityRequiredColumnValidator({"org_id", "project_id"})
            ],
            required_time_column="started",
        )
Esempio n. 11
0
    def __init__(self,
                 custom_mappers: Optional[TranslationMappers] = None) -> None:
        storage = get_writable_storage(StorageKey.TRANSACTIONS)
        schema = storage.get_table_writer().get_schema()
        mappers = (transaction_translator if custom_mappers is None else
                   transaction_translator.concat(custom_mappers))

        v1_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=SelectedStorageQueryPlanBuilder(
                selector=TransactionsQueryStorageSelector(mappers=mappers)), )

        v2_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=SingleStorageQueryPlanBuilder(
                storage=get_storage(StorageKey.TRANSACTIONS_V2),
                mappers=mappers,
            ))

        pipeline_builder: QueryPipelineBuilder[
            ClickhouseQueryPlan] = PipelineDelegator(
                query_pipeline_builders={
                    "transactions_v1": v1_pipeline_builder,
                    "transactions_v2": v2_pipeline_builder,
                },
                selector_func=v2_selector_function,
                split_rate_limiter=True,
                ignore_secondary_exceptions=True,
            )

        super().__init__(
            storages=[storage],
            query_pipeline_builder=pipeline_builder,
            abstract_column_set=schema.get_columns(),
            join_relationships={},
            writable_storage=storage,
            validators=[EntityRequiredColumnValidator({"project_id"})],
            required_time_column="finish_ts",
        )
Esempio n. 12
0
    def __init__(self) -> None:
        self.__common_columns = ColumnSet([
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", String(Modifiers(nullable=True))),
            ("timestamp", DateTime()),
            ("platform", String(Modifiers(nullable=True))),
            ("environment", String(Modifiers(nullable=True))),
            ("release", String(Modifiers(nullable=True))),
            ("dist", String(Modifiers(nullable=True))),
            ("user", String(Modifiers(nullable=True))),
            ("transaction", String(Modifiers(nullable=True))),
            ("message", String(Modifiers(nullable=True))),
            ("title", String(Modifiers(nullable=True))),
            # User
            ("user_id", String(Modifiers(nullable=True))),
            ("username", String(Modifiers(nullable=True))),
            ("email", String(Modifiers(nullable=True))),
            ("ip_address", String(Modifiers(nullable=True))),
            # SDK
            ("sdk_name", String(Modifiers(nullable=True))),
            ("sdk_version", String(Modifiers(nullable=True))),
            # geo location context
            ("geo_country_code", String(Modifiers(nullable=True))),
            ("geo_region", String(Modifiers(nullable=True))),
            ("geo_city", String(Modifiers(nullable=True))),
            ("http_method", String(Modifiers(nullable=True))),
            ("http_referer", String(Modifiers(nullable=True))),
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
            ("trace_id", String(Modifiers(nullable=True))),
            ("span_id", UInt(64, Modifiers(nullable=True))),
        ])
        self.__events_columns = EVENTS_COLUMNS
        self.__transactions_columns = TRANSACTIONS_COLUMNS

        discover_storage = get_storage(StorageKey.DISCOVER)
        discover_storage_plan_builder = SingleStorageQueryPlanBuilder(
            storage=discover_storage,
            mappers=events_translation_mappers.
            concat(transaction_translation_mappers).concat(
                null_function_translation_mappers).concat(
                    TranslationMappers(columns=[
                        ColumnToFunction(
                            None,
                            "ip_address",
                            "coalesce",
                            (
                                FunctionCall(
                                    None,
                                    "IPv4NumToString",
                                    (Column(None, None, "ip_address_v4"), ),
                                ),
                                FunctionCall(
                                    None,
                                    "IPv6NumToString",
                                    (Column(None, None, "ip_address_v6"), ),
                                ),
                            ),
                        ),
                        ColumnToColumn(None, "transaction", None,
                                       "transaction_name"),
                        ColumnToColumn(None, "username", None, "user_name"),
                        ColumnToColumn(None, "email", None, "user_email"),
                        ColumnToMapping(
                            None,
                            "geo_country_code",
                            None,
                            "contexts",
                            "geo.country_code",
                            nullable=True,
                        ),
                        ColumnToMapping(
                            None,
                            "geo_region",
                            None,
                            "contexts",
                            "geo.region",
                            nullable=True,
                        ),
                        ColumnToMapping(
                            None,
                            "geo_city",
                            None,
                            "contexts",
                            "geo.city",
                            nullable=True,
                        ),
                        ColumnToFunction(
                            None,
                            "user",
                            "nullIf",
                            (Column(None, None, "user"), Literal(None, "")),
                        ),
                    ])).concat(
                        TranslationMappers(subscriptables=[
                            SubscriptableMapper(None, "tags", None, "tags"),
                            SubscriptableMapper(None, "contexts", None,
                                                "contexts"),
                        ], )),
        )
        discover_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=discover_storage_plan_builder)

        super().__init__(
            storages=[discover_storage],
            query_pipeline_builder=discover_pipeline_builder,
            abstract_column_set=(self.__common_columns +
                                 self.__events_columns +
                                 self.__transactions_columns),
            join_relationships={},
            writable_storage=None,
            validators=[EntityRequiredColumnValidator({"project_id"})],
            required_time_column="timestamp",
        )
Esempio n. 13
0
    def __init__(self) -> None:
        self.__common_columns = ColumnSet([
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", String(Modifiers(nullable=True))),
            ("timestamp", DateTime()),
            ("platform", String(Modifiers(nullable=True))),
            ("environment", String(Modifiers(nullable=True))),
            ("release", String(Modifiers(nullable=True))),
            ("dist", String(Modifiers(nullable=True))),
            ("user", String(Modifiers(nullable=True))),
            ("transaction", String(Modifiers(nullable=True))),
            ("message", String(Modifiers(nullable=True))),
            ("title", String(Modifiers(nullable=True))),
            # User
            ("user_id", String(Modifiers(nullable=True))),
            ("username", String(Modifiers(nullable=True))),
            ("email", String(Modifiers(nullable=True))),
            ("ip_address", String(Modifiers(nullable=True))),
            # SDK
            ("sdk_name", String(Modifiers(nullable=True))),
            ("sdk_version", String(Modifiers(nullable=True))),
            # geo location context
            ("geo_country_code", String(Modifiers(nullable=True))),
            ("geo_region", String(Modifiers(nullable=True))),
            ("geo_city", String(Modifiers(nullable=True))),
            ("http_method", String(Modifiers(nullable=True))),
            ("http_referer", String(Modifiers(nullable=True))),
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
            ("trace_id", String(Modifiers(nullable=True))),
        ])
        self.__events_columns = EVENTS_COLUMNS
        self.__transactions_columns = TRANSACTIONS_COLUMNS

        events_storage = get_storage(StorageKey.EVENTS)

        events_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=SelectedStorageQueryPlanBuilder(
                selector=EventsQueryStorageSelector(
                    mappers=events_translation_mappers.
                    concat(transaction_translation_mappers).concat(
                        null_function_translation_mappers).concat(
                            TranslationMappers(
                                # XXX: Remove once we are using errors
                                columns=[
                                    ColumnToMapping(None, "release", None,
                                                    "tags", "sentry:release"),
                                    ColumnToMapping(None, "dist", None, "tags",
                                                    "sentry:dist"),
                                    ColumnToMapping(None, "user", None, "tags",
                                                    "sentry:user"),
                                ],
                                subscriptables=[
                                    SubscriptableMapper(
                                        None, "tags", None, "tags"),
                                    SubscriptableMapper(
                                        None, "contexts", None, "contexts"),
                                ],
                            )))), )

        discover_storage = get_storage(StorageKey.DISCOVER)
        discover_storage_plan_builder = SingleStorageQueryPlanBuilder(
            storage=discover_storage,
            mappers=events_translation_mappers.
            concat(transaction_translation_mappers).concat(
                null_function_translation_mappers).concat(
                    TranslationMappers(columns=[
                        ColumnToFunction(
                            None,
                            "ip_address",
                            "coalesce",
                            (
                                FunctionCall(
                                    None,
                                    "IPv4NumToString",
                                    (Column(None, None, "ip_address_v4"), ),
                                ),
                                FunctionCall(
                                    None,
                                    "IPv6NumToString",
                                    (Column(None, None, "ip_address_v6"), ),
                                ),
                            ),
                        ),
                        ColumnToColumn(None, "transaction", None,
                                       "transaction_name"),
                        ColumnToColumn(None, "username", None, "user_name"),
                        ColumnToColumn(None, "email", None, "user_email"),
                        ColumnToMapping(
                            None,
                            "geo_country_code",
                            None,
                            "contexts",
                            "geo.country_code",
                            nullable=True,
                        ),
                        ColumnToMapping(
                            None,
                            "geo_region",
                            None,
                            "contexts",
                            "geo.region",
                            nullable=True,
                        ),
                        ColumnToMapping(
                            None,
                            "geo_city",
                            None,
                            "contexts",
                            "geo.city",
                            nullable=True,
                        ),
                        ColumnToFunction(
                            None,
                            "user",
                            "nullIf",
                            (Column(None, None, "user"), Literal(None, "")),
                        ),
                    ])).concat(
                        TranslationMappers(subscriptables=[
                            SubscriptableMapper(None, "tags", None, "tags"),
                            SubscriptableMapper(None, "contexts", None,
                                                "contexts"),
                        ], )),
        )
        discover_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=discover_storage_plan_builder)

        pipeline_builder: Union[PipelineDelegator, SimplePipelineBuilder]
        if settings.ERRORS_ROLLOUT_ALL:
            storage = discover_storage
            sampled_pipeline_builder = SampledSimplePipelineBuilder(
                query_plan_builder=discover_storage_plan_builder)

            pipeline_builder = PipelineDelegator(
                query_pipeline_builders={
                    "primary": discover_pipeline_builder,
                    "sampler": sampled_pipeline_builder,
                },
                selector_func=sampling_selector_func,
                callback_func=sampling_callback_func,
            )
        else:
            storage = events_storage
            pipeline_builder = events_pipeline_builder

        super().__init__(
            storages=[storage],
            query_pipeline_builder=pipeline_builder,
            abstract_column_set=(self.__common_columns +
                                 self.__events_columns +
                                 self.__transactions_columns),
            join_relationships={},
            writable_storage=None,
            validators=[EntityRequiredColumnValidator({"project_id"})],
            required_time_column="timestamp",
        )