Example #1
0
def test_add_column() -> None:
    """AddColumn must render an ``ADD COLUMN IF NOT EXISTS ... AFTER`` statement.

    The column under test is a nullable String placed after ``id``.
    """
    nullable_string_column = Column("test", String(Modifiers(nullable=True)))
    operation = AddColumn(
        StorageSetKey.EVENTS,
        "test_table",
        nullable_string_column,
        after="id",
    )

    rendered_sql = operation.format_sql()
    expected_sql = "ALTER TABLE test_table ADD COLUMN IF NOT EXISTS test Nullable(String) AFTER id;"

    assert rendered_sql == expected_sql
Example #2
0
def test_like_validator(
    expressions: Sequence[Expression],
    expected_types: Sequence[ParamType],
    extra_param: bool,
    should_raise: bool,
) -> None:
    """Validate ``expressions`` against a fixed five-column schema.

    A SignatureValidator is built from ``expected_types`` and ``extra_param``.
    When ``should_raise`` is true the validation has to raise
    InvalidFunctionCall; otherwise it has to finish without raising.
    """
    column_definitions = [
        ("event_id", String()),
        ("level", String(Modifiers(nullable=True))),
        ("str_col", String()),
        ("timestamp", DateTime()),
        ("received", DateTime(Modifiers(nullable=True))),
    ]
    data_schema = ColumnSet(column_definitions)
    signature_validator = SignatureValidator(expected_types, extra_param)

    def run_validation() -> None:
        signature_validator.validate(expressions, data_schema)

    # Happy path first: nothing to assert beyond "does not raise".
    if not should_raise:
        run_validation()
        return

    with pytest.raises(InvalidFunctionCall):
        run_validation()
Example #3
0
    def __init__(self) -> None:
        """Configure the entity on top of the writable SPANS storage.

        The same storage object is used for reading and writing. Queries go
        through a single-storage plan builder whose only translation rule
        maps ``tags`` subscripts, and one inner-join relationship named
        ``contained`` is declared towards the TRANSACTIONS entity.
        ``project_id`` is a required column; no time column is required.
        """
        spans_storage = get_writable_storage(StorageKey.SPANS)

        tag_mappers = TranslationMappers(
            subscriptables=[SubscriptableMapper(None, "tags", None, "tags")],
        )
        plan_builder = SingleStorageQueryPlanBuilder(
            storage=spans_storage,
            mappers=tag_mappers,
        )
        pipeline_builder = SimplePipelineBuilder(query_plan_builder=plan_builder)

        entity_columns = ColumnSet(
            [
                ("project_id", UInt(64)),
                ("transaction_id", UUID()),
                ("trace_id", UUID()),
                ("transaction_span_id", UInt(64)),
                ("span_id", UInt(64)),
                ("parent_span_id", UInt(64, Modifiers(nullable=True))),
                ("transaction_name", String()),
                ("op", String()),
                ("status", UInt(8)),
                ("start_ts", DateTime()),
                ("start_ns", UInt(32)),
                ("finish_ts", DateTime()),
                ("finish_ns", UInt(32)),
                ("duration_ms", UInt(32)),
                ("tags", Nested([("key", String()), ("value", String())])),
            ]
        )

        # (left column, right column) pairs used as join keys.
        join_keys = [
            ("project_id", "project_id"),
            ("transaction_span_id", "span_id"),
        ]
        # Columns declared equivalent between the two sides of the join.
        equivalent_columns = [
            ColumnEquivalence("transaction_id", "event_id"),
            ColumnEquivalence("transaction_name", "transaction_name"),
            ColumnEquivalence("trace_id", "trace_id"),
        ]
        contained_in_transaction = JoinRelationship(
            rhs_entity=EntityKey.TRANSACTIONS,
            columns=join_keys,
            join_type=JoinType.INNER,
            equivalences=equivalent_columns,
        )

        super().__init__(
            storages=[spans_storage],
            query_pipeline_builder=pipeline_builder,
            abstract_column_set=entity_columns,
            join_relationships={"contained": contained_in_transaction},
            writable_storage=spans_storage,
            validators=[EntityRequiredColumnValidator({"project_id"})],
            required_time_column=None,
        )
Example #4
0
def test_like_validator(
    expressions: Sequence[Expression],
    expected_types: Sequence[ParamType],
    extra_param: bool,
    should_raise: bool,
) -> None:
    """Validate a ``like`` call signature against an EVENTS query entity.

    A SignatureValidator is built from ``expected_types`` and ``extra_param``
    and applied to ``expressions`` under the function name ``"like"``. When
    ``should_raise`` is true the call has to raise InvalidFunctionCall;
    otherwise it has to finish without raising.
    """
    entity_columns = ColumnSet([
        ("event_id", String()),
        ("level", String(Modifiers(nullable=True))),
        ("str_col", String()),
        ("timestamp", DateTime()),
        ("received", DateTime(Modifiers(nullable=True))),
    ])
    data_source = QueryEntity(EntityKey.EVENTS, entity_columns)
    signature_validator = SignatureValidator(expected_types, extra_param)

    def run_validation() -> None:
        signature_validator.validate("like", expressions, data_source)

    # Happy path first: nothing to assert beyond "does not raise".
    if not should_raise:
        run_validation()
        return

    with pytest.raises(InvalidFunctionCall):
        run_validation()
Example #5
0
def test_create_table() -> None:
    """CreateTable must render the full DDL for a ReplacingMergeTree table.

    Covers column rendering (including a nullable column), the engine clause
    with its version column, ORDER BY and the SETTINGS clause.
    """
    table_columns = [
        Column("id", String()),
        Column("name", String(Modifiers(nullable=True))),
        Column("version", UInt(64)),
    ]
    engine = ReplacingMergeTree(
        storage_set=StorageSetKey.EVENTS,
        version_column="version",
        order_by="version",
        settings={"index_granularity": "256"},
    )
    operation = CreateTable(
        StorageSetKey.EVENTS,
        "test_table",
        table_columns,
        engine,
    )

    rendered_sql = operation.format_sql()
    expected_sql = "CREATE TABLE IF NOT EXISTS test_table (id String, name Nullable(String), version UInt64) ENGINE ReplacingMergeTree(version) ORDER BY version SETTINGS index_granularity=256;"

    assert rendered_sql == expected_sql
Example #6
0
def test_create_table() -> None:
    """CreateTable must render either the plain or the replicated engine DDL.

    Two renderings are accepted: one using ``ReplacingMergeTree`` and one
    using ``ReplicatedReplacingMergeTree``, whose path embeds the database
    name read from the ``CLICKHOUSE_DATABASE`` environment variable
    (``"default"`` when unset).
    """
    database = os.environ.get("CLICKHOUSE_DATABASE", "default")
    table_columns = [
        Column("id", String()),
        Column("name", String(Modifiers(nullable=True))),
        Column("version", UInt(64)),
    ]
    engine = ReplacingMergeTree(
        storage_set=StorageSetKey.EVENTS,
        version_column="version",
        order_by="version",
        settings={"index_granularity": "256"},
    )

    rendered_sql = CreateTable(
        StorageSetKey.EVENTS,
        "test_table",
        table_columns,
        engine,
    ).format_sql()

    plain_sql = "CREATE TABLE IF NOT EXISTS test_table (id String, name Nullable(String), version UInt64) ENGINE ReplacingMergeTree(version) ORDER BY version SETTINGS index_granularity=256;"
    # {shard} and {replica} are literal text here; only the database name is
    # interpolated, hence the split into plain and f-string pieces.
    replicated_sql = (
        "CREATE TABLE IF NOT EXISTS test_table (id String, name Nullable(String), version UInt64) ENGINE ReplicatedReplacingMergeTree('/clickhouse/tables/events/{shard}/"
        + f"{database}/test_table'"
        + ", '{replica}', version) ORDER BY version SETTINGS index_granularity=256;"
    )

    assert rendered_sql in (plain_sql, replicated_sql)
Example #7
0
from snuba.clickhouse.columns import String, UInt
from snuba.clusters.storage_sets import StorageSetKey
from snuba.datasets.querylog_processor import QuerylogProcessor
from snuba.datasets.schemas.tables import WritableTableSchema
from snuba.datasets.storage import WritableTableStorage
from snuba.datasets.storages import StorageKey
from snuba.datasets.table_storage import build_kafka_stream_loader_from_settings
from snuba.utils.streams.topics import Topic

columns = ColumnSet([
    ("request_id", UUID()),
    ("request_body", String()),
    ("referrer", String()),
    ("dataset", String()),
    ("projects", Array(UInt(64))),
    ("organization", UInt(64, Modifiers(nullable=True))),
    ("timestamp", DateTime()),
    ("duration_ms", UInt(32)),
    ("status", String()),
    # clickhouse_queries Nested columns.
    # This is expanded into arrays instead of being expressed as a
    # Nested column because, when adding new columns to a nested field
    # we need to provide a default for the entire array (each new column
    # is an array).
    # The same schema cannot be achieved with the Nested construct (where
    # we can only provide default for individual values), so, if we
    # use the Nested construct, this schema cannot match the one generated
    # by the migration framework (or by any ALTER statement).
    ("clickhouse_queries.sql", Array(String())),
    ("clickhouse_queries.status", Array(String())),
    ("clickhouse_queries.trace_id", Array(UUID(Modifiers(nullable=True)))),
Example #8
0
from snuba.query.processors.table_rate_limit import TableRateLimit
from snuba.utils.schemas import Nested
from snuba.utils.streams.topics import Topic

# Table-name constants for replays: a "_local" name and its "_dist"
# counterpart. NOTE(review): presumably the per-node and distributed
# ClickHouse tables -- confirm against the schema that consumes them.
LOCAL_TABLE_NAME = "replays_local"
DIST_TABLE_NAME = "replays_dist"

columns = ColumnSet([
    ("replay_id", UUID()),
    ("sequence_id", UInt(16)),
    ("timestamp", DateTime()),
    (
        "trace_ids",
        Array(UUID()),
    ),  # TODO: create bloom filter index / materialize column
    ("title", String(Modifiers(readonly=True))),
    ### common sentry event columns
    ("project_id", UInt(64)),
    # release/environment info
    ("platform", String()),
    ("environment", String(Modifiers(nullable=True))),
    ("release", String(Modifiers(nullable=True))),
    ("dist", String(Modifiers(nullable=True))),
    ("ip_address_v4", IPv4(Modifiers(nullable=True))),
    ("ip_address_v6", IPv6(Modifiers(nullable=True))),
    # user columns
    ("user", String()),
    ("user_hash", UInt(64, Modifiers(readonly=True))),
    ("user_id", String(Modifiers(nullable=True))),
    ("user_name", String(Modifiers(nullable=True))),
    ("user_email", String(Modifiers(nullable=True))),
Example #9
0
    OrganizationRateLimiterProcessor,
    ProjectRateLimiterProcessor,
    ProjectReferrerRateLimiter,
    ReferrerRateLimiterProcessor,
)
from snuba.query.processors.quota_processor import ResourceQuotaProcessor
from snuba.query.validation.validators import EntityRequiredColumnValidator

# Entity-level column set for profile data.
# Every column is non-nullable except `android_api_level`,
# `device_os_build_number` and `environment`, which are declared with
# Modifiers(nullable=True).
profile_columns = EntityColumnSet([
    Column("organization_id", UInt(64)),
    Column("project_id", UInt(64)),
    Column("transaction_id", UUID()),
    Column("profile_id", UUID()),
    Column("received", DateTime()),
    Column("profile", String()),
    Column("android_api_level", UInt(32, Modifiers(nullable=True))),
    Column("device_classification", String()),
    Column("device_locale", String()),
    Column("device_manufacturer", String()),
    Column("device_model", String()),
    Column("device_os_build_number", String(Modifiers(nullable=True))),
    Column("device_os_name", String()),
    Column("device_os_version", String()),
    Column("duration_ns", UInt(64)),
    Column("environment", String(Modifiers(nullable=True))),
    Column("platform", String()),
    Column("trace_id", UUID()),
    Column("transaction_name", String()),
    Column("version_name", String()),
    Column("version_code", String()),
])
Example #10
0
from snuba.datasets.table_storage import build_kafka_stream_loader_from_settings
from snuba.datasets.transactions_processor import TransactionsMessageProcessor
from snuba.query.processors.arrayjoin_keyvalue_optimizer import (
    ArrayJoinKeyValueOptimizer, )
from snuba.query.processors.mapping_optimizer import MappingOptimizer
from snuba.query.processors.prewhere import PrewhereProcessor
from snuba.query.processors.uuid_column_processor import UUIDColumnProcessor
from snuba.web.split import TimeSplitQueryStrategy

columns = ColumnSet([
    ("project_id", UInt(64)),
    ("event_id", UUID()),
    ("trace_id", UUID()),
    ("span_id", UInt(64)),
    ("transaction_name", String()),
    ("transaction_hash", UInt(64, Modifiers(readonly=True))),
    ("transaction_op", String()),
    ("transaction_status", UInt(8)),
    ("start_ts", DateTime()),
    ("start_ms", UInt(16)),
    ("finish_ts", DateTime()),
    ("finish_ms", UInt(16)),
    ("duration", UInt(32)),
    ("platform", String()),
    ("environment", String(Modifiers(nullable=True))),
    ("release", String(Modifiers(nullable=True))),
    ("dist", String(Modifiers(nullable=True))),
    ("ip_address_v4", IPv4(Modifiers(nullable=True))),
    ("ip_address_v6", IPv6(Modifiers(nullable=True))),
    ("user", String()),
    ("user_hash", UInt(64, Modifiers(readonly=True))),
Example #11
0
from snuba.clusters.storage_sets import StorageSetKey
from snuba.datasets.schemas.tables import WritableTableSchema
from snuba.datasets.spans_processor import SpansMessageProcessor
from snuba.datasets.storage import WritableTableStorage
from snuba.datasets.storages import StorageKey
from snuba.datasets.table_storage import build_kafka_stream_loader_from_settings
from snuba.query.processors.prewhere import PrewhereProcessor
from snuba.web.split import TimeSplitQueryStrategy

# Column set for span rows, declared as (name, type) pairs.
# `parent_span_id` is the only nullable column and `_tags_hash_map` is the
# only one flagged readonly.
# NOTE(review): the schema object that consumes this set is outside this
# excerpt -- confirm where it is wired in.
columns = ColumnSet([
    ("project_id", UInt(64)),
    ("transaction_id", UUID()),
    ("trace_id", UUID()),
    ("transaction_span_id", UInt(64)),
    ("span_id", UInt(64)),
    ("parent_span_id", UInt(64, Modifiers(nullable=True))),
    ("transaction_name", String()),
    ("description", String()),  # description in span
    ("op", String()),
    ("status", UInt(8)),
    ("start_ts", DateTime()),
    ("start_ns", UInt(32)),
    ("finish_ts", DateTime()),
    ("finish_ns", UInt(32)),
    ("duration_ms", UInt(32)),
    # Tags stored as parallel key/value nested columns.
    ("tags", Nested([("key", String()), ("value", String())])),
    ("_tags_hash_map", Array(UInt(64), Modifiers(readonly=True))),
    ("retention_days", UInt(16)),
    ("deleted", UInt(8)),
])
Example #12
0
    def __init__(self) -> None:
        """Assemble the entity from the EVENTS and DISCOVER storages.

        Declares the column set shared by events and transactions, builds one
        query pipeline per storage (each with its own translation mappers)
        and hands both to a PipelineDelegator. ``selector_func`` always names
        the "events" pipeline and, with a probability read from the
        ``discover_query_percentage`` runtime config, also lists "discover".
        The entity is created with no writable storage and no join
        relationships.
        """
        # Columns declared once here and concatenated with EVENTS_COLUMNS and
        # TRANSACTIONS_COLUMNS when calling super().__init__ below.
        self.__common_columns = ColumnSet([
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", String(Modifiers(nullable=True))),
            ("timestamp", DateTime()),
            ("platform", String(Modifiers(nullable=True))),
            ("environment", String(Modifiers(nullable=True))),
            ("release", String(Modifiers(nullable=True))),
            ("dist", String(Modifiers(nullable=True))),
            ("user", String(Modifiers(nullable=True))),
            ("transaction", String(Modifiers(nullable=True))),
            ("message", String(Modifiers(nullable=True))),
            ("title", String(Modifiers(nullable=True))),
            # User
            ("user_id", String(Modifiers(nullable=True))),
            ("username", String(Modifiers(nullable=True))),
            ("email", String(Modifiers(nullable=True))),
            ("ip_address", String(Modifiers(nullable=True))),
            # SDK
            ("sdk_name", String(Modifiers(nullable=True))),
            ("sdk_version", String(Modifiers(nullable=True))),
            # geo location context
            ("geo_country_code", String(Modifiers(nullable=True))),
            ("geo_region", String(Modifiers(nullable=True))),
            ("geo_city", String(Modifiers(nullable=True))),
            ("http_method", String(Modifiers(nullable=True))),
            ("http_referer", String(Modifiers(nullable=True))),
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
        ])
        self.__events_columns = EVENTS_COLUMNS
        self.__transactions_columns = TRANSACTIONS_COLUMNS

        events_storage = get_storage(StorageKey.EVENTS)

        # Pipeline for the EVENTS storage. The storage selector receives the
        # events, transaction and null-function mappers plus the extra
        # tag-based mappings defined inline below.
        events_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=SelectedStorageQueryPlanBuilder(
                selector=EventsQueryStorageSelector(
                    mappers=events_translation_mappers.
                    concat(transaction_translation_mappers).concat(
                        null_function_translation_mappers).concat(
                            TranslationMappers(
                                # XXX: Remove once we are using errors
                                columns=[
                                    ColumnToMapping(None, "release", None,
                                                    "tags", "sentry:release"),
                                    ColumnToMapping(None, "dist", None, "tags",
                                                    "sentry:dist"),
                                    ColumnToMapping(None, "user", None, "tags",
                                                    "sentry:user"),
                                ],
                                subscriptables=[
                                    SubscriptableMapper(
                                        None, "tags", None, "tags"),
                                    SubscriptableMapper(
                                        None, "contexts", None, "contexts"),
                                ],
                            )))), )

        discover_storage = get_storage(StorageKey.DISCOVER)

        # Pipeline for the DISCOVER storage: same base mappers, followed by
        # column translations specific to this storage and the tags/contexts
        # subscriptable mappers.
        discover_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=SingleStorageQueryPlanBuilder(
                storage=discover_storage,
                mappers=events_translation_mappers.concat(
                    transaction_translation_mappers).
                concat(null_function_translation_mappers).concat(
                    TranslationMappers(columns=[
                        # ip_address -> coalesce(IPv4NumToString(ip_address_v4),
                        #                        IPv6NumToString(ip_address_v6))
                        ColumnToFunction(
                            None,
                            "ip_address",
                            "coalesce",
                            (
                                FunctionCall(
                                    None,
                                    "IPv4NumToString",
                                    (Column(None, None, "ip_address_v4"), ),
                                ),
                                FunctionCall(
                                    None,
                                    "IPv6NumToString",
                                    (Column(None, None, "ip_address_v6"), ),
                                ),
                            ),
                        ),
                        # Plain renames to the column names used by this storage.
                        ColumnToColumn(None, "transaction", None,
                                       "transaction_name"),
                        ColumnToColumn(None, "username", None, "user_name"),
                        ColumnToColumn(None, "email", None, "user_email"),
                        # Geo columns are looked up in `contexts` by key.
                        ColumnToMapping(
                            None,
                            "geo_country_code",
                            None,
                            "contexts",
                            "geo.country_code",
                        ),
                        ColumnToMapping(None, "geo_region", None, "contexts",
                                        "geo.region"),
                        ColumnToMapping(None, "geo_city", None, "contexts",
                                        "geo.city"),
                        # user -> nullIf(user, "")
                        ColumnToFunction(
                            None,
                            "user",
                            "nullIf",
                            (Column(None, None, "user"), Literal(None, "")),
                        ),
                    ])).concat(
                        TranslationMappers(subscriptables=[
                            SubscriptableMapper(None, "tags", None, "tags"),
                            SubscriptableMapper(None, "contexts", None,
                                                "contexts"),
                        ], )),
            ))

        def selector_func(_query: Query) -> Tuple[str, List[str]]:
            """Pick pipelines for a query; the query itself is ignored.

            Returns ``("events", ["discover"])`` when a random draw falls
            below the ``discover_query_percentage`` config value (default 0),
            otherwise ``("events", [])``.
            NOTE(review): presumably (primary pipeline, extra pipelines to
            run) -- confirm against PipelineDelegator.
            """
            if random.random() < float(
                    state.get_config("discover_query_percentage", 0)):
                return "events", ["discover"]

            return "events", []

        super().__init__(
            storages=[events_storage, discover_storage],
            query_pipeline_builder=PipelineDelegator(
                query_pipeline_builders={
                    "events": events_pipeline_builder,
                    "discover": discover_pipeline_builder,
                },
                selector_func=selector_func,
                # The externally defined callback_func gets "discover" bound
                # as its first positional argument.
                callback_func=partial(callback_func, "discover"),
            ),
            abstract_column_set=(self.__common_columns +
                                 self.__events_columns +
                                 self.__transactions_columns),
            join_relationships={},
            writable_storage=None,
        )
Example #13
0
    def __init__(self) -> None:
        """Build the entity on top of the EVENTS storage alone.

        Declares the column set shared by events and transactions, then
        registers a storage-selecting query plan builder whose mappers are
        the events, transaction and null-function mappers followed by extra
        tag-based mappings. The exposed column set is the concatenation of
        the common, events and transactions columns. There is no writable
        storage.
        """

        def nullable_string() -> String:
            # A fresh type instance per column, as if written inline.
            return String(Modifiers(nullable=True))

        def key_value_pairs() -> Nested:
            return Nested([("key", String()), ("value", String())])

        self.__common_columns = ColumnSet([
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", nullable_string()),
            ("timestamp", DateTime()),
            ("platform", nullable_string()),
            ("environment", nullable_string()),
            ("release", nullable_string()),
            ("dist", nullable_string()),
            ("user", nullable_string()),
            ("transaction", nullable_string()),
            ("message", nullable_string()),
            ("title", nullable_string()),
            # User
            ("user_id", nullable_string()),
            ("username", nullable_string()),
            ("email", nullable_string()),
            ("ip_address", nullable_string()),
            # SDK
            ("sdk_name", nullable_string()),
            ("sdk_version", nullable_string()),
            # geo location context
            ("geo_country_code", nullable_string()),
            ("geo_region", nullable_string()),
            ("geo_city", nullable_string()),
            ("http_method", nullable_string()),
            ("http_referer", nullable_string()),
            # Other tags and context
            ("tags", key_value_pairs()),
            ("contexts", key_value_pairs()),
        ])
        self.__events_columns = EVENTS_COLUMNS
        self.__transactions_columns = TRANSACTIONS_COLUMNS

        events_storage = get_storage(StorageKey.EVENTS)

        # Mappers applied first, in this order.
        base_mappers = events_translation_mappers.concat(
            transaction_translation_mappers
        ).concat(null_function_translation_mappers)

        # XXX: Remove once we are using errors
        tag_backed_columns = [
            ColumnToMapping(None, "release", None, "tags", "sentry:release"),
            ColumnToMapping(None, "dist", None, "tags", "sentry:dist"),
            ColumnToMapping(None, "user", None, "tags", "sentry:user"),
        ]
        subscriptable_mappers = [
            SubscriptableMapper(None, "tags", None, "tags"),
            SubscriptableMapper(None, "contexts", None, "contexts"),
        ]
        all_mappers = base_mappers.concat(
            TranslationMappers(
                columns=tag_backed_columns,
                subscriptables=subscriptable_mappers,
            )
        )
        plan_builder = SelectedStorageQueryPlanBuilder(
            selector=EventsQueryStorageSelector(mappers=all_mappers),
        )

        exposed_columns = (
            self.__common_columns
            + self.__events_columns
            + self.__transactions_columns
        )

        super().__init__(
            storages=[events_storage],
            query_plan_builder=plan_builder,
            abstract_column_set=exposed_columns,
            writable_storage=None,
        )
Example #14
0
    def __init__(self) -> None:
        """Assemble the entity, choosing its storage from a settings flag.

        Declares the column set shared by events and transactions, builds a
        pipeline for the EVENTS storage and a plan builder for the DISCOVER
        storage, then picks one of two configurations:

        * ``settings.ERRORS_ROLLOUT_ALL`` truthy: the DISCOVER storage, with
          a PipelineDelegator over a "primary" (simple) and a "sampler"
          (SampledSimplePipelineBuilder) pipeline sharing one plan builder.
        * otherwise: the EVENTS storage with its simple pipeline.

        ``project_id`` is a required column and ``timestamp`` is the required
        time column. There is no writable storage and there are no join
        relationships.
        """
        # Columns declared once here and concatenated with EVENTS_COLUMNS and
        # TRANSACTIONS_COLUMNS when calling super().__init__ below.
        self.__common_columns = ColumnSet([
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", String(Modifiers(nullable=True))),
            ("timestamp", DateTime()),
            ("platform", String(Modifiers(nullable=True))),
            ("environment", String(Modifiers(nullable=True))),
            ("release", String(Modifiers(nullable=True))),
            ("dist", String(Modifiers(nullable=True))),
            ("user", String(Modifiers(nullable=True))),
            ("transaction", String(Modifiers(nullable=True))),
            ("message", String(Modifiers(nullable=True))),
            ("title", String(Modifiers(nullable=True))),
            # User
            ("user_id", String(Modifiers(nullable=True))),
            ("username", String(Modifiers(nullable=True))),
            ("email", String(Modifiers(nullable=True))),
            ("ip_address", String(Modifiers(nullable=True))),
            # SDK
            ("sdk_name", String(Modifiers(nullable=True))),
            ("sdk_version", String(Modifiers(nullable=True))),
            # geo location context
            ("geo_country_code", String(Modifiers(nullable=True))),
            ("geo_region", String(Modifiers(nullable=True))),
            ("geo_city", String(Modifiers(nullable=True))),
            ("http_method", String(Modifiers(nullable=True))),
            ("http_referer", String(Modifiers(nullable=True))),
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
            ("trace_id", String(Modifiers(nullable=True))),
        ])
        self.__events_columns = EVENTS_COLUMNS
        self.__transactions_columns = TRANSACTIONS_COLUMNS

        events_storage = get_storage(StorageKey.EVENTS)

        # Pipeline for the EVENTS storage. The storage selector receives the
        # events, transaction and null-function mappers plus the extra
        # tag-based mappings defined inline below.
        events_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=SelectedStorageQueryPlanBuilder(
                selector=EventsQueryStorageSelector(
                    mappers=events_translation_mappers.
                    concat(transaction_translation_mappers).concat(
                        null_function_translation_mappers).concat(
                            TranslationMappers(
                                # XXX: Remove once we are using errors
                                columns=[
                                    ColumnToMapping(None, "release", None,
                                                    "tags", "sentry:release"),
                                    ColumnToMapping(None, "dist", None, "tags",
                                                    "sentry:dist"),
                                    ColumnToMapping(None, "user", None, "tags",
                                                    "sentry:user"),
                                ],
                                subscriptables=[
                                    SubscriptableMapper(
                                        None, "tags", None, "tags"),
                                    SubscriptableMapper(
                                        None, "contexts", None, "contexts"),
                                ],
                            )))), )

        discover_storage = get_storage(StorageKey.DISCOVER)
        # Plan builder for the DISCOVER storage. It is kept in its own
        # variable because both the simple and the sampled pipeline builders
        # below are constructed from it.
        discover_storage_plan_builder = SingleStorageQueryPlanBuilder(
            storage=discover_storage,
            mappers=events_translation_mappers.
            concat(transaction_translation_mappers).concat(
                null_function_translation_mappers).concat(
                    TranslationMappers(columns=[
                        # ip_address -> coalesce(IPv4NumToString(ip_address_v4),
                        #                        IPv6NumToString(ip_address_v6))
                        ColumnToFunction(
                            None,
                            "ip_address",
                            "coalesce",
                            (
                                FunctionCall(
                                    None,
                                    "IPv4NumToString",
                                    (Column(None, None, "ip_address_v4"), ),
                                ),
                                FunctionCall(
                                    None,
                                    "IPv6NumToString",
                                    (Column(None, None, "ip_address_v6"), ),
                                ),
                            ),
                        ),
                        # Plain renames to the column names used by this storage.
                        ColumnToColumn(None, "transaction", None,
                                       "transaction_name"),
                        ColumnToColumn(None, "username", None, "user_name"),
                        ColumnToColumn(None, "email", None, "user_email"),
                        # Geo columns are looked up in `contexts` by key and
                        # flagged nullable.
                        ColumnToMapping(
                            None,
                            "geo_country_code",
                            None,
                            "contexts",
                            "geo.country_code",
                            nullable=True,
                        ),
                        ColumnToMapping(
                            None,
                            "geo_region",
                            None,
                            "contexts",
                            "geo.region",
                            nullable=True,
                        ),
                        ColumnToMapping(
                            None,
                            "geo_city",
                            None,
                            "contexts",
                            "geo.city",
                            nullable=True,
                        ),
                        # user -> nullIf(user, "")
                        ColumnToFunction(
                            None,
                            "user",
                            "nullIf",
                            (Column(None, None, "user"), Literal(None, "")),
                        ),
                    ])).concat(
                        TranslationMappers(subscriptables=[
                            SubscriptableMapper(None, "tags", None, "tags"),
                            SubscriptableMapper(None, "contexts", None,
                                                "contexts"),
                        ], )),
        )
        discover_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=discover_storage_plan_builder)

        # Declared up front because the two branches assign different types.
        pipeline_builder: Union[PipelineDelegator, SimplePipelineBuilder]
        if settings.ERRORS_ROLLOUT_ALL:
            storage = discover_storage
            sampled_pipeline_builder = SampledSimplePipelineBuilder(
                query_plan_builder=discover_storage_plan_builder)

            # Pipeline choice and result handling are delegated to the
            # externally defined sampling_selector_func and
            # sampling_callback_func.
            pipeline_builder = PipelineDelegator(
                query_pipeline_builders={
                    "primary": discover_pipeline_builder,
                    "sampler": sampled_pipeline_builder,
                },
                selector_func=sampling_selector_func,
                callback_func=sampling_callback_func,
            )
        else:
            storage = events_storage
            pipeline_builder = events_pipeline_builder

        super().__init__(
            storages=[storage],
            query_pipeline_builder=pipeline_builder,
            abstract_column_set=(self.__common_columns +
                                 self.__events_columns +
                                 self.__transactions_columns),
            join_relationships={},
            writable_storage=None,
            validators=[EntityRequiredColumnValidator({"project_id"})],
            required_time_column="timestamp",
        )
Example #15
0
from snuba.datasets.outcomes_processor import OutcomesProcessor
from snuba.datasets.schemas.tables import TableSchema, WritableTableSchema
from snuba.datasets.storage import ReadableTableStorage, WritableTableStorage
from snuba.datasets.storages import StorageKey
from snuba.datasets.table_storage import build_kafka_stream_loader_from_settings
from snuba.query.processors.prewhere import PrewhereProcessor

# Table names for outcomes. The WRITE_* names are used by `raw_schema`
# below; the READ_* names are presumably used by the read-side schema, which
# is outside this excerpt (confirm).
WRITE_LOCAL_TABLE_NAME = "outcomes_raw_local"
WRITE_DIST_TABLE_NAME = "outcomes_raw_dist"
READ_LOCAL_TABLE_NAME = "outcomes_hourly_local"
READ_DIST_TABLE_NAME = "outcomes_hourly_dist"

# Columns of the raw (write-side) outcomes table. `key_id`, `reason` and
# `event_id` are nullable; the remaining columns are not.
write_columns = ColumnSet([
    ("org_id", UInt(64)),
    ("project_id", UInt(64)),
    ("key_id", UInt(64, Modifiers(nullable=True))),
    ("timestamp", DateTime()),
    ("outcome", UInt(8)),
    ("reason", String(Modifiers(nullable=True))),
    ("event_id", UUID(Modifiers(nullable=True))),
])

# Writable schema binding `write_columns` to the raw outcomes table names
# within the OUTCOMES storage set.
raw_schema = WritableTableSchema(
    columns=write_columns,
    # TODO: change to outcomes.raw_local when we add multi DB support
    local_table_name=WRITE_LOCAL_TABLE_NAME,
    dist_table_name=WRITE_DIST_TABLE_NAME,
    storage_set_key=StorageSetKey.OUTCOMES,
)

read_columns = ColumnSet([
Example #16
0
    subscript_names: Set[str]

    def attempt_map(
        self,
        expression: SubscriptableReference,
        children_translator: SnubaClickhouseStrictTranslator,
    ) -> Optional[FunctionCall]:
        """Translate a subscript on a configured column into a NULL literal.

        Returns ``identity(Literal(None, None), expression.alias)`` when the
        referenced column's name is in ``self.subscript_names``. Returns
        ``None`` for any other column. ``children_translator`` is not used.
        """
        referenced_column = expression.column.column_name
        if referenced_column not in self.subscript_names:
            return None

        return identity(Literal(None, None), expression.alias)


EVENTS_COLUMNS = ColumnSet([
    ("group_id", UInt(64, Modifiers(nullable=True))),
    ("primary_hash", FixedString(32, Modifiers(nullable=True))),
    # Promoted tags
    ("level", String(Modifiers(nullable=True))),
    ("logger", String(Modifiers(nullable=True))),
    ("server_name", String(Modifiers(nullable=True))),
    ("site", String(Modifiers(nullable=True))),
    ("url", String(Modifiers(nullable=True))),
    ("location", String(Modifiers(nullable=True))),
    ("culprit", String(Modifiers(nullable=True))),
    ("received", DateTime(Modifiers(nullable=True))),
    ("sdk_integrations", Array(String(), Modifiers(nullable=True))),
    ("version", String(Modifiers(nullable=True))),
    # exception interface
    (
        "exception_stacks",
import pytest
from snuba.clickhouse.columns import ColumnSet, Nested
from snuba.clickhouse.columns import SchemaModifiers as Modifiers
from snuba.clickhouse.columns import String, UInt
from snuba.clickhouse.query import Query as ClickhouseQuery
from snuba.query import SelectedExpression
from snuba.query.data_source.simple import Table
from snuba.query.expressions import Column, FunctionCall, Literal
from snuba.query.processors.mapping_promoter import MappingColumnPromoter
from snuba.request.request_settings import HTTPRequestSettings

# Schema of the "events" table used by the test cases below: one nullable
# UInt8 column named "promoted" and a nested "tags" column with string
# key/value sub-columns.
columns = ColumnSet([
    ("promoted", UInt(8, Modifiers(nullable=True))),
    ("tags", Nested([("key", String()), ("value", String())])),
])

test_cases = [
    (
        "not promoted",
        ClickhouseQuery(
            Table("events", columns),
            selected_columns=[
                SelectedExpression(
                    "tags[foo]",
                    FunctionCall(
                        "tags[foo]",
                        "arrayValue",
                        (
                            Column(None, None, "tags.value"),
                            FunctionCall(
                                None,
Example #18
0
    def __init__(self) -> None:
        """
        Declare the entity's columns and wire it to the DISCOVER storage.

        The abstract column set is the sum of the common columns defined here
        plus EVENTS_COLUMNS and TRANSACTIONS_COLUMNS. Queries go through a
        single-storage plan builder whose translation rules are the events,
        transactions and null-function mappers followed by the column and
        subscriptable mappers built below.
        """
        self.__common_columns = ColumnSet(
            [
                ("event_id", FixedString(32)),
                ("project_id", UInt(64)),
                ("type", String(Modifiers(nullable=True))),
                ("timestamp", DateTime()),
                ("platform", String(Modifiers(nullable=True))),
                ("environment", String(Modifiers(nullable=True))),
                ("release", String(Modifiers(nullable=True))),
                ("dist", String(Modifiers(nullable=True))),
                ("user", String(Modifiers(nullable=True))),
                ("transaction", String(Modifiers(nullable=True))),
                ("message", String(Modifiers(nullable=True))),
                ("title", String(Modifiers(nullable=True))),
                # User
                ("user_id", String(Modifiers(nullable=True))),
                ("username", String(Modifiers(nullable=True))),
                ("email", String(Modifiers(nullable=True))),
                ("ip_address", String(Modifiers(nullable=True))),
                # SDK
                ("sdk_name", String(Modifiers(nullable=True))),
                ("sdk_version", String(Modifiers(nullable=True))),
                # geo location context
                ("geo_country_code", String(Modifiers(nullable=True))),
                ("geo_region", String(Modifiers(nullable=True))),
                ("geo_city", String(Modifiers(nullable=True))),
                ("http_method", String(Modifiers(nullable=True))),
                ("http_referer", String(Modifiers(nullable=True))),
                # Other tags and context
                ("tags", Nested([("key", String()), ("value", String())])),
                ("contexts", Nested([("key", String()), ("value", String())])),
                ("trace_id", String(Modifiers(nullable=True))),
                ("span_id", UInt(64, Modifiers(nullable=True))),
            ]
        )
        self.__events_columns = EVENTS_COLUMNS
        self.__transactions_columns = TRANSACTIONS_COLUMNS

        discover_storage = get_storage(StorageKey.DISCOVER)

        # ip_address -> coalesce(IPv4NumToString(ip_address_v4),
        #                        IPv6NumToString(ip_address_v6))
        ip_address_mapper = ColumnToFunction(
            None,
            "ip_address",
            "coalesce",
            (
                FunctionCall(
                    None, "IPv4NumToString", (Column(None, None, "ip_address_v4"),)
                ),
                FunctionCall(
                    None, "IPv6NumToString", (Column(None, None, "ip_address_v6"),)
                ),
            ),
        )
        # Each geo_* column is read from the "contexts" mapping under its
        # dotted key.
        geo_mappers = [
            ColumnToMapping(
                None, column_name, None, "contexts", context_key, nullable=True
            )
            for column_name, context_key in (
                ("geo_country_code", "geo.country_code"),
                ("geo_region", "geo.region"),
                ("geo_city", "geo.city"),
            )
        ]
        column_mappers = TranslationMappers(
            columns=[
                ip_address_mapper,
                ColumnToColumn(None, "transaction", None, "transaction_name"),
                ColumnToColumn(None, "username", None, "user_name"),
                ColumnToColumn(None, "email", None, "user_email"),
                *geo_mappers,
                # user -> nullIf(user, "")
                ColumnToFunction(
                    None,
                    "user",
                    "nullIf",
                    (Column(None, None, "user"), Literal(None, "")),
                ),
            ]
        )
        subscriptable_mappers = TranslationMappers(
            subscriptables=[
                SubscriptableMapper(None, "tags", None, "tags"),
                SubscriptableMapper(None, "contexts", None, "contexts"),
            ],
        )
        all_mappers = (
            events_translation_mappers.concat(transaction_translation_mappers)
            .concat(null_function_translation_mappers)
            .concat(column_mappers)
            .concat(subscriptable_mappers)
        )

        discover_storage_plan_builder = SingleStorageQueryPlanBuilder(
            storage=discover_storage,
            mappers=all_mappers,
        )
        discover_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=discover_storage_plan_builder
        )

        abstract_columns = (
            self.__common_columns
            + self.__events_columns
            + self.__transactions_columns
        )
        super().__init__(
            storages=[discover_storage],
            query_pipeline_builder=discover_pipeline_builder,
            abstract_column_set=abstract_columns,
            join_relationships={},
            writable_storage=None,
            validators=[EntityRequiredColumnValidator({"project_id"})],
            required_time_column="timestamp",
        )
Example #19
0
from snuba.query.conditions import ConditionFunctions, binary_condition
from snuba.query.expressions import Column, Literal

# Columns of the grouped-message table (see the "groupedmessage_*" table
# names in the schema below).
columns = ColumnSet([
    # columns to maintain the dataset
    # Kafka topic offset
    ("offset", UInt(64)),
    # GroupStatus in Sentry does not have a 'DELETED' state that reflects the deletion
    # of the record. Having a dedicated ClickHouse-only flag to identify this case seems
    # more consistent than adding an extra value to the status field below that does not
    # exist on the Sentry side.
    ("record_deleted", UInt(8)),
    # PG columns
    ("project_id", UInt(64)),
    ("id", UInt(64)),
    ("status", UInt(8, Modifiers(nullable=True))),
    ("last_seen", DateTime(Modifiers(nullable=True))),
    ("first_seen", DateTime(Modifiers(nullable=True))),
    ("active_at", DateTime(Modifiers(nullable=True))),
    ("first_release_id", UInt(64, Modifiers(nullable=True))),
])

schema = WritableTableSchema(
    columns=columns,
    local_table_name="groupedmessage_local",
    dist_table_name="groupedmessage_dist",
    storage_set_key=StorageSetKey.EVENTS,
    mandatory_conditions=[
        binary_condition(
            ConditionFunctions.EQ,
            Column(None, None, "record_deleted"),
Example #20
0
from snuba.clickhouse.columns import SchemaModifiers as Modifiers
from snuba.clickhouse.columns import String, UInt
from snuba.clickhouse.query import Query
from snuba.clusters.storage_sets import StorageSetKey
from snuba.datasets.schemas.tables import TableSchema
from snuba.datasets.storage import ReadableTableStorage
from snuba.datasets.storages import StorageKey
from snuba.query import SelectedExpression
from snuba.query.data_source.simple import Table
from snuba.query.expressions import Column, FunctionCall, Literal
from snuba.query.processors.null_column_caster import NullColumnCaster
from snuba.query.query_settings import HTTPQuerySettings

# Two column sets that declare "mismatched1" and "mismatched2" with the same
# base types but different nullability: nullable in columns1, non-nullable in
# columns2. Their first columns also differ in name ("not_mismatched" vs
# "timestamp"), both being plain DateTime.
columns1 = ColumnSet([
    ("not_mismatched", DateTime()),
    ("mismatched1", String(Modifiers(nullable=True))),
    ("mismatched2", UInt(64, Modifiers(nullable=True))),
])

columns2 = ColumnSet([
    ("timestamp", DateTime()),
    ("mismatched1", String()),  # non-nullable by default
    ("mismatched2", UInt(64, Modifiers(nullable=False))),  # explicitly non-nullable
])

schema1 = TableSchema(
    columns=columns1,
    local_table_name="discover_local",
    dist_table_name="discover_dist",
    storage_set_key=StorageSetKey.DISCOVER,
    mandatory_conditions=[],
Example #21
0
from snuba.datasets.storages import StorageKey
from snuba.datasets.storages.processors.consistency_enforcer import (
    ConsistencyEnforcerProcessor, )
from snuba.datasets.table_storage import build_kafka_stream_loader_from_settings
from snuba.query.processors.prewhere import PrewhereProcessor
from snuba.utils.streams.topics import Topic

# Columns of the group-assignee table declared by the schema below.
# date_added, user_id and team_id are nullable; the rest are not.
columns = ColumnSet([
    # columns to maintain the dataset
    # Kafka topic offset
    ("offset", UInt(64)),
    ("record_deleted", UInt(8)),
    # PG columns
    ("project_id", UInt(64)),
    ("group_id", UInt(64)),
    ("date_added", DateTime(Modifiers(nullable=True))),
    ("user_id", UInt(64, Modifiers(nullable=True))),
    ("team_id", UInt(64, Modifiers(nullable=True))),
])

# Writable schema binding the columns above to the groupassignee tables in
# the CDC storage set.
schema = WritableTableSchema(
    columns=columns,
    local_table_name="groupassignee_local",
    dist_table_name="groupassignee_dist",
    storage_set_key=StorageSetKey.CDC,
)

# NOTE(review): spelled "groupasignee" (single "s"), unlike the ClickHouse
# table names above. Presumably this mirrors the upstream Postgres table name
# verbatim -- confirm against the source database before "correcting" it.
POSTGRES_TABLE = "sentry_groupasignee"

storage = CdcStorage(
    storage_key=StorageKey.GROUPASSIGNEES,
Example #22
0
from snuba.query.composite import CompositeQuery
from snuba.query.conditions import binary_condition
from snuba.query.data_source.join import (
    IndividualNode,
    JoinClause,
    JoinCondition,
    JoinConditionExpression,
    JoinType,
)
from snuba.query.data_source.simple import Entity, Table
from snuba.query.expressions import Column, FunctionCall, Literal
from snuba.query.formatters.tracing import TExpression, format_query
from snuba.query.logical import Query as LogicalQuery
from tests.query.joins.equivalence_schema import EVENTS_SCHEMA, GROUPS_SCHEMA

# Minimal column set: a single nullable UInt8 column named "some_int".
# NOTE(review): its consumers are outside this excerpt.
columns = ColumnSet([("some_int", UInt(8, Modifiers(nullable=True)))])

BASIC_JOIN = JoinClause(
    left_node=IndividualNode(
        alias="ev",
        data_source=Entity(EntityKey.EVENTS, EVENTS_SCHEMA, None),
    ),
    right_node=IndividualNode(
        alias="gr",
        data_source=Entity(EntityKey.GROUPEDMESSAGES, GROUPS_SCHEMA, None),
    ),
    keys=[
        JoinCondition(
            left=JoinConditionExpression("ev", "group_id"),
            right=JoinConditionExpression("gr", "id"),
        )
Example #23
0
# Names of the columns treated as required.
# NOTE(review): the consumer of this list is not visible here; note that
# "primary_hash", "group_id", "deleted" and "retention_days" do not appear in
# the visible part of all_columns below -- confirm they are declared further
# down.
required_columns = [
    "event_id",
    "primary_hash",
    "project_id",
    "group_id",
    "timestamp",
    "deleted",
    "retention_days",
]

all_columns = ColumnSet([
    ("project_id", UInt(64)),
    ("timestamp", DateTime()),
    ("event_id", UUID()),
    ("platform", String()),
    ("environment", String(Modifiers(nullable=True))),
    ("release", String(Modifiers(nullable=True))),
    ("dist", String(Modifiers(nullable=True))),
    ("ip_address_v4", IPv4(Modifiers(nullable=True))),
    ("ip_address_v6", IPv6(Modifiers(nullable=True))),
    ("user", String()),
    ("user_hash", UInt(64, Modifiers(readonly=True))),
    ("user_id", String(Modifiers(nullable=True))),
    ("user_name", String(Modifiers(nullable=True))),
    ("user_email", String(Modifiers(nullable=True))),
    ("sdk_name", String(Modifiers(nullable=True))),
    ("sdk_version", String(Modifiers(nullable=True))),
    ("http_method", String(Modifiers(nullable=True))),
    ("http_referer", String(Modifiers(nullable=True))),
    ("tags", Nested([("key", String()), ("value", String())])),
    ("_tags_hash_map", Array(UInt(64), Modifiers(readonly=True))),
Example #24
0
    PostReplacementConsistencyEnforcer,
)
from snuba.query.conditions import ConditionFunctions, binary_condition
from snuba.query.expressions import Column, Literal
from snuba.query.processors.arrayjoin_keyvalue_optimizer import (
    ArrayJoinKeyValueOptimizer,
)
from snuba.query.processors.mapping_optimizer import MappingOptimizer
from snuba.query.processors.mapping_promoter import MappingColumnPromoter
from snuba.query.processors.prewhere import PrewhereProcessor
from snuba.web.split import ColumnSplitQueryStrategy, TimeSplitQueryStrategy

# Stream metadata columns. offset and partition are declared nullable;
# message_timestamp is a plain (non-nullable) DateTime.
metadata_columns = ColumnSet(
    [
        # optional stream-related data
        ("offset", UInt(64, Modifiers(nullable=True))),
        ("partition", UInt(16, Modifiers(nullable=True))),
        ("message_timestamp", DateTime()),
    ]
)

promoted_tag_columns = ColumnSet(
    [
        # These are the classic tags, they are saved in Snuba exactly as they
        # appear in the event body.
        ("level", String(Modifiers(nullable=True))),
        ("logger", String(Modifiers(nullable=True))),
        ("server_name", String(Modifiers(nullable=True))),  # future name: device_id?
        ("transaction", String(Modifiers(nullable=True))),
        ("environment", String(Modifiers(nullable=True))),
        ("sentry:release", String(Modifiers(nullable=True))),
Example #25
0
from snuba.query.processors.mapping_optimizer import MappingOptimizer
from snuba.query.processors.mapping_promoter import MappingColumnPromoter
from snuba.query.processors.prewhere import PrewhereProcessor
from snuba.query.processors.type_converters.uuid_column_processor import (
    UUIDColumnProcessor,
)
from snuba.web.split import ColumnSplitQueryStrategy, TimeSplitQueryStrategy

columns = ColumnSet(
    [
        ("event_id", UUID()),
        ("project_id", UInt(64)),
        ("type", String()),
        ("timestamp", DateTime()),
        ("platform", String()),
        ("environment", String(Modifiers(nullable=True))),
        ("release", String(Modifiers(nullable=True))),
        ("dist", String(Modifiers(nullable=True))),
        ("transaction_name", String()),
        ("message", String()),
        ("title", String()),
        ("user", String()),
        ("user_hash", UInt(64)),
        ("user_id", String(Modifiers(nullable=True))),
        ("user_name", String(Modifiers(nullable=True))),
        ("user_email", String(Modifiers(nullable=True))),
        ("ip_address_v4", IPv4(Modifiers(nullable=True))),
        ("ip_address_v6", IPv6(Modifiers(nullable=True))),
        ("sdk_name", String(Modifiers(nullable=True))),
        ("sdk_version", String(Modifiers(nullable=True))),
        ("http_method", String(Modifiers(nullable=True))),
Example #26
0
    def __init__(self) -> None:
        """
        Declare the entity's columns and wire it to two query pipelines.

        Two pipeline builders are registered with a PipelineDelegator under
        the keys "events" (backed by the EVENTS storage through
        EventsQueryStorageSelector) and "discover" (backed by the DISCOVER
        storage). The nested ``selector_func`` decides, per query, which keys
        are handed back to the delegator. The abstract column set is the sum
        of the common columns defined here plus EVENTS_COLUMNS and
        TRANSACTIONS_COLUMNS.
        """
        self.__common_columns = ColumnSet(
            [
                ("event_id", FixedString(32)),
                ("project_id", UInt(64)),
                ("type", String(Modifiers(nullable=True))),
                ("timestamp", DateTime()),
                ("platform", String(Modifiers(nullable=True))),
                ("environment", String(Modifiers(nullable=True))),
                ("release", String(Modifiers(nullable=True))),
                ("dist", String(Modifiers(nullable=True))),
                ("user", String(Modifiers(nullable=True))),
                ("transaction", String(Modifiers(nullable=True))),
                ("message", String(Modifiers(nullable=True))),
                ("title", String(Modifiers(nullable=True))),
                # User
                ("user_id", String(Modifiers(nullable=True))),
                ("username", String(Modifiers(nullable=True))),
                ("email", String(Modifiers(nullable=True))),
                ("ip_address", String(Modifiers(nullable=True))),
                # SDK
                ("sdk_name", String(Modifiers(nullable=True))),
                ("sdk_version", String(Modifiers(nullable=True))),
                # geo location context
                ("geo_country_code", String(Modifiers(nullable=True))),
                ("geo_region", String(Modifiers(nullable=True))),
                ("geo_city", String(Modifiers(nullable=True))),
                ("http_method", String(Modifiers(nullable=True))),
                ("http_referer", String(Modifiers(nullable=True))),
                # Other tags and context
                ("tags", Nested([("key", String()), ("value", String())])),
                ("contexts", Nested([("key", String()), ("value", String())])),
            ]
        )
        self.__events_columns = EVENTS_COLUMNS
        self.__transactions_columns = TRANSACTIONS_COLUMNS

        # ---- "events" pipeline -------------------------------------------
        events_storage = get_storage(StorageKey.EVENTS)

        events_specific_mappers = TranslationMappers(
            # XXX: Remove once we are using errors
            columns=[
                ColumnToMapping(None, "release", None, "tags", "sentry:release"),
                ColumnToMapping(None, "dist", None, "tags", "sentry:dist"),
                ColumnToMapping(None, "user", None, "tags", "sentry:user"),
            ],
            subscriptables=[
                SubscriptableMapper(None, "tags", None, "tags"),
                SubscriptableMapper(None, "contexts", None, "contexts"),
            ],
        )
        events_mappers = (
            events_translation_mappers.concat(transaction_translation_mappers)
            .concat(null_function_translation_mappers)
            .concat(events_specific_mappers)
        )
        events_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=SelectedStorageQueryPlanBuilder(
                selector=EventsQueryStorageSelector(mappers=events_mappers),
            ),
        )

        # ---- "discover" pipeline -----------------------------------------
        discover_storage = get_storage(StorageKey.DISCOVER)

        # ip_address -> coalesce(IPv4NumToString(ip_address_v4),
        #                        IPv6NumToString(ip_address_v6))
        ip_address_mapper = ColumnToFunction(
            None,
            "ip_address",
            "coalesce",
            (
                FunctionCall(
                    None, "IPv4NumToString", (Column(None, None, "ip_address_v4"),)
                ),
                FunctionCall(
                    None, "IPv6NumToString", (Column(None, None, "ip_address_v6"),)
                ),
            ),
        )
        # Each geo_* column is read from the "contexts" mapping under its
        # dotted key.
        geo_mappers = [
            ColumnToMapping(
                None, column_name, None, "contexts", context_key, nullable=True
            )
            for column_name, context_key in (
                ("geo_country_code", "geo.country_code"),
                ("geo_region", "geo.region"),
                ("geo_city", "geo.city"),
            )
        ]
        discover_column_mappers = TranslationMappers(
            columns=[
                ip_address_mapper,
                ColumnToColumn(None, "transaction", None, "transaction_name"),
                ColumnToColumn(None, "username", None, "user_name"),
                ColumnToColumn(None, "email", None, "user_email"),
                *geo_mappers,
                # user -> nullIf(user, "")
                ColumnToFunction(
                    None,
                    "user",
                    "nullIf",
                    (Column(None, None, "user"), Literal(None, "")),
                ),
            ]
        )
        discover_subscriptable_mappers = TranslationMappers(
            subscriptables=[
                SubscriptableMapper(None, "tags", None, "tags"),
                SubscriptableMapper(None, "contexts", None, "contexts"),
            ],
        )
        discover_mappers = (
            events_translation_mappers.concat(transaction_translation_mappers)
            .concat(null_function_translation_mappers)
            .concat(discover_column_mappers)
            .concat(discover_subscriptable_mappers)
        )
        discover_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=SingleStorageQueryPlanBuilder(
                storage=discover_storage,
                mappers=discover_mappers,
            )
        )

        def selector_func(_query: Query,
                          referrer: str) -> Tuple[str, List[str]]:
            """
            Pick the pipeline keys for one query.

            Returns a (key, extra_keys) tuple whose values are keys of the
            ``query_pipeline_builders`` mapping passed to PipelineDelegator.
            The checks run in this order:

            1. runtime config "errors_rollout_killswitch" truthy
               -> ("events", [])
            2. referrer listed in settings.ERRORS_ROLLOUT_BY_REFERRER, or
               settings.ERRORS_ROLLOUT_ALL set -> ("discover", [])
            3. otherwise "events" is returned, with ["discover"] as the
               extra keys for a random share of calls. The share is
               settings.ERRORS_QUERY_PERCENTAGE_BY_REFERRER[referrer] when
               present, else runtime config "discover_query_percentage".
            """
            # In case something goes wrong, set this to 1 to revert to the
            # events storage.
            kill_rollout = state.get_config("errors_rollout_killswitch", 0)
            assert isinstance(kill_rollout, (int, str))
            if int(kill_rollout):
                return "events", []

            if (referrer in settings.ERRORS_ROLLOUT_BY_REFERRER
                    or settings.ERRORS_ROLLOUT_ALL):
                return "discover", []

            default_threshold = state.get_config(
                "discover_query_percentage", 0)
            assert isinstance(default_threshold, (float, int, str))

            threshold = settings.ERRORS_QUERY_PERCENTAGE_BY_REFERRER.get(
                referrer, default_threshold)

            extra_keys = (
                ["discover"] if random.random() < float(threshold) else []
            )
            return "events", extra_keys

        delegator = PipelineDelegator(
            query_pipeline_builders={
                "events": events_pipeline_builder,
                "discover": discover_pipeline_builder,
            },
            selector_func=selector_func,
            callback_func=partial(callback_func, "discover"),
        )
        abstract_columns = (
            self.__common_columns
            + self.__events_columns
            + self.__transactions_columns
        )
        super().__init__(
            storages=[events_storage, discover_storage],
            query_pipeline_builder=delegator,
            abstract_column_set=abstract_columns,
            join_relationships={},
            writable_storage=None,
            required_filter_columns=["project_id"],
            required_time_column="timestamp",
        )