Example #1
READ_DIST_MV_NAME = "sessions_hourly_mv_dist"


all_columns = ColumnSet(
    [
        ("session_id", UUID()),
        ("distinct_id", UUID()),
        ("quantity", UInt(32)),
        ("seq", UInt(64)),
        ("org_id", UInt(64)),
        ("project_id", UInt(64)),
        ("retention_days", UInt(16)),
        ("duration", UInt(32)),
        ("status", UInt(8)),
        ("errors", UInt(16)),
        ("received", DateTime()),
        ("started", DateTime()),
        ("release", String()),
        ("environment", String()),
        ("user_agent", String()),
        ("os", String()),
    ]
)

raw_schema = WritableTableSchema(
    columns=all_columns,
    local_table_name=WRITE_LOCAL_TABLE_NAME,
    dist_table_name=WRITE_DIST_TABLE_NAME,
    storage_set_key=StorageSetKey.SESSIONS,
)
Example #2
columns = [
    Column("project_id", UInt(64)),
    Column("transaction_id", UUID()),
    Column("trace_id", UUID()),
    Column("transaction_span_id", UInt(64)),
    Column("span_id", UInt(64)),
    Column("parent_span_id", Nullable(UInt(64))),
    Column("transaction_name", LowCardinality(String())),
    Column("description", String()),  # description in span
    Column("op", LowCardinality(String())),
    Column(
        "status",
        WithDefault(UInt(8), str(UNKNOWN_SPAN_STATUS)),
    ),
    Column("start_ts", DateTime()),
    Column("start_ns", UInt(32)),
    Column("finish_ts", DateTime()),
    Column("finish_ns", UInt(32)),
    Column("duration_ms", UInt(32)),
    tags_col,
    Column("retention_days", UInt(16)),
    Column("deleted", UInt(8)),
]


class Migration(migration.MultiStepMigration):
    blocking = False

    def forwards_local(self) -> Sequence[operations.Operation]:
        return [
Example #3
from snuba.query.processors.arrayjoin_keyvalue_optimizer import (
    ArrayJoinKeyValueOptimizer, )
from snuba.query.processors.mapping_optimizer import MappingOptimizer
from snuba.query.processors.prewhere import PrewhereProcessor
from snuba.web.split import TimeSplitQueryStrategy

columns = ColumnSet([
    ("project_id", UInt(64)),
    ("event_id", UUID()),
    ("trace_id", UUID()),
    ("span_id", UInt(64)),
    ("transaction_name", String()),
    ("transaction_hash", UInt(64, Modifiers(readonly=True))),
    ("transaction_op", String()),
    ("transaction_status", UInt(8)),
    ("start_ts", DateTime()),
    ("start_ms", UInt(16)),
    ("finish_ts", DateTime()),
    ("finish_ms", UInt(16)),
    ("duration", UInt(32)),
    ("platform", String()),
    ("environment", String(Modifiers(nullable=True))),
    ("release", String(Modifiers(nullable=True))),
    ("dist", String(Modifiers(nullable=True))),
    ("ip_address_v4", IPv4(Modifiers(nullable=True))),
    ("ip_address_v6", IPv6(Modifiers(nullable=True))),
    ("user", String()),
    ("user_hash", UInt(64, Modifiers(readonly=True))),
    ("user_id", String(Modifiers(nullable=True))),
    ("user_name", String(Modifiers(nullable=True))),
    ("user_email", String(Modifiers(nullable=True))),
    Float,
    Nested,
    String,
    UInt,
)
from snuba.clusters.storage_sets import StorageSetKey
from snuba.datasets.storages.tags_hash_map import TAGS_HASH_MAP_COLUMN
from snuba.migrations import migration, operations, table_engines
from snuba.migrations.columns import MigrationModifiers as Modifiers

# Column list from 0001_events_initial plus the hierarchical_hashes column
columns: List[Column[Modifiers]] = [
    Column("event_id", FixedString(32)),
    Column("project_id", UInt(64)),
    Column("group_id", UInt(64)),
    Column("timestamp", DateTime()),
    Column("deleted", UInt(8)),
    Column("retention_days", UInt(16)),
    Column("platform", String(Modifiers(nullable=True))),
    Column("message", String(Modifiers(nullable=True))),
    Column("primary_hash", FixedString(32, Modifiers(nullable=True))),
    Column("hierarchical_hashes", Array(FixedString(32))),
    Column("received", DateTime(Modifiers(nullable=True))),
    Column("search_message", String(Modifiers(nullable=True))),
    Column("title", String(Modifiers(nullable=True))),
    Column("location", String(Modifiers(nullable=True))),
    Column("user_id", String(Modifiers(nullable=True))),
    Column("username", String(Modifiers(nullable=True))),
    Column("email", String(Modifiers(nullable=True))),
    Column("ip_address", String(Modifiers(nullable=True))),
    Column("geo_country_code", String(Modifiers(nullable=True))),
Example #5
from typing import Sequence

from snuba.clickhouse.columns import Column, DateTime, Enum, String, UInt, WithDefault
from snuba.clusters.storage_sets import StorageSetKey
from snuba.migrations import migration, operations
from snuba.migrations.context import Context
from snuba.migrations.status import Status
from snuba.migrations.table_engines import Distributed, ReplacingMergeTree

columns = [
    Column("group", String()),
    Column("migration_id", String()),
    Column("timestamp", DateTime()),
    Column(
        "status",
        Enum([("completed", 0), ("in_progress", 1), ("not_started", 2)]),
    ),
    Column("version", WithDefault(UInt(64), "1")),
]


class Migration(migration.Migration):
    """
    This migration extends Migration instead of MultiStepMigration since it is
    responsible for bootstrapping the migration system itself. It skips setting
    the in progress status in the forwards method and the not started status in
    the backwards method. Since the migration table doesn't exist yet, we can't
    write any statuses until this migration is completed.
    """

    blocking = False
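
A minimal sketch of the ordering constraint the docstring describes, using hypothetical runner callables (run_ops, write_status) and assuming Status exposes members matching the enum defined in the columns above; it is an illustration, not Snuba's actual migration runner:

from typing import Callable

from snuba.migrations.status import Status


def run_regular_migration(
    write_status: Callable[[Status], None], run_ops: Callable[[], None]
) -> None:
    # An ordinary migration can record progress before and after running,
    # because the migrations table already exists.
    write_status(Status.IN_PROGRESS)
    run_ops()
    write_status(Status.COMPLETED)


def run_bootstrap_migration(
    write_status: Callable[[Status], None], run_ops: Callable[[], None]
) -> None:
    # The bootstrap migration creates the migrations table itself, so the only
    # status it can persist is the final COMPLETED one; IN_PROGRESS (forwards)
    # and NOT_STARTED (backwards) are skipped, as the docstring explains.
    run_ops()
    write_status(Status.COMPLETED)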
Example #6
from snuba.clickhouse.columns import ColumnSet, DateTime
from snuba.clickhouse.columns import SchemaModifiers as Modifiers
from snuba.clickhouse.columns import String, UInt
from snuba.clickhouse.query import Query
from snuba.clusters.storage_sets import StorageSetKey
from snuba.datasets.schemas.tables import TableSchema
from snuba.datasets.storage import ReadableTableStorage
from snuba.datasets.storages import StorageKey
from snuba.query import SelectedExpression
from snuba.query.data_source.simple import Table
from snuba.query.expressions import Column, FunctionCall, Literal
from snuba.query.processors.null_column_caster import NullColumnCaster
from snuba.query.query_settings import HTTPQuerySettings

columns1 = ColumnSet([
    ("not_mismatched", DateTime()),
    ("mismatched1", String(Modifiers(nullable=True))),
    ("mismatched2", UInt(64, Modifiers(nullable=True))),
])

columns2 = ColumnSet([
    ("timestamp", DateTime()),
    ("mismatched1", String()),  # non-nullable by default
    ("mismatched2", UInt(64, Modifiers(nullable=False))),
])

schema1 = TableSchema(
    columns=columns1,
    local_table_name="discover_local",
    dist_table_name="discover_dist",
    storage_set_key=StorageSetKey.DISCOVER,
Example #7
    MaterializedViewSchema,
)
from snuba.datasets.storages import StorageKey
from snuba.datasets.table_storage import KafkaStreamLoader
from snuba.query.processors.prewhere import PrewhereProcessor

WRITE_LOCAL_TABLE_NAME = "outcomes_raw_local"
WRITE_DIST_TABLE_NAME = "outcomes_raw_dist"
READ_LOCAL_TABLE_NAME = "outcomes_hourly_local"
READ_DIST_TABLE_NAME = "outcomes_hourly_dist"

write_columns = ColumnSet([
    ("org_id", UInt(64)),
    ("project_id", UInt(64)),
    ("key_id", Nullable(UInt(64))),
    ("timestamp", DateTime()),
    ("outcome", UInt(8)),
    ("reason", LowCardinality(Nullable(String()))),
    ("event_id", Nullable(UUID())),
])

raw_schema = MergeTreeSchema(
    columns=write_columns,
    # TODO: change to outcomes.raw_local when we add multi DB support
    local_table_name=WRITE_LOCAL_TABLE_NAME,
    dist_table_name=WRITE_DIST_TABLE_NAME,
    storage_set_key=StorageSetKey.OUTCOMES,
    order_by="(org_id, project_id, timestamp)",
    partition_by="(toMonday(timestamp))",
    settings={"index_granularity": "16384"},
)
Example #8
 pytest.param(
     Float(64, Modifier(nullable=True)),
     Float(64),
     Float(32, Modifier(nullable=True)),
     "Nullable(Float64)",
     id="floats",
 ),
 pytest.param(
     Date(),
     Date(),
     Date(Modifier(nullable=True)),
     "Date",
     id="dates",
 ),
 pytest.param(
     DateTime(),
     DateTime(),
     DateTime(Modifier(nullable=True)),
     "DateTime",
     id="datetimes",
 ),
 pytest.param(
     Array(String(Modifier(nullable=True))),
     Array(String()),
     Array(String()),
     "Array(Nullable(String))",
     id="arrays",
 ),
 pytest.param(
     Nested(
         [("key", String()), ("val", String(Modifier(nullable=True)))],
Example #9
    def __init__(self) -> None:
        self.__common_columns = ColumnSet(
            [
                ("event_id", FixedString(32)),
                ("project_id", UInt(64)),
                ("type", Nullable(String())),
                ("timestamp", DateTime()),
                ("platform", Nullable(String())),
                ("environment", Nullable(String())),
                ("release", Nullable(String())),
                ("dist", Nullable(String())),
                ("user", Nullable(String())),
                ("transaction", Nullable(String())),
                ("message", Nullable(String())),
                ("title", Nullable(String())),
                # User
                ("user_id", Nullable(String())),
                ("username", Nullable(String())),
                ("email", Nullable(String())),
                ("ip_address", Nullable(String())),
                # SDK
                ("sdk_name", Nullable(String())),
                ("sdk_version", Nullable(String())),
                # geo location context
                ("geo_country_code", Nullable(String())),
                ("geo_region", Nullable(String())),
                ("geo_city", Nullable(String())),
                ("http_method", Nullable(String())),
                ("http_referer", Nullable(String())),
                # Other tags and context
                ("tags", Nested([("key", String()), ("value", String())])),
                ("contexts", Nested([("key", String()), ("value", String())])),
            ]
        )

        self.__events_columns = ColumnSet(
            [
                ("group_id", Nullable(UInt(64))),
                ("primary_hash", Nullable(FixedString(32))),
                # Promoted tags
                ("level", Nullable(String())),
                ("logger", Nullable(String())),
                ("server_name", Nullable(String())),
                ("site", Nullable(String())),
                ("url", Nullable(String())),
                ("search_message", Nullable(String())),
                ("location", Nullable(String())),
                ("culprit", Nullable(String())),
                ("received", Nullable(DateTime())),
                ("sdk_integrations", Nullable(Array(String()))),
                ("version", Nullable(String())),
                # exception interface
                (
                    "exception_stacks",
                    Nested(
                        [
                            ("type", Nullable(String())),
                            ("value", Nullable(String())),
                            ("mechanism_type", Nullable(String())),
                            ("mechanism_handled", Nullable(UInt(8))),
                        ]
                    ),
                ),
                (
                    "exception_frames",
                    Nested(
                        [
                            ("abs_path", Nullable(String())),
                            ("filename", Nullable(String())),
                            ("package", Nullable(String())),
                            ("module", Nullable(String())),
                            ("function", Nullable(String())),
                            ("in_app", Nullable(UInt(8))),
                            ("colno", Nullable(UInt(32))),
                            ("lineno", Nullable(UInt(32))),
                            ("stack_level", UInt(16)),
                        ]
                    ),
                ),
                ("modules", Nested([("name", String()), ("version", String())])),
            ]
        )

        self.__transactions_columns = ColumnSet(
            [
                ("trace_id", Nullable(UUID())),
                ("span_id", Nullable(UInt(64))),
                ("transaction_hash", Nullable(UInt(64))),
                ("transaction_op", Nullable(String())),
                ("transaction_status", Nullable(UInt(8))),
                ("duration", Nullable(UInt(32))),
                (
                    "measurements",
                    Nested([("key", LowCardinality(String())), ("value", Float(64))]),
                ),
            ]
        )

        events_storage = get_storage(StorageKey.EVENTS)
        events_ro_storage = get_storage(StorageKey.EVENTS_RO)
        transactions_storage = get_storage(StorageKey.TRANSACTIONS)

        self.__time_group_columns: Mapping[str, str] = {}
        self.__time_parse_columns = ("timestamp",)

        super().__init__(
            storages=[events_storage, transactions_storage],
            query_plan_builder=SelectedStorageQueryPlanBuilder(
                selector=DiscoverQueryStorageSelector(
                    events_table=events_storage,
                    events_ro_table=events_ro_storage,
                    abstract_events_columns=self.__events_columns,
                    transactions_table=transactions_storage,
                    abstract_transactions_columns=self.__transactions_columns,
                ),
            ),
            abstract_column_set=(
                self.__common_columns
                + self.__events_columns
                + self.__transactions_columns
            ),
            writable_storage=None,
        )
Example #10
    WithCodecs,
    WithDefault,
)
from snuba.datasets.dataset_schemas import StorageSchemas
from snuba.datasets.errors_processor import ErrorsProcessor
from snuba.datasets.errors_replacer import ErrorsReplacer, ReplacerState
from snuba.datasets.schemas.tables import ReplacingMergeTreeSchema
from snuba.datasets.storage import WritableTableStorage
from snuba.datasets.table_storage import TableWriter, KafkaStreamLoader
from snuba.query.processors.prewhere import PrewhereProcessor

all_columns = ColumnSet(
    [
        ("org_id", UInt(64)),
        ("project_id", UInt(64)),
        ("timestamp", DateTime()),
        ("event_id", WithCodecs(UUID(), ["NONE"])),
        (
            "event_hash",
            WithCodecs(
                Materialized(UInt(64), "cityHash64(toString(event_id))",),
                ["NONE"],
            ),
        ),
        ("platform", LowCardinality(String())),
        ("environment", LowCardinality(Nullable(String()))),
        ("release", LowCardinality(Nullable(String()))),
        ("dist", LowCardinality(Nullable(String()))),
        ("ip_address_v4", Nullable(IPv4())),
        ("ip_address_v6", Nullable(IPv6())),
        ("user", WithDefault(String(), "''")),
Example #11
from snuba.query.processors.uniq_in_select_and_having import (
    UniqInSelectAndHavingProcessor,
)
from snuba.web.split import TimeSplitQueryStrategy

columns = ColumnSet(
    [
        ("project_id", UInt(64)),
        ("event_id", UUID()),
        ("trace_id", UUID(Modifiers(nullable=True))),
        ("span_id", UInt(64)),
        ("transaction_name", String()),
        ("transaction_hash", UInt(64, Modifiers(readonly=True))),
        ("transaction_op", String()),
        ("transaction_status", UInt(8)),
        ("start_ts", DateTime()),
        ("start_ms", UInt(16)),
        ("finish_ts", DateTime()),
        ("finish_ms", UInt(16)),
        ("duration", UInt(32)),
        ("platform", String()),
        ("environment", String(Modifiers(nullable=True))),
        ("release", String(Modifiers(nullable=True))),
        ("dist", String(Modifiers(nullable=True))),
        ("ip_address_v4", IPv4(Modifiers(nullable=True))),
        ("ip_address_v6", IPv6(Modifiers(nullable=True))),
        ("user", String()),
        ("user_hash", UInt(64, Modifiers(readonly=True))),
        ("user_id", String(Modifiers(nullable=True))),
        ("user_name", String(Modifiers(nullable=True))),
        ("user_email", String(Modifiers(nullable=True))),
Example #12
    def __init__(self) -> None:
        self.__common_columns = ColumnSet([
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", String(Modifiers(nullable=True))),
            ("timestamp", DateTime()),
            ("platform", String(Modifiers(nullable=True))),
            ("environment", String(Modifiers(nullable=True))),
            ("release", String(Modifiers(nullable=True))),
            ("dist", String(Modifiers(nullable=True))),
            ("user", String(Modifiers(nullable=True))),
            ("transaction", String(Modifiers(nullable=True))),
            ("message", String(Modifiers(nullable=True))),
            ("title", String(Modifiers(nullable=True))),
            # User
            ("user_id", String(Modifiers(nullable=True))),
            ("username", String(Modifiers(nullable=True))),
            ("email", String(Modifiers(nullable=True))),
            ("ip_address", String(Modifiers(nullable=True))),
            # SDK
            ("sdk_name", String(Modifiers(nullable=True))),
            ("sdk_version", String(Modifiers(nullable=True))),
            # geo location context
            ("geo_country_code", String(Modifiers(nullable=True))),
            ("geo_region", String(Modifiers(nullable=True))),
            ("geo_city", String(Modifiers(nullable=True))),
            ("http_method", String(Modifiers(nullable=True))),
            ("http_referer", String(Modifiers(nullable=True))),
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
        ])
        self.__events_columns = EVENTS_COLUMNS
        self.__transactions_columns = TRANSACTIONS_COLUMNS

        events_storage = get_storage(StorageKey.EVENTS)

        events_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=SelectedStorageQueryPlanBuilder(
                selector=EventsQueryStorageSelector(
                    mappers=events_translation_mappers.
                    concat(transaction_translation_mappers).concat(
                        null_function_translation_mappers).concat(
                            TranslationMappers(
                                # XXX: Remove once we are using errors
                                columns=[
                                    ColumnToMapping(None, "release", None,
                                                    "tags", "sentry:release"),
                                    ColumnToMapping(None, "dist", None, "tags",
                                                    "sentry:dist"),
                                    ColumnToMapping(None, "user", None, "tags",
                                                    "sentry:user"),
                                ],
                                subscriptables=[
                                    SubscriptableMapper(
                                        None, "tags", None, "tags"),
                                    SubscriptableMapper(
                                        None, "contexts", None, "contexts"),
                                ],
                            )))), )

        discover_storage = get_storage(StorageKey.DISCOVER)

        discover_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=SingleStorageQueryPlanBuilder(
                storage=discover_storage,
                mappers=events_translation_mappers.concat(
                    transaction_translation_mappers).
                concat(null_function_translation_mappers).concat(
                    TranslationMappers(columns=[
                        ColumnToFunction(
                            None,
                            "ip_address",
                            "coalesce",
                            (
                                FunctionCall(
                                    None,
                                    "IPv4NumToString",
                                    (Column(None, None, "ip_address_v4"), ),
                                ),
                                FunctionCall(
                                    None,
                                    "IPv6NumToString",
                                    (Column(None, None, "ip_address_v6"), ),
                                ),
                            ),
                        ),
                        ColumnToColumn(None, "transaction", None,
                                       "transaction_name"),
                        ColumnToColumn(None, "username", None, "user_name"),
                        ColumnToColumn(None, "email", None, "user_email"),
                        ColumnToMapping(
                            None,
                            "geo_country_code",
                            None,
                            "contexts",
                            "geo.country_code",
                        ),
                        ColumnToMapping(None, "geo_region", None, "contexts",
                                        "geo.region"),
                        ColumnToMapping(None, "geo_city", None, "contexts",
                                        "geo.city"),
                        ColumnToFunction(
                            None,
                            "user",
                            "nullIf",
                            (Column(None, None, "user"), Literal(None, "")),
                        ),
                    ])).concat(
                        TranslationMappers(subscriptables=[
                            SubscriptableMapper(None, "tags", None, "tags"),
                            SubscriptableMapper(None, "contexts", None,
                                                "contexts"),
                        ], )),
            ))

        def selector_func(_query: Query) -> Tuple[str, List[str]]:
            if random.random() < float(
                    state.get_config("discover_query_percentage", 0)):
                return "events", ["discover"]

            return "events", []

        super().__init__(
            storages=[events_storage, discover_storage],
            query_pipeline_builder=PipelineDelegator(
                query_pipeline_builders={
                    "events": events_pipeline_builder,
                    "discover": discover_pipeline_builder,
                },
                selector_func=selector_func,
                callback_func=partial(callback_func, "discover"),
            ),
            abstract_column_set=(self.__common_columns +
                                 self.__events_columns +
                                 self.__transactions_columns),
            join_relationships={},
            writable_storage=None,
        )
Example #13
    def __init__(self) -> None:
        self.__common_columns = ColumnSet([
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", String(Modifiers(nullable=True))),
            ("timestamp", DateTime()),
            ("platform", String(Modifiers(nullable=True))),
            ("environment", String(Modifiers(nullable=True))),
            ("release", String(Modifiers(nullable=True))),
            ("dist", String(Modifiers(nullable=True))),
            ("user", String(Modifiers(nullable=True))),
            ("transaction", String(Modifiers(nullable=True))),
            ("message", String(Modifiers(nullable=True))),
            ("title", String(Modifiers(nullable=True))),
            # User
            ("user_id", String(Modifiers(nullable=True))),
            ("username", String(Modifiers(nullable=True))),
            ("email", String(Modifiers(nullable=True))),
            ("ip_address", String(Modifiers(nullable=True))),
            # SDK
            ("sdk_name", String(Modifiers(nullable=True))),
            ("sdk_version", String(Modifiers(nullable=True))),
            # geo location context
            ("geo_country_code", String(Modifiers(nullable=True))),
            ("geo_region", String(Modifiers(nullable=True))),
            ("geo_city", String(Modifiers(nullable=True))),
            ("http_method", String(Modifiers(nullable=True))),
            ("http_referer", String(Modifiers(nullable=True))),
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
        ])
        self.__events_columns = EVENTS_COLUMNS
        self.__transactions_columns = TRANSACTIONS_COLUMNS

        events_storage = get_storage(StorageKey.EVENTS)

        super().__init__(
            storages=[events_storage],
            query_plan_builder=SelectedStorageQueryPlanBuilder(
                selector=EventsQueryStorageSelector(
                    mappers=events_translation_mappers.
                    concat(transaction_translation_mappers).concat(
                        null_function_translation_mappers).concat(
                            TranslationMappers(
                                # XXX: Remove once we are using errors
                                columns=[
                                    ColumnToMapping(None, "release", None,
                                                    "tags", "sentry:release"),
                                    ColumnToMapping(None, "dist", None, "tags",
                                                    "sentry:dist"),
                                    ColumnToMapping(None, "user", None, "tags",
                                                    "sentry:user"),
                                ],
                                subscriptables=[
                                    SubscriptableMapper(
                                        None, "tags", None, "tags"),
                                    SubscriptableMapper(
                                        None, "contexts", None, "contexts"),
                                ],
                            )))),
            abstract_column_set=(self.__common_columns +
                                 self.__events_columns +
                                 self.__transactions_columns),
            writable_storage=None,
        )
Example #14
    LowCardinality,
    Materialized,
    Nullable,
    String,
    UInt,
    UUID,
    WithCodecs,
    WithDefault,
)
from snuba.migrations.parse_schema import _get_column


test_data = [
    # Basic types
    (("Date", "", "", ""), Date()),
    (("DateTime", "", "", ""), DateTime()),
    (
        ("Enum8('success' = 0, 'error' = 1)", "", "", ""),
        Enum([("success", 0), ("error", 1)]),
    ),
    (("FixedString(32)", "", "", ""), FixedString(32)),
    (("Float32", "", "", ""), Float(32)),
    (("IPv4", "", "", ""), IPv4()),
    (("IPv6", "", "", ""), IPv6()),
    (("String", "", "", ""), String()),
    (("UInt32", "", "", ""), UInt(32)),
    (("UUID", "", "", ""), UUID()),
    # Aggregate functions
    (
        ("AggregateFunction(uniq, UInt8)", "", "", ""),
        AggregateFunction("uniq", UInt(8)),
Example #15
    Column,
    DateTime,
    Nullable,
    UInt,
)
from snuba.clusters.storage_sets import StorageSetKey
from snuba.migrations import migration, operations, table_engines

columns = [
    # Kafka topic offset
    Column("offset", UInt(64)),
    Column("record_deleted", UInt(8)),
    # PG columns
    Column("project_id", UInt(64)),
    Column("group_id", UInt(64)),
    Column("date_added", Nullable(DateTime())),
    Column("user_id", Nullable(UInt(64))),
    Column("team_id", Nullable(UInt(64))),
]


class Migration(migration.MultiStepMigration):
    blocking = False

    def forwards_local(self) -> Sequence[operations.Operation]:
        return [
            operations.CreateTable(
                storage_set=StorageSetKey.EVENTS,
                table_name="groupassignee_local",
                columns=columns,
                engine=table_engines.ReplacingMergeTree(
Example #16
    FixedString,
    Float,
    Nested,
    Nullable,
    String,
    UInt,
)

from snuba.clusters.storage_sets import StorageSetKey
from snuba.migrations import migration, operations, table_engines

columns = [
    Column("event_id", FixedString(32)),
    Column("project_id", UInt(64)),
    Column("group_id", UInt(64)),
    Column("timestamp", DateTime()),
    Column("deleted", UInt(8)),
    Column("retention_days", UInt(16)),
    Column("platform", Nullable(String())),
    Column("message", Nullable(String())),
    Column("primary_hash", Nullable(FixedString(32))),
    Column("received", Nullable(DateTime())),
    Column("search_message", Nullable(String())),
    Column("title", Nullable(String())),
    Column("location", Nullable(String())),
    Column("user_id", Nullable(String())),
    Column("username", Nullable(String())),
    Column("email", Nullable(String())),
    Column("ip_address", Nullable(String())),
    Column("geo_country_code", Nullable(String())),
    Column("geo_region", Nullable(String())),
Example #17
 def forwards_local(self) -> Sequence[operations.Operation]:
     return [
         operations.ModifyColumn(
             storage_set=StorageSetKey.TRANSACTIONS,
             table_name="transactions_local",
             column=Column("duration", UInt(32)),
         ),
         operations.AddColumn(
             storage_set=StorageSetKey.TRANSACTIONS,
             table_name="transactions_local",
             column=Column(
                 "sdk_name",
                 String(Modifiers(low_cardinality=True, default="''"))),
             after="user_email",
         ),
         operations.AddColumn(
             storage_set=StorageSetKey.TRANSACTIONS,
             table_name="transactions_local",
             column=Column(
                 "sdk_version",
                 String(Modifiers(low_cardinality=True, default="''"))),
             after="sdk_name",
         ),
         operations.AddColumn(
             storage_set=StorageSetKey.TRANSACTIONS,
             table_name="transactions_local",
             column=Column(
                 "transaction_status",
                 UInt(8, Modifiers(default=str(UNKNOWN_SPAN_STATUS))),
             ),
             after="transaction_op",
         ),
         operations.AddColumn(
             storage_set=StorageSetKey.TRANSACTIONS,
             table_name="transactions_local",
             column=Column("_tags_flattened", String()),
             after="tags",
         ),
         operations.AddColumn(
             storage_set=StorageSetKey.TRANSACTIONS,
             table_name="transactions_local",
             column=Column("_contexts_flattened", String()),
             after="contexts",
         ),
         operations.AddColumn(
             storage_set=StorageSetKey.TRANSACTIONS,
             table_name="transactions_local",
             column=Column(
                 "user_hash",
                 UInt(64, Modifiers(materialized="cityHash64(user)"))),
             after="user",
         ),
         # The following columns were originally created as non low cardinality strings
         operations.ModifyColumn(
             storage_set=StorageSetKey.TRANSACTIONS,
             table_name="transactions_local",
             column=Column("transaction_name",
                           String(Modifiers(low_cardinality=True))),
         ),
         operations.ModifyColumn(
             storage_set=StorageSetKey.TRANSACTIONS,
             table_name="transactions_local",
             column=Column(
                 "release",
                 String(Modifiers(nullable=True, low_cardinality=True))),
         ),
         operations.ModifyColumn(
             storage_set=StorageSetKey.TRANSACTIONS,
             table_name="transactions_local",
             column=Column(
                 "dist",
                 String(Modifiers(nullable=True, low_cardinality=True))),
         ),
         operations.ModifyColumn(
             storage_set=StorageSetKey.TRANSACTIONS,
             table_name="transactions_local",
             column=Column(
                 "sdk_name",
                 String(Modifiers(low_cardinality=True, default="''"))),
         ),
         operations.ModifyColumn(
             storage_set=StorageSetKey.TRANSACTIONS,
             table_name="transactions_local",
             column=Column(
                 "sdk_version",
                 String(Modifiers(low_cardinality=True, default="''"))),
         ),
         operations.ModifyColumn(
             storage_set=StorageSetKey.TRANSACTIONS,
             table_name="transactions_local",
             column=Column(
                 "environment",
                 String(Modifiers(nullable=True, low_cardinality=True)),
             ),
         ),
         operations.AddColumn(
             storage_set=StorageSetKey.TRANSACTIONS,
             table_name="transactions_local",
             column=Column("message_timestamp", DateTime()),
             after="offset",
         ),
         operations.DropColumn(
             storage_set=StorageSetKey.TRANSACTIONS,
             table_name="transactions_local",
             column_name="_start_date",
         ),
         operations.DropColumn(
             storage_set=StorageSetKey.TRANSACTIONS,
             table_name="transactions_local",
             column_name="_finish_date",
         ),
     ]
Example #18
from typing import Sequence

from snuba.clickhouse.columns import Column, DateTime, UInt
from snuba.clusters.storage_sets import StorageSetKey
from snuba.migrations import migration, operations, table_engines
from snuba.migrations.columns import MigrationModifiers as Modifiers

columns: Sequence[Column[Modifiers]] = [
    # Kafka topic offset
    Column("offset", UInt(64)),
    Column("record_deleted", UInt(8)),
    # PG columns
    Column("project_id", UInt(64)),
    Column("group_id", UInt(64)),
    Column("date_added", DateTime(Modifiers(nullable=True))),
    Column("user_id", UInt(64, Modifiers(nullable=True))),
    Column("team_id", UInt(64, Modifiers(nullable=True))),
]


class Migration(migration.ClickhouseNodeMigration):
    blocking = False

    def forwards_local(self) -> Sequence[operations.SqlOperation]:
        return [
            operations.CreateTable(
                storage_set=StorageSetKey.EVENTS,
                table_name="groupassignee_local",
                columns=columns,
                engine=table_engines.ReplacingMergeTree(
                    storage_set=StorageSetKey.EVENTS,
Example #19
from snuba.datasets.storage import WritableTableStorage
from snuba.datasets.storages import StorageKey
from snuba.datasets.storages.processors.replaced_groups import (
    PostReplacementConsistencyEnforcer, )
from snuba.datasets.table_storage import KafkaStreamLoader
from snuba.query.conditions import ConditionFunctions, binary_condition
from snuba.query.expressions import Column, Literal
from snuba.query.processors.arrayjoin_keyvalue_optimizer import (
    ArrayJoinKeyValueOptimizer, )
from snuba.query.processors.mapping_promoter import MappingColumnPromoter
from snuba.query.processors.prewhere import PrewhereProcessor

all_columns = ColumnSet([
    ("org_id", UInt(64)),
    ("project_id", UInt(64)),
    ("timestamp", DateTime()),
    ("event_id", UUID()),
    ("event_hash", ReadOnly(UInt(64))),
    ("platform", String()),
    ("environment", Nullable(String())),
    ("release", Nullable(String())),
    ("dist", Nullable(String())),
    ("ip_address_v4", Nullable(IPv4())),
    ("ip_address_v6", Nullable(IPv6())),
    ("user", String()),
    ("user_hash", ReadOnly(UInt(64))),
    ("user_id", Nullable(String())),
    ("user_name", Nullable(String())),
    ("user_email", Nullable(String())),
    ("sdk_name", Nullable(String())),
    ("sdk_version", Nullable(String())),
Example #20
    DateTime,
    Enum,
    FixedString,
    Float,
    IPv4,
    IPv6,
    String,
    UInt,
)
from snuba.migrations.columns import MigrationModifiers as Modifiers
from snuba.migrations.parse_schema import _get_column

test_data = [
    # Basic types
    (("Date", "", "", ""), Date()),
    (("DateTime", "", "", ""), DateTime()),
    (
        ("Enum8('success' = 0, 'error' = 1)", "", "", ""),
        Enum([("success", 0), ("error", 1)]),
    ),
    (("FixedString(32)", "", "", ""), FixedString(32)),
    (("Float32", "", "", ""), Float(32)),
    (("IPv4", "", "", ""), IPv4()),
    (("IPv6", "", "", ""), IPv6()),
    (("String", "", "", ""), String()),
    (("UInt32", "", "", ""), UInt(32)),
    (("UUID", "", "", ""), UUID()),
    # Aggregate functions
    (
        ("AggregateFunction(uniq, UInt8)", "", "", ""),
        AggregateFunction("uniq", [UInt(8)]),
Example #21
    UUID,
    AggregateFunction,
    Column,
    DateTime,
    String,
    UInt,
)
from snuba.clusters.storage_sets import StorageSetKey
from snuba.migrations import operations
from snuba.migrations.columns import MigrationModifiers as Modifiers
from snuba.processor import MAX_UINT32, NIL_UUID

aggregate_columns_v1: Sequence[Column[Modifiers]] = [
    Column("org_id", UInt(64)),
    Column("project_id", UInt(64)),
    Column("started", DateTime()),
    Column("release", String(Modifiers(low_cardinality=True))),
    Column("environment", String(Modifiers(low_cardinality=True))),
    Column(
        "duration_quantiles",
        AggregateFunction("quantilesIf(0.5, 0.9)",
                          [UInt(32), UInt(8)]),
    ),
    Column("sessions", AggregateFunction("countIf", [UUID(), UInt(8)])),
    Column("users", AggregateFunction("uniqIf", [UUID(), UInt(8)])),
    Column("sessions_crashed", AggregateFunction("countIf",
                                                 [UUID(), UInt(8)])),
    Column("sessions_abnormal", AggregateFunction("countIf",
                                                  [UUID(), UInt(8)])),
    Column("sessions_errored", AggregateFunction("uniqIf",
                                                 [UUID(), UInt(8)])),
Example #22
 def forwards_local(self) -> Sequence[operations.SqlOperation]:
     return [
         operations.AddColumn(
             storage_set=StorageSetKey.EVENTS,
             table_name="sentry_local",
             column=Column("group_id", UInt(64)),
             after="project_id",
         ),
         operations.DropColumn(
             storage_set=StorageSetKey.EVENTS,
             table_name="sentry_local",
             column_name="device_model",
         ),
         operations.AddColumn(
             storage_set=StorageSetKey.EVENTS,
             table_name="sentry_local",
             column=Column("sdk_integrations", Array(String())),
             after="exception_frames",
         ),
         operations.AddColumn(
             storage_set=StorageSetKey.EVENTS,
             table_name="sentry_local",
             column=Column("modules.name", Nested([("name", String())])),
             after="sdk_integrations",
         ),
         operations.AddColumn(
             storage_set=StorageSetKey.EVENTS,
             table_name="sentry_local",
             column=Column("culprit", String(Modifiers(nullable=True))),
             after="sdk_integrations",
         ),
         operations.AddColumn(
             storage_set=StorageSetKey.EVENTS,
             table_name="sentry_local",
             column=Column("search_message",
                           String(Modifiers(nullable=True))),
             after="received",
         ),
         operations.AddColumn(
             storage_set=StorageSetKey.EVENTS,
             table_name="sentry_local",
             column=Column("title", String(Modifiers(nullable=True))),
             after="search_message",
         ),
         operations.AddColumn(
             storage_set=StorageSetKey.EVENTS,
             table_name="sentry_local",
             column=Column("location", String(Modifiers(nullable=True))),
             after="title",
         ),
         operations.AddColumn(
             storage_set=StorageSetKey.EVENTS,
             table_name="sentry_local",
             column=Column("_tags_flattened", String()),
             after="tags",
         ),
         operations.AddColumn(
             storage_set=StorageSetKey.EVENTS,
             table_name="sentry_local",
             column=Column("message_timestamp", DateTime()),
             after="partition",
         ),
     ]
Example #23
    def __init__(self):
        metadata_columns = ColumnSet([
            # optional stream related data
            ('offset', Nullable(UInt(64))),
            ('partition', Nullable(UInt(16))),
        ])

        promoted_tag_columns = ColumnSet([
            # These are the classic tags; they are saved in Snuba exactly as they
            # appear in the event body.
            ('level', Nullable(String())),
            ('logger', Nullable(String())),
            ('server_name', Nullable(String())),  # future name: device_id?
            ('transaction', Nullable(String())),
            ('environment', Nullable(String())),
            ('sentry:release', Nullable(String())),
            ('sentry:dist', Nullable(String())),
            ('sentry:user', Nullable(String())),
            ('site', Nullable(String())),
            ('url', Nullable(String())),
        ])

        promoted_context_tag_columns = ColumnSet([
            # These are promoted tags that come in `tags`, but are more closely
            # related to contexts. To avoid naming confusion with Clickhouse nested
            # columns, they are stored in the database with s/./_/ applied to their
            # names (see the short sketch after this block).
            # promoted tags
            ('app_device', Nullable(String())),
            ('device', Nullable(String())),
            ('device_family', Nullable(String())),
            ('runtime', Nullable(String())),
            ('runtime_name', Nullable(String())),
            ('browser', Nullable(String())),
            ('browser_name', Nullable(String())),
            ('os', Nullable(String())),
            ('os_name', Nullable(String())),
            ('os_rooted', Nullable(UInt(8))),
        ])
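
        # Hedged illustration, not part of the original dataset code: under the
        # s/./_/ convention described above, a promoted context tag maps onto its
        # column name by replacing dots with underscores, e.g. "os.name" ->
        # "os_name" and "device.family" -> "device_family". The helper below is
        # hypothetical and exists only to make that mapping explicit.
        def _promoted_context_tag_column(tag_name: str) -> str:
            return tag_name.replace(".", "_")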

        promoted_context_columns = ColumnSet([
            ('os_build', Nullable(String())),
            ('os_kernel_version', Nullable(String())),
            ('device_name', Nullable(String())),
            ('device_brand', Nullable(String())),
            ('device_locale', Nullable(String())),
            ('device_uuid', Nullable(String())),
            ('device_model_id', Nullable(String())),
            ('device_arch', Nullable(String())),
            ('device_battery_level', Nullable(Float(32))),
            ('device_orientation', Nullable(String())),
            ('device_simulator', Nullable(UInt(8))),
            ('device_online', Nullable(UInt(8))),
            ('device_charging', Nullable(UInt(8))),
        ])

        required_columns = ColumnSet([
            ('event_id', FixedString(32)),
            ('project_id', UInt(64)),
            ('group_id', UInt(64)),
            ('timestamp', DateTime()),
            ('deleted', UInt(8)),
            ('retention_days', UInt(16)),
        ])

        all_columns = required_columns + [
            # required for non-deleted
            ('platform', Nullable(String())),
            ('message', Nullable(String())),
            ('primary_hash', Nullable(FixedString(32))),
            ('received', Nullable(DateTime())),

            ('search_message', Nullable(String())),
            ('title', Nullable(String())),
            ('location', Nullable(String())),

            # optional user
            ('user_id', Nullable(String())),
            ('username', Nullable(String())),
            ('email', Nullable(String())),
            ('ip_address', Nullable(String())),

            # optional geo
            ('geo_country_code', Nullable(String())),
            ('geo_region', Nullable(String())),
            ('geo_city', Nullable(String())),

            ('sdk_name', Nullable(String())),
            ('sdk_version', Nullable(String())),
            ('type', Nullable(String())),
            ('version', Nullable(String())),
        ] + metadata_columns \
            + promoted_context_columns \
            + promoted_tag_columns \
            + promoted_context_tag_columns \
            + [
                # other tags
                ('tags', Nested([
                    ('key', String()),
                    ('value', String()),
                ])),

                # other context
                ('contexts', Nested([
                    ('key', String()),
                    ('value', String()),
                ])),

                # http interface
                ('http_method', Nullable(String())),
                ('http_referer', Nullable(String())),

                # exception interface
                ('exception_stacks', Nested([
                    ('type', Nullable(String())),
                    ('value', Nullable(String())),
                    ('mechanism_type', Nullable(String())),
                    ('mechanism_handled', Nullable(UInt(8))),
                ])),
                ('exception_frames', Nested([
                    ('abs_path', Nullable(String())),
                    ('filename', Nullable(String())),
                    ('package', Nullable(String())),
                    ('module', Nullable(String())),
                    ('function', Nullable(String())),
                    ('in_app', Nullable(UInt(8))),
                    ('colno', Nullable(UInt(32))),
                    ('lineno', Nullable(UInt(32))),
                    ('stack_level', UInt(16)),
                ])),

                # These are columns we added later in the life of the (current) production
                # database. They don't necessarily belong here in a logical/readability sense
                # but they are here to match the order of columns in production because
                # `insert_distributed_sync` is very sensitive to column existence and ordering.
                ('culprit', Nullable(String())),
                ('sdk_integrations', Array(String())),
                ('modules', Nested([
                    ('name', String()),
                    ('version', String()),
                ])),
        ]

        sample_expr = 'cityHash64(toString(event_id))'
        schema = ReplacingMergeTreeSchema(
            columns=all_columns,
            local_table_name='sentry_local',
            dist_table_name='sentry_dist',
            mandatory_conditions=[('deleted', '=', 0)],
            order_by='(project_id, toStartOfDay(timestamp), %s)' % sample_expr,
            partition_by='(toMonday(timestamp), if(equals(retention_days, 30), 30, 90))',
            version_column='deleted',
            sample_expr=sample_expr,
            migration_function=events_migrations)

        dataset_schemas = DatasetSchemas(
            read_schema=schema,
            write_schema=schema,
        )

        table_writer = TableWriter(
            write_schema=schema,
            stream_loader=KafkaStreamLoader(
                processor=EventsProcessor(promoted_tag_columns),
                default_topic="events",
                replacement_topic="event-replacements",
                commit_log_topic="snuba-commit-log",
            )
        )

        super(EventsDataset, self).__init__(
            dataset_schemas=dataset_schemas,
            table_writer=table_writer,
            time_group_columns={
                'time': 'timestamp',
                'rtime': 'received'
            },
            time_parse_columns=('timestamp', 'received')
        )

        self.__metadata_columns = metadata_columns
        self.__promoted_tag_columns = promoted_tag_columns
        self.__promoted_context_tag_columns = promoted_context_tag_columns
        self.__promoted_context_columns = promoted_context_columns
        self.__required_columns = required_columns

        self.__tags_processor = TagColumnProcessor(
            columns=all_columns,
            promoted_columns=self._get_promoted_columns(),
            column_tag_map=self._get_column_tag_map(),
        )
Example #24
columns = ColumnSet([
    ("project_id", UInt(64)),
    ("transaction_id", UUID()),
    ("trace_id", UUID()),
    ("transaction_span_id", UInt(64)),
    ("span_id", UInt(64)),
    ("parent_span_id", Nullable(UInt(64))),
    ("transaction_name", LowCardinality(String())),
    ("description", String()),  # description in span
    ("op", LowCardinality(String())),
    (
        "status",
        WithDefault(UInt(8), str(UNKNOWN_SPAN_STATUS)),
    ),
    ("start_ts", DateTime()),
    ("start_ns", UInt(32)),
    ("finish_ts", DateTime()),
    ("finish_ns", UInt(32)),
    ("duration_ms", UInt(32)),
    ("tags", Nested([("key", String()), ("value", String())])),
    ("_tags_hash_map", Materialized(Array(UInt(64)), TAGS_HASH_MAP_COLUMN)),
    ("retention_days", UInt(16)),
    ("deleted", UInt(8)),
])

schema = WritableTableSchema(
    columns=columns,
    local_table_name="spans_experimental_local",
    dist_table_name="spans_experimental_dist",
    storage_set_key=StorageSetKey.TRANSACTIONS,
Example #25
from typing import List, Sequence

from snuba.clickhouse.columns import UUID, Column, DateTime, String, UInt
from snuba.clusters.storage_sets import StorageSetKey
from snuba.migrations import migration, operations, table_engines
from snuba.migrations.columns import MigrationModifiers as Modifiers

columns: List[Column[Modifiers]] = [
    # primary key
    Column("organization_id", UInt(64)),
    Column("project_id", UInt(64)),
    Column("transaction_id", UUID()),
    Column("profile_id", UUID()),
    Column("received", DateTime()),
    # profiling data
    Column("profile", String(Modifiers(codecs=["LZ4HC(9)"]))),
    # filtering data
    Column("android_api_level", UInt(32, Modifiers(nullable=True))),
    Column("device_classification", String(Modifiers(low_cardinality=True))),
    Column("device_locale", String(Modifiers(low_cardinality=True))),
    Column("device_manufacturer", String(Modifiers(low_cardinality=True))),
    Column("device_model", String(Modifiers(low_cardinality=True))),
    Column("device_os_build_number",
           String(Modifiers(low_cardinality=True, nullable=True))),
    Column("device_os_name", String(Modifiers(low_cardinality=True))),
    Column("device_os_version", String(Modifiers(low_cardinality=True))),
    Column("duration_ns", UInt(64)),
    Column("environment", String(Modifiers(nullable=True,
                                           low_cardinality=True))),
    Column("platform", String(Modifiers(low_cardinality=True))),
    Column("trace_id", UUID()),
Example #26
        else:
            return None


EVENTS_COLUMNS = ColumnSet([
    ("group_id", UInt(64, Modifiers(nullable=True))),
    ("primary_hash", FixedString(32, Modifiers(nullable=True))),
    # Promoted tags
    ("level", String(Modifiers(nullable=True))),
    ("logger", String(Modifiers(nullable=True))),
    ("server_name", String(Modifiers(nullable=True))),
    ("site", String(Modifiers(nullable=True))),
    ("url", String(Modifiers(nullable=True))),
    ("location", String(Modifiers(nullable=True))),
    ("culprit", String(Modifiers(nullable=True))),
    ("received", DateTime(Modifiers(nullable=True))),
    ("sdk_integrations", Array(String(), Modifiers(nullable=True))),
    ("version", String(Modifiers(nullable=True))),
    # exception interface
    (
        "exception_stacks",
        Nested([
            ("type", String(Modifiers(nullable=True))),
            ("value", String(Modifiers(nullable=True))),
            ("mechanism_type", String(Modifiers(nullable=True))),
            ("mechanism_handled", UInt(8, Modifiers(nullable=True))),
        ]),
    ),
    (
        "exception_frames",
        Nested([
Example #27
from snuba.migrations import migration, operations, table_engines
from snuba.migrations.columns import MigrationModifiers as Modifiers

columns: Sequence[Column[Modifiers]] = [
    # Kafka topic offset
    Column("offset", UInt(64)),
    # GroupStatus in Sentry does not have a 'DELETED' state that reflects the deletion
    # of the record. Having a dedicated clickhouse-only flag to identify this case seems
    # more consistent than adding an additional value to the status field below that does
    # not exist on the Sentry side.
    Column("record_deleted", UInt(8)),
    # PG columns
    Column("project_id", UInt(64)),
    Column("id", UInt(64)),
    Column("status", UInt(8, Modifiers(nullable=True))),
    Column("last_seen", DateTime(Modifiers(nullable=True))),
    Column("first_seen", DateTime(Modifiers(nullable=True))),
    Column("active_at", DateTime(Modifiers(nullable=True))),
    Column("first_release_id", UInt(64, Modifiers(nullable=True))),
]


class Migration(migration.ClickhouseNodeMigration):
    blocking = False

    def forwards_local(self) -> Sequence[operations.SqlOperation]:
        return [
            operations.CreateTable(
                storage_set=StorageSetKey.CDC,
                table_name="groupedmessage_local",
                columns=columns,
Example #28
    def __init__(self) -> None:
        self.__common_columns = ColumnSet([
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", String(Modifiers(nullable=True))),
            ("timestamp", DateTime()),
            ("platform", String(Modifiers(nullable=True))),
            ("environment", String(Modifiers(nullable=True))),
            ("release", String(Modifiers(nullable=True))),
            ("dist", String(Modifiers(nullable=True))),
            ("user", String(Modifiers(nullable=True))),
            ("transaction", String(Modifiers(nullable=True))),
            ("message", String(Modifiers(nullable=True))),
            ("title", String(Modifiers(nullable=True))),
            # User
            ("user_id", String(Modifiers(nullable=True))),
            ("username", String(Modifiers(nullable=True))),
            ("email", String(Modifiers(nullable=True))),
            ("ip_address", String(Modifiers(nullable=True))),
            # SDK
            ("sdk_name", String(Modifiers(nullable=True))),
            ("sdk_version", String(Modifiers(nullable=True))),
            # geo location context
            ("geo_country_code", String(Modifiers(nullable=True))),
            ("geo_region", String(Modifiers(nullable=True))),
            ("geo_city", String(Modifiers(nullable=True))),
            ("http_method", String(Modifiers(nullable=True))),
            ("http_referer", String(Modifiers(nullable=True))),
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
            ("trace_id", String(Modifiers(nullable=True))),
            ("span_id", UInt(64, Modifiers(nullable=True))),
        ])
        self.__events_columns = EVENTS_COLUMNS
        self.__transactions_columns = TRANSACTIONS_COLUMNS

        discover_storage = get_storage(StorageKey.DISCOVER)
        discover_storage_plan_builder = SingleStorageQueryPlanBuilder(
            storage=discover_storage,
            mappers=events_translation_mappers.
            concat(transaction_translation_mappers).concat(
                null_function_translation_mappers).concat(
                    TranslationMappers(columns=[
                        ColumnToFunction(
                            None,
                            "ip_address",
                            "coalesce",
                            (
                                FunctionCall(
                                    None,
                                    "IPv4NumToString",
                                    (Column(None, None, "ip_address_v4"), ),
                                ),
                                FunctionCall(
                                    None,
                                    "IPv6NumToString",
                                    (Column(None, None, "ip_address_v6"), ),
                                ),
                            ),
                        ),
                        ColumnToColumn(None, "transaction", None,
                                       "transaction_name"),
                        ColumnToColumn(None, "username", None, "user_name"),
                        ColumnToColumn(None, "email", None, "user_email"),
                        ColumnToMapping(
                            None,
                            "geo_country_code",
                            None,
                            "contexts",
                            "geo.country_code",
                            nullable=True,
                        ),
                        ColumnToMapping(
                            None,
                            "geo_region",
                            None,
                            "contexts",
                            "geo.region",
                            nullable=True,
                        ),
                        ColumnToMapping(
                            None,
                            "geo_city",
                            None,
                            "contexts",
                            "geo.city",
                            nullable=True,
                        ),
                        ColumnToFunction(
                            None,
                            "user",
                            "nullIf",
                            (Column(None, None, "user"), Literal(None, "")),
                        ),
                    ])).concat(
                        TranslationMappers(subscriptables=[
                            SubscriptableMapper(None, "tags", None, "tags"),
                            SubscriptableMapper(None, "contexts", None,
                                                "contexts"),
                        ], )),
        )
        discover_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=discover_storage_plan_builder)

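        # Register the entity: a single read-only storage, no join
        # relationships, and project_id plus a timestamp condition required on
        # every query.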
        super().__init__(
            storages=[discover_storage],
            query_pipeline_builder=discover_pipeline_builder,
            abstract_column_set=(self.__common_columns +
                                 self.__events_columns +
                                 self.__transactions_columns),
            join_relationships={},
            writable_storage=None,
            validators=[EntityRequiredColumnValidator({"project_id"})],
            required_time_column="timestamp",
        )
Example #29
0
# NOTE: the column types used below are assumed to come from
# snuba.clickhouse.columns, as in the other examples in this document.
from snuba.clickhouse.columns import (
    AggregateFunction,
    Column,
    DateTime,
    String,
    UInt,
    UUID,
)
from snuba.clusters.storage_sets import StorageSetKey
from snuba.migrations import migration, operations, table_engines
from snuba.migrations.columns import LowCardinality
from snuba.processor import MAX_UINT32, NIL_UUID

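# Columns of the raw (unaggregated) sessions table.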
raw_columns = [
    Column("session_id", UUID()),
    Column("distinct_id", UUID()),
    Column("seq", UInt(64)),
    Column("org_id", UInt(64)),
    Column("project_id", UInt(64)),
    Column("retention_days", UInt(16)),
    Column("duration", UInt(32)),
    Column("status", UInt(8)),
    Column("errors", UInt(16)),
    Column("received", DateTime()),
    Column("started", DateTime()),
    Column("release", LowCardinality(String())),
    Column("environment", LowCardinality(String())),
]

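# Columns of the hourly aggregate sessions table; duration_quantiles is stored
# as a ClickHouse AggregateFunction state so partial quantilesIf aggregates can
# be merged at query time.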
aggregate_columns = [
    Column("org_id", UInt(64)),
    Column("project_id", UInt(64)),
    Column("started", DateTime()),
    Column("release", LowCardinality(String())),
    Column("environment", LowCardinality(String())),
    Column(
        "duration_quantiles",
        AggregateFunction("quantilesIf(0.5, 0.9)", UInt(32), UInt(8)),
    ),
Example #30
0
    def __init__(self) -> None:
        self.__common_columns = ColumnSet([
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", String(Modifiers(nullable=True))),
            ("timestamp", DateTime()),
            ("platform", String(Modifiers(nullable=True))),
            ("environment", String(Modifiers(nullable=True))),
            ("release", String(Modifiers(nullable=True))),
            ("dist", String(Modifiers(nullable=True))),
            ("user", String(Modifiers(nullable=True))),
            ("transaction", String(Modifiers(nullable=True))),
            ("message", String(Modifiers(nullable=True))),
            ("title", String(Modifiers(nullable=True))),
            # User
            ("user_id", String(Modifiers(nullable=True))),
            ("username", String(Modifiers(nullable=True))),
            ("email", String(Modifiers(nullable=True))),
            ("ip_address", String(Modifiers(nullable=True))),
            # SDK
            ("sdk_name", String(Modifiers(nullable=True))),
            ("sdk_version", String(Modifiers(nullable=True))),
            # Geo location context
            ("geo_country_code", String(Modifiers(nullable=True))),
            ("geo_region", String(Modifiers(nullable=True))),
            ("geo_city", String(Modifiers(nullable=True))),
            ("http_method", String(Modifiers(nullable=True))),
            ("http_referer", String(Modifiers(nullable=True))),
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
            ("trace_id", String(Modifiers(nullable=True))),
        ])
        self.__events_columns = EVENTS_COLUMNS
        self.__transactions_columns = TRANSACTIONS_COLUMNS

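        # Legacy path: query the events storage through a storage selector,
        # mapping release/dist/user onto their sentry:* tag equivalents until
        # the errors storage replaces it (see the XXX note below).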
        events_storage = get_storage(StorageKey.EVENTS)

        events_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=SelectedStorageQueryPlanBuilder(
                selector=EventsQueryStorageSelector(
                    mappers=events_translation_mappers
                    .concat(transaction_translation_mappers)
                    .concat(null_function_translation_mappers)
                    .concat(
                            TranslationMappers(
                                # XXX: Remove once we are using errors
                                columns=[
                                    ColumnToMapping(None, "release", None,
                                                    "tags", "sentry:release"),
                                    ColumnToMapping(None, "dist", None, "tags",
                                                    "sentry:dist"),
                                    ColumnToMapping(None, "user", None, "tags",
                                                    "sentry:user"),
                                ],
                                subscriptables=[
                                    SubscriptableMapper(
                                        None, "tags", None, "tags"),
                                    SubscriptableMapper(
                                        None, "contexts", None, "contexts"),
                                ],
                            )))), )

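        # Discover path: a single merged storage whose mappers translate the
        # abstract columns above into its physical schema.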
        discover_storage = get_storage(StorageKey.DISCOVER)
        discover_storage_plan_builder = SingleStorageQueryPlanBuilder(
            storage=discover_storage,
            mappers=events_translation_mappers
            .concat(transaction_translation_mappers)
            .concat(null_function_translation_mappers)
            .concat(
                    TranslationMappers(columns=[
                        ColumnToFunction(
                            None,
                            "ip_address",
                            "coalesce",
                            (
                                FunctionCall(
                                    None,
                                    "IPv4NumToString",
                                    (Column(None, None, "ip_address_v4"), ),
                                ),
                                FunctionCall(
                                    None,
                                    "IPv6NumToString",
                                    (Column(None, None, "ip_address_v6"), ),
                                ),
                            ),
                        ),
                        ColumnToColumn(None, "transaction", None,
                                       "transaction_name"),
                        ColumnToColumn(None, "username", None, "user_name"),
                        ColumnToColumn(None, "email", None, "user_email"),
                        ColumnToMapping(
                            None,
                            "geo_country_code",
                            None,
                            "contexts",
                            "geo.country_code",
                            nullable=True,
                        ),
                        ColumnToMapping(
                            None,
                            "geo_region",
                            None,
                            "contexts",
                            "geo.region",
                            nullable=True,
                        ),
                        ColumnToMapping(
                            None,
                            "geo_city",
                            None,
                            "contexts",
                            "geo.city",
                            nullable=True,
                        ),
                        ColumnToFunction(
                            None,
                            "user",
                            "nullIf",
                            (Column(None, None, "user"), Literal(None, "")),
                        ),
                    ])).concat(
                        TranslationMappers(subscriptables=[
                            SubscriptableMapper(None, "tags", None, "tags"),
                            SubscriptableMapper(None, "contexts", None,
                                                "contexts"),
                        ], )),
        )
        discover_pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=discover_storage_plan_builder)

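        # Behind the ERRORS_ROLLOUT_ALL flag, Discover is served from the
        # discover storage with a PipelineDelegator that can also run a sampled
        # pipeline (driven by sampling_selector_func / sampling_callback_func);
        # otherwise the legacy events pipeline is used.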
        pipeline_builder: Union[PipelineDelegator, SimplePipelineBuilder]
        if settings.ERRORS_ROLLOUT_ALL:
            storage = discover_storage
            sampled_pipeline_builder = SampledSimplePipelineBuilder(
                query_plan_builder=discover_storage_plan_builder)

            pipeline_builder = PipelineDelegator(
                query_pipeline_builders={
                    "primary": discover_pipeline_builder,
                    "sampler": sampled_pipeline_builder,
                },
                selector_func=sampling_selector_func,
                callback_func=sampling_callback_func,
            )
        else:
            storage = events_storage
            pipeline_builder = events_pipeline_builder

        super().__init__(
            storages=[storage],
            query_pipeline_builder=pipeline_builder,
            abstract_column_set=(self.__common_columns +
                                 self.__events_columns +
                                 self.__transactions_columns),
            join_relationships={},
            writable_storage=None,
            validators=[EntityRequiredColumnValidator({"project_id"})],
            required_time_column="timestamp",
        )