def __init__(self) -> None:
    storage = get_writable_storage(StorageKey.SPANS)

    super().__init__(
        storages=[storage],
        query_pipeline_builder=SimplePipelineBuilder(
            query_plan_builder=SingleStorageQueryPlanBuilder(
                storage=storage,
                mappers=TranslationMappers(
                    subscriptables=[
                        SubscriptableMapper(None, "tags", None, "tags")
                    ],
                ),
            ),
        ),
        abstract_column_set=ColumnSet(
            [
                ("project_id", UInt(64)),
                ("transaction_id", UUID()),
                ("trace_id", UUID()),
                ("transaction_span_id", UInt(64)),
                ("span_id", UInt(64)),
                ("parent_span_id", UInt(64, Modifiers(nullable=True))),
                ("transaction_name", String()),
                ("op", String()),
                ("status", UInt(8)),
                ("start_ts", DateTime()),
                ("start_ns", UInt(32)),
                ("finish_ts", DateTime()),
                ("finish_ns", UInt(32)),
                ("duration_ms", UInt(32)),
                ("tags", Nested([("key", String()), ("value", String())])),
            ]
        ),
        join_relationships={
            "contained": JoinRelationship(
                rhs_entity=EntityKey.TRANSACTIONS,
                columns=[
                    ("project_id", "project_id"),
                    ("transaction_span_id", "span_id"),
                ],
                join_type=JoinType.INNER,
                equivalences=[
                    ColumnEquivalence("transaction_id", "event_id"),
                    ColumnEquivalence("transaction_name", "transaction_name"),
                    ColumnEquivalence("trace_id", "trace_id"),
                ],
            )
        },
        writable_storage=storage,
        validators=[EntityRequiredColumnValidator({"project_id"})],
        required_time_column=None,
    )
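# A hedged illustration of how the "contained" join relationship declared
# above might be exercised from SnQL, whose MATCH ... -[relationship]->
# form expresses entity joins. The columns and conditions here are
# illustrative assumptions, not taken from the source.
SNQL_JOIN_EXAMPLE = """
    MATCH (s: spans) -[contained]-> (t: transactions)
    SELECT s.op, t.transaction_name
    WHERE s.project_id = 1
        AND t.finish_ts >= toDateTime('2024-01-01')
        AND t.finish_ts < toDateTime('2024-01-02')
"""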
def __init__(self) -> None:
    read_columns = ColumnSet(
        [
            ("org_id", UInt(64)),
            ("project_id", UInt(64)),
            ("key_id", Nullable(UInt(64))),
            ("timestamp", DateTime()),
            ("outcome", UInt(8)),
            ("reason", LowCardinality(Nullable(String()))),
            ("event_id", Nullable(UUID())),
        ]
    )

    read_schema = MergeTreeSchema(
        columns=read_columns,
        local_table_name="outcomes_raw_local",
        dist_table_name="outcomes_raw_dist",
        order_by="(org_id, project_id, timestamp)",
        partition_by="(toMonday(timestamp))",
        settings={"index_granularity": 16384},
        migration_function=outcomes_raw_migrations,
    )

    dataset_schemas = DatasetSchemas(
        read_schema=read_schema,
        write_schema=None,
        intermediary_schemas=[],
    )

    super().__init__(
        dataset_schemas=dataset_schemas,
        time_group_columns={"time": "timestamp"},
        time_parse_columns=("timestamp",),
    )
def __init__(self):
    read_columns = ColumnSet([
        ('org_id', UInt(64)),
        ('project_id', UInt(64)),
        ('key_id', Nullable(UInt(64))),
        ('timestamp', DateTime()),
        ('outcome', UInt(8)),
        ('reason', LowCardinality(Nullable(String()))),
        ('event_id', Nullable(UUID())),
    ])

    read_schema = MergeTreeSchema(
        columns=read_columns,
        local_table_name='outcomes_raw_local',
        dist_table_name='outcomes_raw_dist',
        order_by='(org_id, project_id, timestamp)',
        partition_by='(toMonday(timestamp))',
        settings={'index_granularity': 16384},
    )

    dataset_schemas = DatasetSchemas(
        read_schema=read_schema,
        write_schema=None,
        intermediary_schemas=[],
    )

    super().__init__(
        dataset_schemas=dataset_schemas,
        time_group_columns={'time': 'timestamp'},
        time_parse_columns=('timestamp',),
    )
def __backward_migrations(
    self, table_name: str
) -> Sequence[operations.SqlOperation]:
    return [
        operations.ModifyColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name=table_name,
            column=Column("trace_id", UUID()),
        ),
    ]
def __forward_migrations(
    self, table_name: str
) -> Sequence[operations.SqlOperation]:
    return [
        operations.AddColumn(
            storage_set=StorageSetKey.DISCOVER,
            table_name=table_name,
            column=Column("trace_id", UUID(Modifiers(nullable=True))),
            after="contexts",
        ),
    ]
def forwards_dist(self) -> Sequence[operations.SqlOperation]:
    return [
        operations.AddColumn(
            storage_set=StorageSetKey.EVENTS,
            table_name="errors_dist",
            column=Column("hierarchical_hashes", Array(UUID())),
            after="primary_hash",
        ),
        operations.AddColumn(
            storage_set=StorageSetKey.EVENTS,
            table_name="sentry_dist",
            column=Column("hierarchical_hashes", Array(FixedString(32))),
            after="primary_hash",
        ),
    ]
def __init__(self) -> None:
    columns = ColumnSet(
        [
            ("project_id", UInt(64)),
            ("event_id", UUID()),
            ("trace_id", UUID()),
            ("span_id", UInt(64)),
            ("transaction_name", LowCardinality(String())),
            (
                "transaction_hash",
                Materialized(UInt(64), "cityHash64(transaction_name)"),
            ),
            ("transaction_op", LowCardinality(String())),
            # WithDefault expects the default as a SQL expression string.
            ("transaction_status", WithDefault(UInt(8), str(UNKNOWN_SPAN_STATUS))),
            ("start_ts", DateTime()),
            ("start_ms", UInt(16)),
            ("_start_date", Materialized(Date(), "toDate(start_ts)")),
            ("finish_ts", DateTime()),
            ("finish_ms", UInt(16)),
            ("_finish_date", Materialized(Date(), "toDate(finish_ts)")),
            ("duration", UInt(32)),
            ("platform", LowCardinality(String())),
            ("environment", LowCardinality(Nullable(String()))),
            ("release", LowCardinality(Nullable(String()))),
            ("dist", LowCardinality(Nullable(String()))),
            ("ip_address_v4", Nullable(IPv4())),
            ("ip_address_v6", Nullable(IPv6())),
            ("user", WithDefault(String(), "''")),
            ("user_hash", Materialized(UInt(64), "cityHash64(user)")),
            ("user_id", Nullable(String())),
            ("user_name", Nullable(String())),
            ("user_email", Nullable(String())),
            ("sdk_name", WithDefault(LowCardinality(String()), "''")),
            ("sdk_version", WithDefault(LowCardinality(String()), "''")),
            ("tags", Nested([("key", String()), ("value", String())])),
            ("_tags_flattened", String()),
            ("contexts", Nested([("key", String()), ("value", String())])),
            ("_contexts_flattened", String()),
            ("partition", UInt(16)),
            ("offset", UInt(64)),
            ("retention_days", UInt(16)),
            ("deleted", UInt(8)),
        ]
    )

    schema = ReplacingMergeTreeSchema(
        columns=columns,
        local_table_name="transactions_local",
        dist_table_name="transactions_dist",
        mandatory_conditions=[],
        prewhere_candidates=["event_id", "project_id"],
        order_by="(project_id, _finish_date, transaction_name, cityHash64(span_id))",
        partition_by="(retention_days, toMonday(_finish_date))",
        version_column="deleted",
        sample_expr=None,
        migration_function=transactions_migrations,
    )

    dataset_schemas = DatasetSchemas(read_schema=schema, write_schema=schema)

    self.__tags_processor = TagColumnProcessor(
        columns=columns,
        promoted_columns=self._get_promoted_columns(),
        column_tag_map=self._get_column_tag_map(),
    )

    super().__init__(
        dataset_schemas=dataset_schemas,
        table_writer=TransactionsTableWriter(
            write_schema=schema,
            stream_loader=KafkaStreamLoader(
                processor=TransactionsMessageProcessor(),
                default_topic="events",
            ),
        ),
        time_group_columns={
            "bucketed_start": "start_ts",
            "bucketed_end": "finish_ts",
        },
        time_parse_columns=("start_ts", "finish_ts"),
    )
from abc import ABC
from typing import Mapping, Sequence
from unittest.mock import Mock

from snuba.clickhouse.columns import UUID, ColumnSet, String, UInt
from snuba.datasets.entities import EntityKey
from snuba.datasets.entity import Entity
from snuba.query.data_source.join import ColumnEquivalence, JoinRelationship, JoinType
from snuba.query.extensions import QueryExtension
from snuba.query.processors import QueryProcessor

EVENTS_SCHEMA = ColumnSet([
    ("event_id", UUID()),
    ("project_id", UInt(32)),
    ("message", String()),
    ("group_id", UInt(32)),
    ("user_id", UInt(64)),
])

GROUPS_SCHEMA = ColumnSet([
    ("id", UUID()),
    ("project_id", UInt(32)),
    ("message", String()),
    ("user_id", UInt(64)),
])

GROUPS_ASSIGNEE = ColumnSet([
    ("group_id", UUID()),
    ("project_id", UInt(32)),
    ("message", String()),
    ("user_id", UInt(64)),
    Nullable,
    String,
    UInt,
    UUID,
)
from snuba.datasets.dataset_schemas import StorageSchemas
from snuba.datasets.querylog_processor import QuerylogProcessor
from snuba.datasets.schemas.tables import MergeTreeSchema
from snuba.datasets.storage import WritableTableStorage
from snuba.datasets.table_storage import TableWriter, KafkaStreamLoader

status_type = Enum([("success", 0), ("error", 1), ("rate-limited", 2)])

columns = ColumnSet([
    ("request_id", UUID()),
    ("request_body", String()),
    ("referrer", LowCardinality(String())),
    ("dataset", LowCardinality(String())),
    ("projects", Array(UInt(64))),
    ("organization", Nullable(UInt(64))),
    ("timestamp", DateTime()),
    ("duration_ms", UInt(32)),
    ("status", status_type),
    (
        "clickhouse_queries",
        Nested([
            ("sql", String()),
            ("status", status_type),
            ("trace_id", Nullable(UUID())),
            ("duration_ms", UInt(32)),
from typing import Sequence

from snuba.clickhouse.columns import UUID, Column, DateTime, String, UInt
from snuba.clusters.storage_sets import StorageSetKey
from snuba.migrations import migration, operations, table_engines
from snuba.migrations.columns import MigrationModifiers as Modifiers

raw_columns = [
    Column("org_id", UInt(64)),
    Column("project_id", UInt(64)),
    Column("key_id", UInt(64, Modifiers(nullable=True))),
    Column("timestamp", DateTime()),
    Column("outcome", UInt(8)),
    Column("reason", String(Modifiers(nullable=True, low_cardinality=True))),
    Column("event_id", UUID(Modifiers(nullable=True))),
]

hourly_columns = [
    Column("org_id", UInt(64)),
    Column("project_id", UInt(64)),
    Column("key_id", UInt(64)),
    Column("timestamp", DateTime()),
    Column("outcome", UInt(8)),
    Column("reason", String(Modifiers(low_cardinality=True))),
    Column("times_seen", UInt(64)),
]

materialized_view_columns = [
    Column("org_id", UInt(64)),
    Column("project_id", UInt(64)),
    Column("key_id", UInt(64)),
from snuba.datasets.storages import StorageKey
from snuba.datasets.table_storage import build_kafka_stream_loader_from_settings
from snuba.query.processors.prewhere import PrewhereProcessor

WRITE_LOCAL_TABLE_NAME = "sessions_raw_local"
WRITE_DIST_TABLE_NAME = "sessions_raw_dist"
READ_LOCAL_TABLE_NAME = "sessions_hourly_local"
READ_DIST_TABLE_NAME = "sessions_hourly_dist"
READ_LOCAL_MV_NAME = "sessions_hourly_mv_local"
READ_DIST_MV_NAME = "sessions_hourly_mv_dist"

all_columns = ColumnSet(
    [
        ("session_id", UUID()),
        ("distinct_id", UUID()),
        ("quantity", UInt(32)),
        ("seq", UInt(64)),
        ("org_id", UInt(64)),
        ("project_id", UInt(64)),
        ("retention_days", UInt(16)),
        ("duration", UInt(32)),
        ("status", UInt(8)),
        ("errors", UInt(16)),
        ("received", DateTime()),
        ("started", DateTime()),
        ("release", String()),
        ("environment", String()),
        ("user_agent", String()),
        ("os", String()),
    IPv4,
    IPv6,
    Nested,
    String,
    UInt,
)
from snuba.clusters.storage_sets import StorageSetKey
from snuba.migrations import migration, operations, table_engines
from snuba.migrations.columns import Materialized
from snuba.migrations.columns import MigrationModifiers as Modifiers

UNKNOWN_SPAN_STATUS = 2

columns: List[Column[Modifiers]] = [
    Column("project_id", UInt(64)),
    Column("event_id", UUID()),
    Column("trace_id", UUID()),
    Column("span_id", UInt(64)),
    Column("transaction_name", String(Modifiers(low_cardinality=True))),
    Column(
        "transaction_hash",
        UInt(64, Modifiers(materialized="cityHash64(transaction_name)")),
    ),
    Column("transaction_op", String(Modifiers(low_cardinality=True))),
    Column(
        "transaction_status",
        UInt(8, Modifiers(default=str(UNKNOWN_SPAN_STATUS))),
    ),
    Column("start_ts", DateTime()),
    Column("start_ms", UInt(16)),
    Column("finish_ts", DateTime()),
    Column("finish_ms", UInt(16)),
    Column("duration", UInt(32)),
def __init__(self):
    write_columns = ColumnSet([
        ('org_id', UInt(64)),
        ('project_id', UInt(64)),
        ('key_id', Nullable(UInt(64))),
        ('timestamp', DateTime()),
        ('outcome', UInt(8)),
        ('reason', LowCardinality(Nullable(String()))),
        ('event_id', Nullable(UUID())),
    ])

    write_schema = MergeTreeSchema(
        columns=write_columns,
        # TODO: change to outcomes.raw_local when we add multi DB support
        local_table_name=WRITE_LOCAL_TABLE_NAME,
        dist_table_name=WRITE_DIST_TABLE_NAME,
        order_by='(org_id, project_id, timestamp)',
        partition_by='(toMonday(timestamp))',
        settings={'index_granularity': 16384},
    )

    read_columns = ColumnSet([
        ('org_id', UInt(64)),
        ('project_id', UInt(64)),
        ('key_id', UInt(64)),
        ('timestamp', DateTime()),
        ('outcome', UInt(8)),
        ('reason', LowCardinality(String())),
        ('times_seen', UInt(64)),
    ])

    read_schema = SummingMergeTreeSchema(
        columns=read_columns,
        local_table_name=READ_LOCAL_TABLE_NAME,
        dist_table_name=READ_DIST_TABLE_NAME,
        order_by='(org_id, project_id, key_id, outcome, reason, timestamp)',
        partition_by='(toMonday(timestamp))',
        settings={'index_granularity': 256},
    )

    materialized_view_columns = ColumnSet([
        ('org_id', UInt(64)),
        ('project_id', UInt(64)),
        ('key_id', UInt(64)),
        ('timestamp', DateTime()),
        ('outcome', UInt(8)),
        ('reason', String()),
        ('times_seen', UInt(64)),
    ])

    # TODO: Find a better way to specify a query for a materialized view.
    # The problem right now is that we have a way to define our columns in
    # a ColumnSet abstraction, but the query doesn't use it.
    query = """
        SELECT
            org_id,
            project_id,
            ifNull(key_id, 0) AS key_id,
            toStartOfHour(timestamp) AS timestamp,
            outcome,
            ifNull(reason, 'none') AS reason,
            count() AS times_seen
        FROM %(source_table_name)s
        GROUP BY org_id, project_id, key_id, timestamp, outcome, reason
    """

    materialized_view = MaterializedViewSchema(
        local_materialized_view_name='outcomes_mv_hourly_local',
        dist_materialized_view_name='outcomes_mv_hourly_dist',
        columns=materialized_view_columns,
        query=query,
        local_source_table_name=WRITE_LOCAL_TABLE_NAME,
        local_destination_table_name=READ_LOCAL_TABLE_NAME,
        dist_source_table_name=WRITE_DIST_TABLE_NAME,
        dist_destination_table_name=READ_DIST_TABLE_NAME,
    )

    dataset_schemas = DatasetSchemas(
        read_schema=read_schema,
        write_schema=write_schema,
        intermediary_schemas=[materialized_view],
    )

    super(OutcomesDataset, self).__init__(
        dataset_schemas=dataset_schemas,
        processor=OutcomesProcessor(),
        default_topic="outcomes",
    )
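# A minimal sketch of what the TODO above could look like: derive the
# materialized view's SELECT list from the same column metadata, so the
# query and the ColumnSet cannot drift apart. `build_matview_query`,
# `expressions`, and `aggregate_columns` are hypothetical, not Snuba APIs.
def build_matview_query(column_names, expressions, aggregate_columns):
    # Columns with a registered expression are rendered through it;
    # everything else is selected verbatim and used as a GROUP BY key.
    select_list = ", ".join(expressions.get(name, name) for name in column_names)
    group_by = ", ".join(
        name for name in column_names if name not in aggregate_columns
    )
    return f"SELECT {select_list} FROM %(source_table_name)s GROUP BY {group_by}"


# Reproduces the hourly rollup query defined above from column metadata.
hourly_expressions = {
    "key_id": "ifNull(key_id, 0) AS key_id",
    "timestamp": "toStartOfHour(timestamp) AS timestamp",
    "reason": "ifNull(reason, 'none') AS reason",
    "times_seen": "count() AS times_seen",
}
hourly_query = build_matview_query(
    ["org_id", "project_id", "key_id", "timestamp", "outcome", "reason", "times_seen"],
    hourly_expressions,
    aggregate_columns={"times_seen"},
)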
import pytest

from snuba.clickhouse.columns import UUID, UInt
from snuba.datasets.entities import EntityKey
from snuba.datasets.entities.entity_data_model import EntityColumnSet
from snuba.query import SelectedExpression
from snuba.query.composite import CompositeQuery
from snuba.query.conditions import ConditionFunctions, binary_condition
from snuba.query.data_source.simple import Entity
from snuba.query.expressions import Column, FunctionCall, Literal
from snuba.query.logical import Query
from snuba.utils.schemas import Column as EntityColumn
from snuba.web.query import ProjectsFinder

EVENTS_SCHEMA = EntityColumnSet([
    EntityColumn("event_id", UUID()),
    EntityColumn("project_id", UInt(32)),
    EntityColumn("group_id", UInt(32)),
])

SIMPLE_QUERY = Query(
    Entity(EntityKey.EVENTS, EVENTS_SCHEMA),
    selected_columns=[
        SelectedExpression(
            "alias",
            Column("_snuba_project", None, "project_id"),
        )
    ],
    array_join=None,
    condition=binary_condition(
        ConditionFunctions.IN,
    Enum,
    Float,
    LowCardinality,
    Nested,
    Nullable,
    String,
    UInt,
    UUID,
)
from snuba.clusters.storage_sets import StorageSetKey
from snuba.migrations import migration, operations, table_engines

status_type = Enum([("success", 0), ("error", 1), ("rate-limited", 2)])

columns = [
    Column("request_id", UUID()),
    Column("request_body", String()),
    Column("referrer", LowCardinality(String())),
    Column("dataset", LowCardinality(String())),
    Column("projects", Array(UInt(64))),
    Column("organization", Nullable(UInt(64))),
    Column("timestamp", DateTime()),
    Column("duration_ms", UInt(32)),
    Column("status", status_type),
    Column(
        "clickhouse_queries",
        Nested([
            Column("sql", String()),
            Column("status", status_type),
            Column("trace_id", Nullable(UUID())),
            Column("duration_ms", UInt(32)),
    DateTime,
    IPv4,
    IPv6,
    Nested,
    String,
    UInt,
)
from snuba.clusters.storage_sets import StorageSetKey
from snuba.datasets.storages.tags_hash_map import TAGS_HASH_MAP_COLUMN
from snuba.migrations import migration, operations, table_engines
from snuba.migrations.columns import MigrationModifiers as Modifiers

columns: Sequence[Column[Modifiers]] = [
    Column("project_id", UInt(64)),
    Column("timestamp", DateTime()),
    Column("event_id", UUID(Modifiers(codecs=["NONE"]))),
    Column("platform", String(Modifiers(low_cardinality=True))),
    Column("environment", String(Modifiers(nullable=True, low_cardinality=True))),
    Column("release", String(Modifiers(nullable=True, low_cardinality=True))),
    Column("dist", String(Modifiers(nullable=True, low_cardinality=True))),
    Column("ip_address_v4", IPv4(Modifiers(nullable=True))),
    Column("ip_address_v6", IPv6(Modifiers(nullable=True))),
    Column("user", String(Modifiers(default="''"))),
    Column("user_hash", UInt(64, Modifiers(materialized="cityHash64(user)"))),
    Column("user_id", String(Modifiers(nullable=True))),
    Column("user_name", String(Modifiers(nullable=True))),
    Column("user_email", String(Modifiers(nullable=True))),
    Column("sdk_name", String(Modifiers(nullable=True, low_cardinality=True))),
    Column("sdk_version", String(Modifiers(nullable=True, low_cardinality=True))),
from snuba.clusters.storage_sets import StorageSetKey
from snuba.datasets.replays_processor import ReplaysProcessor
from snuba.datasets.schemas.tables import WritableTableSchema
from snuba.datasets.storage import WritableTableStorage
from snuba.datasets.storages import StorageKey
from snuba.datasets.table_storage import build_kafka_stream_loader_from_settings
from snuba.query.processors.conditions_enforcer import ProjectIdEnforcer
from snuba.query.processors.table_rate_limit import TableRateLimit
from snuba.utils.schemas import Nested
from snuba.utils.streams.topics import Topic

LOCAL_TABLE_NAME = "replays_local"
DIST_TABLE_NAME = "replays_dist"

columns = ColumnSet([
    ("replay_id", UUID()),
    ("sequence_id", UInt(16)),
    ("timestamp", DateTime()),
    (
        "trace_ids",
        Array(UUID()),
    ),  # TODO: create bloom filter index / materialize column
    ("title", String(Modifiers(readonly=True))),
    ### common sentry event columns
    ("project_id", UInt(64)),
    # release/environment info
    ("platform", String()),
    ("environment", String(Modifiers(nullable=True))),
    ("release", String(Modifiers(nullable=True))),
    ("dist", String(Modifiers(nullable=True))),
    ("ip_address_v4", IPv4(Modifiers(nullable=True))),
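# A hedged sketch of the DDL the trace_ids TODO above points at: a bloom
# filter data-skipping index so lookups by trace id can skip granules.
# The index name and granularity are illustrative assumptions.
TRACE_IDS_INDEX_DDL = (
    f"ALTER TABLE {LOCAL_TABLE_NAME} "
    "ADD INDEX bf_trace_ids trace_ids TYPE bloom_filter GRANULARITY 1"
)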
from typing import Sequence

from snuba.clickhouse.columns import (
    UUID,
    Column,
    DateTime,
    String,
    UInt,
)
from snuba.clusters.storage_sets import StorageSetKey
from snuba.migrations import migration, operations, table_engines
from snuba.migrations.columns import MigrationModifiers as Modifiers

from .matview import aggregate_columns_v1, create_matview_v1

raw_columns: Sequence[Column[Modifiers]] = [
    Column("session_id", UUID()),
    Column("distinct_id", UUID()),
    Column("seq", UInt(64)),
    Column("org_id", UInt(64)),
    Column("project_id", UInt(64)),
    Column("retention_days", UInt(16)),
    Column("duration", UInt(32)),
    Column("status", UInt(8)),
    Column("errors", UInt(16)),
    Column("received", DateTime()),
    Column("started", DateTime()),
    Column("release", String(Modifiers(low_cardinality=True))),
    Column("environment", String(Modifiers(low_cardinality=True))),
]
from snuba.datasets.storages.factory import get_writable_storage
from snuba.pipeline.simple_pipeline import SimplePipelineBuilder
from snuba.query.processors import QueryProcessor
from snuba.query.processors.object_id_rate_limiter import (
    OrganizationRateLimiterProcessor,
    ProjectRateLimiterProcessor,
    ProjectReferrerRateLimiter,
    ReferrerRateLimiterProcessor,
)
from snuba.query.processors.quota_processor import ResourceQuotaProcessor
from snuba.query.validation.validators import EntityRequiredColumnValidator

profile_columns = EntityColumnSet([
    Column("organization_id", UInt(64)),
    Column("project_id", UInt(64)),
    Column("transaction_id", UUID()),
    Column("profile_id", UUID()),
    Column("received", DateTime()),
    Column("profile", String()),
    Column("android_api_level", UInt(32, Modifiers(nullable=True))),
    Column("device_classification", String()),
    Column("device_locale", String()),
    Column("device_manufacturer", String()),
    Column("device_model", String()),
    Column("device_os_build_number", String(Modifiers(nullable=True))),
    Column("device_os_name", String()),
    Column("device_os_version", String()),
    Column("duration_ns", UInt(64)),
    Column("environment", String(Modifiers(nullable=True))),
    Column("platform", String()),
    Column("trace_id", UUID()),
from snuba.clickhouse.columns import UUID, Array, ColumnSet, DateTime, Float
from snuba.clickhouse.columns import SchemaModifiers as Modifiers
from snuba.clickhouse.columns import String, UInt
from snuba.clusters.storage_sets import StorageSetKey
from snuba.datasets.querylog_processor import QuerylogProcessor
from snuba.datasets.schemas.tables import WritableTableSchema
from snuba.datasets.storage import WritableTableStorage
from snuba.datasets.storages import StorageKey
from snuba.datasets.table_storage import build_kafka_stream_loader_from_settings
from snuba.utils.streams.topics import Topic

columns = ColumnSet([
    ("request_id", UUID()),
    ("request_body", String()),
    ("referrer", String()),
    ("dataset", String()),
    ("projects", Array(UInt(64))),
    ("organization", UInt(64, Modifiers(nullable=True))),
    ("timestamp", DateTime()),
    ("duration_ms", UInt(32)),
    ("status", String()),
    # clickhouse_queries Nested columns.
    # These are expanded into arrays instead of being expressed as a
    # Nested column because, when adding a new column to a nested field,
    # we need to provide a default for the entire array (each new column
    # is an array). The same schema cannot be achieved with the Nested
    # construct (where we can only provide defaults for individual
    # values), so if we used the Nested construct, this schema could not
    # match the one generated by the migration framework (or by any
    # ALTER statement).
    ("clickhouse_queries.sql", Array(String())),
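# To make the comment above concrete: each logical row of clickhouse_queries
# is spread across parallel arrays that must stay index-aligned. A plain
# Python illustration (column values made up for the example):
flattened = {
    "clickhouse_queries.sql": ["SELECT 1", "SELECT 2"],
    "clickhouse_queries.status": ["success", "error"],
    "clickhouse_queries.duration_ms": [12, 345],
}

# Reassemble the logical nested rows by zipping the arrays together;
# adding a new column only requires one more array with a per-row default.
nested_rows = [
    dict(zip(flattened.keys(), values)) for values in zip(*flattened.values())
]
assert nested_rows[0]["clickhouse_queries.status"] == "success"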
from snuba.web.split import ColumnSplitQueryStrategy, TimeSplitQueryStrategy

required_columns = [
    "event_id",
    "primary_hash",
    "project_id",
    "group_id",
    "timestamp",
    "deleted",
    "retention_days",
]

all_columns = ColumnSet(
    [
        ("project_id", UInt(64)),
        ("timestamp", DateTime()),
        ("event_id", UUID()),
        ("platform", String()),
        ("environment", String(Modifiers(nullable=True))),
        ("release", String(Modifiers(nullable=True))),
        ("dist", String(Modifiers(nullable=True))),
        ("ip_address_v4", IPv4(Modifiers(nullable=True))),
        ("ip_address_v6", IPv6(Modifiers(nullable=True))),
        ("user", String()),
        ("user_hash", UInt(64, Modifiers(readonly=True))),
        ("user_id", String(Modifiers(nullable=True))),
        ("user_name", String(Modifiers(nullable=True))),
        ("user_email", String(Modifiers(nullable=True))),
        ("sdk_name", String(Modifiers(nullable=True))),
        ("sdk_version", String(Modifiers(nullable=True))),
        ("http_method", String(Modifiers(nullable=True))),
        ("http_referer", String(Modifiers(nullable=True))),
    FixedString,
    IPv4,
    IPv6,
    Nested,
    String,
    UInt,
)
from snuba.clusters.storage_sets import StorageSetKey
from snuba.migrations import migration, operations, table_engines
from snuba.migrations.columns import MigrationModifiers as Modifiers

columns: Sequence[Column[Modifiers]] = [
    Column("org_id", UInt(64)),
    Column("project_id", UInt(64)),
    Column("timestamp", DateTime()),
    Column("event_id", UUID(Modifiers(codecs=["NONE"]))),
    Column(
        "event_hash",
        UInt(
            64,
            Modifiers(materialized="cityHash64(toString(event_id))", codecs=["NONE"]),
        ),
    ),
    Column("platform", String(Modifiers(low_cardinality=True))),
    Column("environment", String(Modifiers(nullable=True, low_cardinality=True))),
    Column("release", String(Modifiers(nullable=True, low_cardinality=True))),
    Column("dist", String(Modifiers(nullable=True, low_cardinality=True))),
    Column("ip_address_v4", IPv4(Modifiers(nullable=True))),
    Column("ip_address_v6", IPv6(Modifiers(nullable=True))),
    UInt,
    WithDefault,
)
from snuba.clusters.storage_sets import StorageSetKey
from snuba.datasets.schemas.tables import WritableTableSchema
from snuba.datasets.spans_processor import UNKNOWN_SPAN_STATUS, SpansMessageProcessor
from snuba.datasets.storage import WritableTableStorage
from snuba.datasets.storages import StorageKey
from snuba.datasets.storages.tags_hash_map import TAGS_HASH_MAP_COLUMN
from snuba.datasets.table_storage import KafkaStreamLoader
from snuba.query.processors.prewhere import PrewhereProcessor
from snuba.web.split import TimeSplitQueryStrategy

columns = ColumnSet([
    ("project_id", UInt(64)),
    ("transaction_id", UUID()),
    ("trace_id", UUID()),
    ("transaction_span_id", UInt(64)),
    ("span_id", UInt(64)),
    ("parent_span_id", Nullable(UInt(64))),
    ("transaction_name", LowCardinality(String())),
    ("description", String()),  # description in span
    ("op", LowCardinality(String())),
    ("status", WithDefault(UInt(8), str(UNKNOWN_SPAN_STATUS))),
    ("start_ts", DateTime()),
    ("start_ns", UInt(32)),
    ("finish_ts", DateTime()),
    ("finish_ns", UInt(32)),
        )

    if "http_referer" not in current_schema:
        ret.append(
            f"ALTER TABLE {clickhouse_table} ADD COLUMN http_referer Nullable(String) AFTER http_method"
        )

    return ret


all_columns = ColumnSet(
    [
        ("org_id", UInt(64)),
        ("project_id", UInt(64)),
        ("timestamp", DateTime()),
        ("event_id", WithCodecs(UUID(), ["NONE"])),
        (
            "event_hash",
            WithCodecs(
                Materialized(UInt(64), "cityHash64(toString(event_id))"),
                ["NONE"],
            ),
        ),
        ("platform", LowCardinality(String())),
        ("environment", LowCardinality(Nullable(String()))),
        ("release", LowCardinality(Nullable(String()))),
        ("dist", LowCardinality(Nullable(String()))),
        ("ip_address_v4", Nullable(IPv4())),
        ("ip_address_v6", Nullable(IPv6())),
        ("user", WithDefault(String(), "''")),
        ("user_hash", Materialized(UInt(64), "cityHash64(user)")),
        ("user_id", Nullable(String())),
    Column("project_id", UInt(64)),
    Column("started", DateTime()),
    Column("release", String(Modifiers(low_cardinality=True))),
    Column("environment", String(Modifiers(low_cardinality=True))),
    Column("user_agent", String(Modifiers(low_cardinality=True))),
    Column("os", String(Modifiers(low_cardinality=True))),
    # durations
    Column(
        "duration_quantiles",
        AggregateFunction("quantilesIf(0.5, 0.9)", [UInt(32), UInt(8)]),
    ),
    Column("duration_avg", AggregateFunction("avgIf", [UInt(32), UInt(8)])),
    # sessions:
    Column("sessions", AggregateFunction("countIf", [UUID(), UInt(8)])),
    Column("sessions_preaggr", AggregateFunction("sumIf", [UInt(32), UInt(8)])),
    Column("sessions_crashed", AggregateFunction("countIf", [UUID(), UInt(8)])),
    Column(
        "sessions_crashed_preaggr",
        AggregateFunction("sumIf", [UInt(32), UInt(8)]),
    ),
    Column("sessions_abnormal", AggregateFunction("countIf", [UUID(), UInt(8)])),
    Column(
        "sessions_abnormal_preaggr",
        AggregateFunction("sumIf", [UInt(32), UInt(8)]),
    ),
    Column("sessions_errored", AggregateFunction("uniqIf", [UUID(), UInt(8)])),
    Column(
        "sessions_errored_preaggr",
        AggregateFunction("sumIf", [UInt(32), UInt(8)]),
    ),
    # users:
from snuba.datasets.storages.errors_common import mandatory_conditions
from snuba.datasets.storages.events_bool_contexts import EventsBooleanContextsProcessor
from snuba.query.processors.arrayjoin_keyvalue_optimizer import (
    ArrayJoinKeyValueOptimizer,
)
from snuba.query.processors.mapping_optimizer import MappingOptimizer
from snuba.query.processors.mapping_promoter import MappingColumnPromoter
from snuba.query.processors.prewhere import PrewhereProcessor
from snuba.query.processors.type_converters.uuid_column_processor import (
    UUIDColumnProcessor,
)
from snuba.web.split import ColumnSplitQueryStrategy, TimeSplitQueryStrategy

columns = ColumnSet(
    [
        ("event_id", UUID()),
        ("project_id", UInt(64)),
        ("type", String()),
        ("timestamp", DateTime()),
        ("platform", String()),
        ("environment", String(Modifiers(nullable=True))),
        ("release", String(Modifiers(nullable=True))),
        ("dist", String(Modifiers(nullable=True))),
        ("transaction_name", String()),
        ("message", String()),
        ("title", String()),
        ("user", String()),
        ("user_hash", UInt(64)),
        ("user_id", String(Modifiers(nullable=True))),
        ("user_name", String(Modifiers(nullable=True))),
        ("user_email", String(Modifiers(nullable=True))),
from snuba.clickhouse.columns import SchemaModifiers as Modifiers
from snuba.clickhouse.columns import String, UInt
from snuba.clusters.storage_sets import StorageSetKey
from snuba.datasets.schemas.tables import TableSchema
from snuba.datasets.storage import ReadableTableStorage
from snuba.datasets.storages import StorageKey
from snuba.datasets.storages.errors_common import mandatory_conditions
from snuba.datasets.storages.event_id_column_processor import EventIdColumnProcessor
from snuba.query.processors.arrayjoin_keyvalue_optimizer import (
    ArrayJoinKeyValueOptimizer,
)
from snuba.query.processors.mapping_optimizer import MappingOptimizer
from snuba.query.processors.prewhere import PrewhereProcessor
from snuba.web.split import TimeSplitQueryStrategy

columns = ColumnSet([
    ("event_id", UUID()),
    ("project_id", UInt(64)),
    ("type", String()),
    ("timestamp", DateTime()),
    ("platform", String()),
    ("environment", String(Modifiers(nullable=True))),
    ("release", String(Modifiers(nullable=True))),
    ("dist", String(Modifiers(nullable=True))),
    ("transaction_name", String()),
    ("message", String()),
    ("title", String()),
    ("user", String()),
    ("user_hash", UInt(64)),
    ("user_id", String(Modifiers(nullable=True))),
    ("user_name", String(Modifiers(nullable=True))),
    ("user_email", String(Modifiers(nullable=True))),
def __init__(self):
    columns = ColumnSet([
        ('project_id', UInt(64)),
        ('event_id', UUID()),
        ('trace_id', UUID()),
        ('span_id', UInt(64)),
        ('transaction_name', String()),
        ('transaction_hash', Materialized(
            UInt(64),
            'cityHash64(transaction_name)',
        )),
        ('transaction_op', LowCardinality(String())),
        ('start_ts', DateTime()),
        ('start_ms', UInt(16)),
        ('finish_ts', DateTime()),
        ('finish_ms', UInt(16)),
        ('duration', Materialized(
            UInt(32),
            '((finish_ts - start_ts) * 1000) + (finish_ms - start_ms)',
        )),
        ('platform', LowCardinality(String())),
        ('environment', Nullable(String())),
        ('release', Nullable(String())),
        ('dist', Nullable(String())),
        ('ip_address_v4', Nullable(IPv4())),
        ('ip_address_v6', Nullable(IPv6())),
        ('user', WithDefault(String(), "''")),
        ('user_id', Nullable(String())),
        ('user_name', Nullable(String())),
        ('user_email', Nullable(String())),
        ('tags', Nested([
            ('key', String()),
            ('value', String()),
        ])),
        ('contexts', Nested([
            ('key', String()),
            ('value', String()),
        ])),
        ('partition', UInt(16)),
        ('offset', UInt(64)),
        ('retention_days', UInt(16)),
        ('deleted', UInt(8)),
    ])

    schema = ReplacingMergeTreeSchema(
        columns=columns,
        local_table_name='transactions_local',
        dist_table_name='transactions_dist',
        order_by='(project_id, toStartOfDay(start_ts), transaction_hash, start_ts, start_ms, trace_id, span_id)',
        partition_by='(retention_days, toMonday(start_ts))',
        version_column='deleted',
        sample_expr=None,
    )

    dataset_schemas = DatasetSchemas(
        read_schema=schema,
        write_schema=schema,
    )

    super().__init__(
        dataset_schemas=dataset_schemas,
        processor=TransactionsMessageProcessor(),
        default_topic="events",
        time_group_columns={
            'bucketed_start': 'start_ts',
            'bucketed_end': 'finish_ts',
        },
    )
        ret.append(
            f"ALTER TABLE {clickhouse_table} ADD COLUMN http_method LowCardinality(Nullable(String)) AFTER sdk_version"
        )

    if "http_referer" not in current_schema:
        ret.append(
            f"ALTER TABLE {clickhouse_table} ADD COLUMN http_referer Nullable(String) AFTER http_method"
        )

    return ret


columns = ColumnSet(
    [
        ("project_id", UInt(64)),
        ("event_id", UUID()),
        ("trace_id", UUID()),
        ("span_id", UInt(64)),
        ("transaction_name", LowCardinality(String())),
        (
            "transaction_hash",
            Materialized(UInt(64), "cityHash64(transaction_name)"),
        ),
        ("transaction_op", LowCardinality(String())),
        ("transaction_status", WithDefault(UInt(8), str(UNKNOWN_SPAN_STATUS))),
        ("start_ts", DateTime()),
        ("start_ms", UInt(16)),
        ("finish_ts", DateTime()),
        ("finish_ms", UInt(16)),
        ("duration", UInt(32)),
        ("platform", LowCardinality(String())),
        ("environment", LowCardinality(Nullable(String()))),
        ("release", LowCardinality(Nullable(String()))),
        ("dist", LowCardinality(Nullable(String()))),
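# A hedged reconstruction of the migration-function pattern the fragment
# above is cut from (plausibly the transactions_migrations helper referenced
# elsewhere in this section): inspect the live schema and emit only the
# ALTERs for columns that are missing. The name and exact signature are
# assumptions, not Snuba's verified API.
def _add_http_columns(clickhouse_table, current_schema):
    ret = []
    if "http_method" not in current_schema:
        ret.append(
            f"ALTER TABLE {clickhouse_table} ADD COLUMN http_method "
            "LowCardinality(Nullable(String)) AFTER sdk_version"
        )
    if "http_referer" not in current_schema:
        ret.append(
            f"ALTER TABLE {clickhouse_table} ADD COLUMN http_referer "
            "Nullable(String) AFTER http_method"
        )
    return ret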
def __init__(self) -> None:
    self.__common_columns = ColumnSet(
        [
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", Nullable(String())),
            ("timestamp", DateTime()),
            ("platform", Nullable(String())),
            ("environment", Nullable(String())),
            ("release", Nullable(String())),
            ("dist", Nullable(String())),
            ("user", Nullable(String())),
            ("transaction", Nullable(String())),
            ("message", Nullable(String())),
            ("title", Nullable(String())),
            # User
            ("user_id", Nullable(String())),
            ("username", Nullable(String())),
            ("email", Nullable(String())),
            ("ip_address", Nullable(String())),
            # SDK
            ("sdk_name", Nullable(String())),
            ("sdk_version", Nullable(String())),
            # geo location context
            ("geo_country_code", Nullable(String())),
            ("geo_region", Nullable(String())),
            ("geo_city", Nullable(String())),
            ("http_method", Nullable(String())),
            ("http_referer", Nullable(String())),
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
        ]
    )

    self.__events_columns = ColumnSet(
        [
            ("group_id", Nullable(UInt(64))),
            ("primary_hash", Nullable(FixedString(32))),
            # Promoted tags
            ("level", Nullable(String())),
            ("logger", Nullable(String())),
            ("server_name", Nullable(String())),
            ("site", Nullable(String())),
            ("url", Nullable(String())),
            ("search_message", Nullable(String())),
            ("location", Nullable(String())),
            ("culprit", Nullable(String())),
            ("received", Nullable(DateTime())),
            ("sdk_integrations", Nullable(Array(String()))),
            ("version", Nullable(String())),
            # exception interface
            (
                "exception_stacks",
                Nested(
                    [
                        ("type", Nullable(String())),
                        ("value", Nullable(String())),
                        ("mechanism_type", Nullable(String())),
                        ("mechanism_handled", Nullable(UInt(8))),
                    ]
                ),
            ),
            (
                "exception_frames",
                Nested(
                    [
                        ("abs_path", Nullable(String())),
                        ("filename", Nullable(String())),
                        ("package", Nullable(String())),
                        ("module", Nullable(String())),
                        ("function", Nullable(String())),
                        ("in_app", Nullable(UInt(8))),
                        ("colno", Nullable(UInt(32))),
                        ("lineno", Nullable(UInt(32))),
                        ("stack_level", UInt(16)),
                    ]
                ),
            ),
            ("modules", Nested([("name", String()), ("version", String())])),
        ]
    )

    self.__transactions_columns = ColumnSet(
        [
            ("trace_id", Nullable(UUID())),
            ("span_id", Nullable(UInt(64))),
            ("transaction_hash", Nullable(UInt(64))),
            ("transaction_op", Nullable(String())),
            ("transaction_status", Nullable(UInt(8))),
            ("duration", Nullable(UInt(32))),
            (
                "measurements",
                Nested([("key", LowCardinality(String())), ("value", Float(64))]),
            ),
        ]
    )

    events_storage = get_storage(StorageKey.EVENTS)
    events_ro_storage = get_storage(StorageKey.EVENTS_RO)
    transactions_storage = get_storage(StorageKey.TRANSACTIONS)

    self.__time_group_columns: Mapping[str, str] = {}
    self.__time_parse_columns = ("timestamp",)

    super().__init__(
        storages=[events_storage, transactions_storage],
        query_plan_builder=SelectedStorageQueryPlanBuilder(
            selector=DiscoverQueryStorageSelector(
                events_table=events_storage,
                events_ro_table=events_ro_storage,
                abstract_events_columns=self.__events_columns,
                transactions_table=transactions_storage,
                abstract_transactions_columns=self.__transactions_columns,
            ),
        ),
        abstract_column_set=(
            self.__common_columns
            + self.__events_columns
            + self.__transactions_columns
        ),
        writable_storage=None,
    )