def fix_order_by() -> None:
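    """
    One-off fixup: if TABLE_NAME still uses the old primary key (id), add the
    project_id column and recreate the table with ORDER BY (project_id, id),
    swapping it in via RENAME TABLE.
    """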
    cluster = get_cluster(StorageSetKey.EVENTS)

    if not cluster.is_single_node():
        return

    clickhouse = cluster.get_query_connection(ClickhouseClientSettings.MIGRATE)
    database = cluster.get_database()

    new_primary_key = "project_id, id"
    old_primary_key = "id"

    ((curr_primary_key, ), ) = clickhouse.execute(
        f"SELECT primary_key FROM system.tables WHERE name = '{TABLE_NAME}' AND database = '{database}'"
    )

    assert curr_primary_key in [
        new_primary_key,
        old_primary_key,
    ], "Groupmessage table has invalid primary key"

    if curr_primary_key != old_primary_key:
        return

    # Add the project_id column
    add_column_sql = operations.AddColumn(
        storage_set=StorageSetKey.EVENTS,
        table_name=TABLE_NAME,
        column=Column("project_id", UInt(64)),
        after="record_deleted",
    ).format_sql()

    clickhouse.execute(add_column_sql)

    # There shouldn't be any data in the table yet
    assert (
        clickhouse.execute(f"SELECT COUNT() FROM {TABLE_NAME} FINAL;")[0][0] == 0
    ), f"{TABLE_NAME} is not empty"

    new_order_by = f"ORDER BY ({new_primary_key})"
    old_order_by = f"ORDER BY {old_primary_key}"

    ((curr_create_table_statement,),) = clickhouse.execute(
        f"SHOW CREATE TABLE {database}.{TABLE_NAME}"
    )

    new_create_table_statement = curr_create_table_statement.replace(
        TABLE_NAME, TABLE_NAME_NEW
    ).replace(old_order_by, new_order_by)

    clickhouse.execute(new_create_table_statement)

    clickhouse.execute(f"RENAME TABLE {TABLE_NAME} TO {TABLE_NAME_OLD};")

    clickhouse.execute(f"RENAME TABLE {TABLE_NAME_NEW} TO {TABLE_NAME};")

    clickhouse.execute(f"DROP TABLE {TABLE_NAME_OLD};")
Example #2
    def __init__(self) -> None:
        storage = get_writable_storage(StorageKey.SPANS)

        super().__init__(
            storages=[storage],
            query_pipeline_builder=SimplePipelineBuilder(
                query_plan_builder=SingleStorageQueryPlanBuilder(
                    storage=storage,
                    mappers=TranslationMappers(
                        subscriptables=[
                            SubscriptableMapper(None, "tags", None, "tags")
                        ],
                    ),
                ),
            ),
            abstract_column_set=ColumnSet(
                [
                    ("project_id", UInt(64)),
                    ("transaction_id", UUID()),
                    ("trace_id", UUID()),
                    ("transaction_span_id", UInt(64)),
                    ("span_id", UInt(64)),
                    ("parent_span_id", UInt(64, Modifiers(nullable=True))),
                    ("transaction_name", String()),
                    ("op", String()),
                    ("status", UInt(8)),
                    ("start_ts", DateTime()),
                    ("start_ns", UInt(32)),
                    ("finish_ts", DateTime()),
                    ("finish_ns", UInt(32)),
                    ("duration_ms", UInt(32)),
                    ("tags", Nested([("key", String()), ("value", String())])),
                ]
            ),
            join_relationships={
                "contained": JoinRelationship(
                    rhs_entity=EntityKey.TRANSACTIONS,
                    columns=[
                        ("project_id", "project_id"),
                        ("transaction_span_id", "span_id"),
                    ],
                    join_type=JoinType.INNER,
                    equivalences=[
                        ColumnEquivalence("transaction_id", "event_id"),
                        ColumnEquivalence("transaction_name", "transaction_name"),
                        ColumnEquivalence("trace_id", "trace_id"),
                    ],
                )
            },
            writable_storage=storage,
        )
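The SubscriptableMapper above is what turns a tags[...] subscript into a nested key/value lookup. A hand-rolled sketch of the usual ClickHouse idiom for that lookup (illustrative only; this is not the expression tree the mapper actually builds):

def tag_lookup_sql(key: str) -> str:
    # Conventional ClickHouse pattern for reading one key out of a
    # Nested(key, value) tags column.
    return f"arrayElement(tags.value, indexOf(tags.key, '{key}'))"

assert tag_lookup_sql("http.method") == (
    "arrayElement(tags.value, indexOf(tags.key, 'http.method'))"
)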
Example #3
 def forwards_dist(self) -> Sequence[operations.SqlOperation]:
     return [
         operations.AddColumn(
             storage_set=StorageSetKey.EVENTS,
             table_name="errors_dist",
             column=Column(
                 "_tags_hash_map",
                 Array(UInt(64)),
             ),
             after="_tags_flattened",
         ),
     ]
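For reference, an AddColumn operation like the one above expresses roughly the following DDL. The string is hand-written here, not the output of format_sql(), so the exact shape may differ:

# Hand-written approximation of the column DDL expressed above.
approx_ddl = (
    "ALTER TABLE errors_dist "
    "ADD COLUMN IF NOT EXISTS _tags_hash_map Array(UInt64) "
    "AFTER _tags_flattened"
)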
Example #4
 def forwards_dist(self) -> Sequence[operations.Operation]:
     return [
         operations.AddColumn(
             storage_set=StorageSetKey.TRANSACTIONS,
             table_name="transactions_dist",
             column=Column(
                 "_tags_hash_map",
                 Array(UInt(64)),
             ),
             after="_tags_flattened",
         ),
     ]
Example #5
 def forwards_local(self) -> Sequence[operations.SqlOperation]:
     return get_forward_migrations_local(
         source_table_name="metrics_buckets_local",
         table_name="metrics_sets_local",
         mv_name="metrics_sets_mv_local",
         aggregation_col_schema=[
             Column("value", AggregateFunction("uniqCombined64",
                                               [UInt(64)])),
         ],
         aggregation_states=
         "uniqCombined64State(arrayJoin(set_values)) as value",
     )
Example #6
 def forwards_local(self) -> Sequence[operations.Operation]:
     return [
         operations.AddColumn(
             storage_set=StorageSetKey.EVENTS,
             table_name="errors_local",
             column=Column(
                 "_tags_hash_map",
                 Array(UInt(64), Modifiers(materialized=TAGS_HASH_MAP_COLUMN)),
             ),
             after="_tags_flattened",
         ),
     ]
Example #7
 def forwards_local(self) -> Sequence[operations.Operation]:
     return [
         operations.AddColumn(
             storage_set=StorageSetKey.TRANSACTIONS,
             table_name="transactions_local",
             column=Column(
                 "_tags_hash_map",
                 Materialized(Array(UInt(64)), TAGS_HASH_MAP_COLUMN),
             ),
             after="_tags_flattened",
         ),
     ]
Example #8
def test_joined_columns():
    schema = JoinedSchema(complex_join_structure)
    columns = schema.get_columns()

    expected_columns = ColumnSet([
        ("t1.t1c1", UInt(64)),
        ("t1.t1c2", String()),
        ("t1.t1c3", Nested([
            ("t11c4", UInt(64))
        ])),
        ("t2.t2c1", UInt(64)),
        ("t2.t2c2", String()),
        ("t2.t2c3", Nested([
            ("t21c4", UInt(64))
        ])),
        ("t3.t3c1", UInt(64)),
        ("t3.t3c2", String()),
        ("t3.t3c3", Nested([
            ("t31c4", UInt(64))
        ])),
    ])

    # Checks equality between flattened columns. Nested columns are
    # exploded here
    assert set([c.flattened for c in columns]) \
        == set([c.flattened for c in expected_columns])

    # Checks equality between the structured set of columns. Nested columns
    # are not exploded.
    assert set([repr(c) for c in columns.columns]) \
        == set([repr(c) for c in expected_columns.columns])
Example #9 (metrics.py, getsentry/snuba)
 def __init__(self) -> None:
     super().__init__(
         writable_storage_key=StorageKey.METRICS_RAW,
         readable_storage_key=StorageKey.METRICS_SETS,
         value_schema=[
             Column("value", AggregateFunction("uniqCombined64",
                                               [UInt(64)])),
         ],
         mappers=TranslationMappers(functions=[
             FunctionNameMapper("uniq", "uniqCombined64Merge"),
             FunctionNameMapper("uniqIf", "uniqCombined64MergeIf"),
         ], ),
     )
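The function mapping above pairs with the write path shown in the metrics migrations: the materialized view stores uniqCombined64State(...) aggregates, and reads re-aggregate them with the matching Merge combinator. Hand-written SQL fragments to make the pairing explicit (illustrative, not generated by the mappers):

# Write side (from the migration examples above): partial aggregate states.
write_side = "uniqCombined64State(arrayJoin(set_values)) AS value"
# Read side (what the uniq -> uniqCombined64Merge mapping amounts to).
read_side = "uniqCombined64Merge(value)"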
Example #10
 def forwards_dist(self) -> Sequence[operations.SqlOperation]:
     return [
         operations.AddColumn(
             storage_set=StorageSetKey.TRANSACTIONS,
             table_name="transactions_dist",
             column=Column(
                 "_tags_hash_map",
                 Array(UInt(64),
                       Modifiers(materialized=TAGS_HASH_MAP_COLUMN)),
             ),
             after="_tags_flattened",
         ),
     ]
Example #11
def test_join_query() -> None:
    events_query = LogicalQuery(
        Entity(
            EntityKey.EVENTS,
            ColumnSet([("event_id", String()), ("group_id", UInt(32))]),
        ),
        selected_columns=[
            SelectedExpression("group_id", Column("group_id", None,
                                                  "group_id")),
            SelectedExpression("string_evt_id",
                               Column("string_evt_id", None, "event_id")),
        ],
    )

    groups_query = LogicalQuery(
        Entity(
            EntityKey.GROUPEDMESSAGES,
            ColumnSet([("id", UInt(32)), ("message", String())]),
        ),
        selected_columns=[
            SelectedExpression("group_id", Column("group_id", None, "id"))
        ],
    )

    join_query = CompositeQuery(from_clause=JoinClause(
        left_node=IndividualNode("e", events_query),
        right_node=IndividualNode("g", groups_query),
        keys=[
            JoinCondition(
                left=JoinConditionExpression("e", "group_id"),
                right=JoinConditionExpression("g", "group_id"),
            )
        ],
        join_type=JoinType.INNER,
    ))

    data_source = join_query.get_from_clause()
    assert "e.string_evt_id" in data_source.get_columns()
    assert "g.group_id" in data_source.get_columns()
Example #12
 def __init__(self) -> None:
     super().__init__(
         writable_storage_key=StorageKey.METRICS_BUCKETS,
         readable_storage_key=StorageKey.METRICS_SETS,
         value_schema=[
             Column("value", AggregateFunction("uniqCombined64",
                                               [UInt(64)])),
         ],
         mappers=TranslationMappers(columns=[
             ColumnToFunction(
                 None,
                 "value",
                 "uniqCombined64Merge",
                 (ColumnExpr(None, None, "value"), ),
             ),
         ], ),
     )
Example #13
def get_migration_args_for_sets(
    granularity: int = ORIGINAL_GRANULARITY,
) -> MigrationArgs:
    return {
        "source_table_name": "metrics_buckets_local",
        "table_name": "metrics_sets_local",
        "mv_name": get_mv_name("sets", granularity),
        "aggregation_col_schema": [
            Column("value", AggregateFunction("uniqCombined64", [UInt(64)])),
        ],
        "aggregation_states": "uniqCombined64State(arrayJoin(set_values)) as value",
        "granularity": granularity,
    }
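Hypothetical usage of the helper above, splatting the returned dict into the forward-migration builder; this assumes the builder accepts every MigrationArgs key, including "granularity", which the excerpt does not show:

# Names come from the examples above; the call pattern itself is an assumption.
args = get_migration_args_for_sets()
local_ops = get_forward_migrations_local(**args)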
Example #14
 def forwards_local(self) -> Sequence[operations.SqlOperation]:
     return [
         *self.__forward_migrations("metrics_distributions_local"),
         get_forward_view_migration_polymorphic_table_v2(
             source_table_name=self.raw_table_name,
             table_name="metrics_distributions_local",
             mv_name=get_polymorphic_mv_v3_name("distributions"),
             aggregation_col_schema=COL_SCHEMA_DISTRIBUTIONS_V2,
             aggregation_states=
             ("quantilesState(0.5, 0.75, 0.9, 0.95, 0.99)((arrayJoin(distribution_values) AS values_rows)) as percentiles, "
              "minState(values_rows) as min, "
              "maxState(values_rows) as max, "
              "avgState(values_rows) as avg, "
              "sumState(values_rows) as sum, "
              "countState(values_rows) as count, "
              "histogramState(250)(values_rows) as histogram_buckets"),
             metric_type="distribution",
             materialization_version=4,
         ),
          # No changes to these MVs' schemas. We just need to recreate the
          # exact same MVs as in 0023 for the new materialization_version.
         get_forward_view_migration_polymorphic_table_v2(
             source_table_name=self.raw_table_name,
             table_name="metrics_sets_local",
             mv_name=get_polymorphic_mv_v3_name("sets"),
             aggregation_col_schema=[
                 Column("value",
                        AggregateFunction("uniqCombined64", [UInt(64)])),
             ],
             aggregation_states=
             "uniqCombined64State(arrayJoin(set_values)) as value",
             metric_type="set",
             materialization_version=4,
         ),
         get_forward_view_migration_polymorphic_table_v2(
             source_table_name=self.raw_table_name,
             table_name="metrics_counters_local",
             mv_name=get_polymorphic_mv_v3_name("counters"),
             aggregation_col_schema=[
                 Column("value", AggregateFunction("sum", [Float(64)])),
             ],
             aggregation_states="sumState(count_value) as value",
             metric_type="counter",
             materialization_version=4,
         ),
     ]
Example #15
def test_format_expressions(query: Query, expected_query: Query) -> None:
    processor = CustomFunction(
        ColumnSet([("param1", String()), ("param2", UInt(8)), ("other_col", String())]),
        "f_call",
        [("param1", ColType({String})), ("param2", ColType({UInt}))],
        partial_function(
            "f_call_impl(param1, inner_call(param2), my_const)", [("my_const", 420)],
        ),
    )
    # We cannot just run == on the query objects: their contents differ, one
    # being the AST only and the other the AST plus the raw body.
    processor.process_query(query, HTTPRequestSettings())
    assert query.get_selected_columns() == expected_query.get_selected_columns()
    assert query.get_groupby() == expected_query.get_groupby()
    assert query.get_condition() == expected_query.get_condition()
    assert query.get_arrayjoin() == expected_query.get_arrayjoin()
    assert query.get_having() == expected_query.get_having()
    assert query.get_orderby() == expected_query.get_orderby()
Example #16
 def forwards_local(self) -> Sequence[operations.SqlOperation]:
     return [
         operations.CreateTable(
             storage_set=StorageSetKey.METRICS,
             table_name=self.table_name,
             columns=self.aggregated_cols,
             engine=table_engines.AggregatingMergeTree(
                 storage_set=StorageSetKey.METRICS,
                 order_by=
                 "(use_case_id, org_id, project_id, metric_id, granularity, timestamp, tags.key, tags.value, retention_days)",
                 primary_key=
                 "(use_case_id, org_id, project_id, metric_id, granularity, timestamp)",
                 partition_by="(retention_days, toMonday(timestamp))",
                 settings={"index_granularity": self.granularity},
                 ttl="timestamp + toIntervalDay(retention_days)",
             ),
         ),
         operations.AddColumn(
             storage_set=StorageSetKey.METRICS,
             table_name=self.table_name,
             column=Column(
                 "_tags_hash",
                 Array(UInt(64),
                       Modifiers(materialized=INT_TAGS_HASH_MAP_COLUMN)),
             ),
             after="tags.value",
         ),
         operations.AddIndex(
             storage_set=StorageSetKey.METRICS,
             table_name=self.table_name,
             index_name="bf_tags_hash",
             index_expression="_tags_hash",
             index_type="bloom_filter()",
             granularity=1,
         ),
         operations.AddIndex(
             storage_set=StorageSetKey.METRICS,
             table_name=self.table_name,
             index_name="bf_tags_key_hash",
             index_expression="tags.key",
             index_type="bloom_filter()",
             granularity=1,
         ),
     ]
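The two AddIndex operations above correspond to ClickHouse data-skipping indexes. A hand-written approximation of the DDL for the first one (the exact statement emitted by operations.AddIndex may differ):

table_name = "some_metrics_table_local"  # placeholder for self.table_name
approx_index_ddl = (
    f"ALTER TABLE {table_name} "
    "ADD INDEX bf_tags_hash _tags_hash TYPE bloom_filter() GRANULARITY 1"
)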
Example #17
 def forwards_local(self) -> Sequence[operations.SqlOperation]:
     return [
         get_forward_view_migration_polymorphic_table_v3(
             source_table_name=self.raw_table_name,
             table_name=self.table_name,
             aggregation_col_schema=[
                 Column("value",
                        AggregateFunction("uniqCombined64", [UInt(64)])),
             ],
             aggregation_states=
             "uniqCombined64State(arrayJoin(set_values)) as value",
             mv_name=get_polymorphic_mv_variant_name(
                 "sets", self.mv_version),
             metric_type="set",
             target_mat_version=4,
             appended_where_clause=
             "AND timestamp > toDateTime('2022-03-29 00:00:00')",
         )
     ]
Example #18
    def __init__(self) -> None:
        columns = ColumnSet([
            # columns to maintain the dataset
            # Kafka topic offset
            ("offset", UInt(64)),
            # GroupStatus in Sentry does not have a 'DELETED' state that reflects the deletion
            # of the record. Having a dedicated ClickHouse-only flag to identify this case seems
            # more consistent than adding an extra value to the status field below that does not
            # exist on the Sentry side.
            ("record_deleted", UInt(8)),
            # PG columns
            ("project_id", UInt(64)),
            ("id", UInt(64)),
            ("status", Nullable(UInt(8))),
            ("last_seen", Nullable(DateTime())),
            ("first_seen", Nullable(DateTime())),
            ("active_at", Nullable(DateTime())),
            ("first_release_id", Nullable(UInt(64))),
        ])

        schema = ReplacingMergeTreeSchema(
            columns=columns,
            local_table_name="groupedmessage_local",
            dist_table_name="groupedmessage_dist",
            mandatory_conditions=[("record_deleted", "=", 0)],
            prewhere_candidates=["project_id", "id"],
            order_by="(project_id, id)",
            partition_by=None,
            version_column="offset",
            sample_expr="id",
        )

        dataset_schemas = DatasetSchemas(
            read_schema=schema,
            write_schema=schema,
        )

        super().__init__(
            dataset_schemas=dataset_schemas,
            table_writer=GroupedMessageTableWriter(
                write_schema=schema,
                stream_loader=KafkaStreamLoader(
                    processor=GroupedMessageProcessor(self.POSTGRES_TABLE),
                    default_topic="cdc",
                ),
                postgres_table=self.POSTGRES_TABLE,
            ),
            default_control_topic="cdc_control",
            postgres_table=self.POSTGRES_TABLE,
        )
Example #19
 def forwards_dist(self) -> Sequence[operations.SqlOperation]:
     return [
         operations.CreateTable(
             storage_set=StorageSetKey.METRICS,
             table_name=self.dist_table_name,
             columns=self.aggregated_cols,
             engine=table_engines.Distributed(
                 local_table_name=self.table_name, sharding_key=None),
         ),
         operations.AddColumn(
             storage_set=StorageSetKey.METRICS,
             table_name=self.dist_table_name,
             column=Column(
                 "_tags_hash",
                 Array(UInt(64),
                       Modifiers(materialized=INT_TAGS_HASH_MAP_COLUMN)),
             ),
             after="tags.value",
         ),
     ]
Example #20
def test_create_table() -> None:
    columns = [
        Column("id", String()),
        Column("name", Nullable(String())),
        Column("version", UInt(64)),
    ]

    assert (
        CreateTable(
            StorageSetKey.EVENTS,
            "test_table",
            columns,
            ReplacingMergeTree(
                storage_set=StorageSetKey.EVENTS,
                version_column="version",
                order_by="version",
                settings={"index_granularity": "256"},
            ),
        ).format_sql() ==
        "CREATE TABLE IF NOT EXISTS test_table (id String, name Nullable(String), version UInt64) ENGINE ReplacingMergeTree(version) ORDER BY version SETTINGS index_granularity=256;"
    )
Example #21
 def forwards_dist(self) -> Sequence[operations.Operation]:
     return [
         operations.CreateTable(
             storage_set=StorageSetKey.TRANSACTIONS,
             table_name="spans_experimental_dist",
             columns=columns,
             engine=table_engines.Distributed(
                 local_table_name="spans_experimental_local",
                 sharding_key="cityHash64(transaction_span_id)",
             ),
         ),
         operations.AddColumn(
             storage_set=StorageSetKey.TRANSACTIONS,
             table_name="spans_experimental_dist",
             column=Column(
                 "_tags_hash_map",
                 Materialized(Array(UInt(64)), TAGS_HASH_MAP_COLUMN),
             ),
             after="tags.value",
         ),
     ]
Example #22
 def forwards_dist(self) -> Sequence[operations.SqlOperation]:
     return [
         operations.CreateTable(
             storage_set=StorageSetKey.EVENTS,
             table_name="errors_dist_new",
             columns=columns,
             engine=table_engines.Distributed(
                 local_table_name="errors_local",
                 sharding_key=sample_expr,
             ),
         ),
         operations.AddColumn(
             storage_set=StorageSetKey.EVENTS,
             table_name="errors_dist_new",
             column=Column(
                 "_tags_hash_map",
                 Array(UInt(64),
                       Modifiers(materialized=TAGS_HASH_MAP_COLUMN)),
             ),
             after="tags",
         ),
         operations.DropTable(
             storage_set=StorageSetKey.EVENTS,
             table_name="errors_dist",
         ),
         operations.RenameTable(
             storage_set=StorageSetKey.EVENTS,
             old_table_name="errors_dist_new",
             new_table_name="errors_dist",
         ),
         operations.CreateTable(
             storage_set=StorageSetKey.EVENTS_RO,
             table_name="errors_dist_ro",
             columns=columns,
             engine=table_engines.Distributed(
                 local_table_name="errors_local",
                 sharding_key=sample_expr,
             ),
         ),
     ]
Example #23
 def forwards_local(self) -> Sequence[operations.Operation]:
     return [
         operations.CreateTable(
             storage_set=StorageSetKey.EVENTS,
             table_name="errors_local_new",
             columns=columns,
             engine=table_engines.ReplacingMergeTree(
                 storage_set=StorageSetKey.EVENTS,
                 version_column="deleted",
                 order_by=
                 "(project_id, toStartOfDay(timestamp), primary_hash, %s)" %
                 sample_expr,
                 partition_by="(retention_days, toMonday(timestamp))",
                 sample_by=sample_expr,
                 ttl="timestamp + toIntervalDay(retention_days)",
                 settings={"index_granularity": "8192"},
             ),
         ),
         operations.AddColumn(
             storage_set=StorageSetKey.EVENTS,
             table_name="errors_local_new",
             column=Column(
                 "_tags_hash_map",
                 Array(UInt(64),
                       Modifiers(materialized=TAGS_HASH_MAP_COLUMN)),
             ),
             after="tags",
         ),
         operations.DropTable(
             storage_set=StorageSetKey.EVENTS,
             table_name="errors_local",
         ),
         operations.RenameTable(
             storage_set=StorageSetKey.EVENTS,
             old_table_name="errors_local_new",
             new_table_name="errors_local",
         ),
     ]
Example #24
def get_forward_migrations_dist(
    dist_table_name: str,
    local_table_name: str,
    aggregation_col_schema: Sequence[Column[Modifiers]],
) -> Sequence[operations.SqlOperation]:
    return [
        operations.CreateTable(
            storage_set=StorageSetKey.METRICS,
            table_name=dist_table_name,
            columns=[*COMMON_AGGR_COLUMNS, *aggregation_col_schema],
            engine=table_engines.Distributed(local_table_name=local_table_name,
                                             sharding_key=None),
        ),
        operations.AddColumn(
            storage_set=StorageSetKey.METRICS,
            table_name=dist_table_name,
            column=Column(
                "_tags_hash",
                Array(UInt(64),
                      Modifiers(materialized=INT_TAGS_HASH_MAP_COLUMN)),
            ),
            after="tags.value",
        ),
    ]
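Hypothetical call to the helper above, reusing the "sets" aggregation column from the earlier examples; both table names here are assumptions, not values taken from a real migration:

dist_ops = get_forward_migrations_dist(
    dist_table_name="metrics_sets_dist",    # assumed name
    local_table_name="metrics_sets_local",  # assumed name
    aggregation_col_schema=[
        Column("value", AggregateFunction("uniqCombined64", [UInt(64)])),
    ],
)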
Example #25
def test_create_table() -> None:
    database = os.environ.get("CLICKHOUSE_DATABASE", "default")
    columns = [
        Column("id", String()),
        Column("name", String(Modifiers(nullable=True))),
        Column("version", UInt(64)),
    ]

    assert CreateTable(
        StorageSetKey.EVENTS,
        "test_table",
        columns,
        ReplacingMergeTree(
            storage_set=StorageSetKey.EVENTS,
            version_column="version",
            order_by="version",
            settings={"index_granularity": "256"},
        ),
    ).format_sql() in [
        "CREATE TABLE IF NOT EXISTS test_table (id String, name Nullable(String), version UInt64) ENGINE ReplacingMergeTree(version) ORDER BY version SETTINGS index_granularity=256;",
        "CREATE TABLE IF NOT EXISTS test_table (id String, name Nullable(String), version UInt64) ENGINE ReplicatedReplacingMergeTree('/clickhouse/tables/events/{shard}/"
        + f"{database}/test_table'"
        + ", '{replica}', version) ORDER BY version SETTINGS index_granularity=256;",
    ]
Example #26
    def __init__(self):
        columns = ColumnSet([
            # columns to maintain the dataset
            # Kafka topic offset
            ('offset', UInt(64)),
            # GroupStatus in Sentry does not have a 'DELETED' state that reflects the deletion
            # of the record. Having a dedicated ClickHouse-only flag to identify this case seems
            # more consistent than adding an extra value to the status field below that does not
            # exist on the Sentry side.
            ('record_deleted', UInt(8)),
            # PG columns
            ('project_id', UInt(64)),
            ('id', UInt(64)),
            ('status', Nullable(UInt(8))),
            ('last_seen', Nullable(DateTime())),
            ('first_seen', Nullable(DateTime())),
            ('active_at', Nullable(DateTime())),
            ('first_release_id', Nullable(UInt(64))),
        ])

        schema = ReplacingMergeTreeSchema(
            columns=columns,
            local_table_name='groupedmessage_local',
            dist_table_name='groupedmessage_dist',
            order_by='(project_id, id)',
            partition_by=None,
            version_column='offset',
            sample_expr='id',
        )

        dataset_schemas = DatasetSchemas(
            read_schema=schema,
            write_schema=schema,
        )

        super(GroupedMessageDataset, self).__init__(
            dataset_schemas=dataset_schemas,
            processor=GroupedMessageProcessor(self.POSTGRES_TABLE),
            default_topic="cdc",
            default_replacement_topic=None,
            default_commit_log_topic=None,
            default_control_topic="cdc_control",
        )
Example #27
    def __init__(self) -> None:
        columns = ColumnSet([
            # columns to maintain the dataset
            # Kafka topic offset
            ("offset", UInt(64)),
            ("record_deleted", UInt(8)),
            # PG columns
            ("project_id", UInt(64)),
            ("group_id", UInt(64)),
            ("date_added", Nullable(DateTime())),
            ("user_id", Nullable(UInt(64))),
            ("team_id", Nullable(UInt(64))),
        ])

        schema = ReplacingMergeTreeSchema(
            columns=columns,
            local_table_name='groupassignee_local',
            dist_table_name='groupassignee_dist',
            order_by='(project_id, group_id)',
            partition_by=None,
            version_column='offset',
        )

        dataset_schemas = DatasetSchemas(
            read_schema=schema,
            write_schema=schema,
        )

        super().__init__(
            dataset_schemas=dataset_schemas,
            table_writer=GroupAssigneeTableWriter(
                write_schema=schema,
                stream_loader=KafkaStreamLoader(
                    processor=GroupAssigneeProcessor(self.POSTGRES_TABLE),
                    default_topic="cdc",
                ),
                postgres_table=self.POSTGRES_TABLE,
            ),
            default_control_topic="cdc_control",
            postgres_table=self.POSTGRES_TABLE,
        )
Example #28
from snuba.pipeline.simple_pipeline import SimplePipelineBuilder
from snuba.query.processors import QueryProcessor
from snuba.query.processors.basic_functions import BasicFunctionsProcessor
from snuba.query.processors.object_id_rate_limiter import (
    OrganizationRateLimiterProcessor,
    ReferrerRateLimiterProcessor,
)
from snuba.query.processors.timeseries_processor import TimeSeriesProcessor
from snuba.query.validation.validators import (
    ColumnValidationMode,
    EntityRequiredColumnValidator,
)
from snuba.utils.schemas import Column

outcomes_data_model = EntityColumnSet([
    Column("org_id", UInt(64)),
    Column("project_id", UInt(64)),
    Column("key_id", UInt(64)),
    Column("timestamp", DateTime()),
    Column("outcome", UInt(8)),
    Column("reason", String()),
    Column("quantity", UInt(64)),
    Column("category", UInt(8)),
    Column("times_seen", UInt(64)),
    Column("time", DateTime()),
])


class OutcomesEntity(Entity):
    """
    Tracks event ingestion outcomes in Sentry.
Example #29
    if "http_method" not in current_schema:
        ret.append(
            f"ALTER TABLE {clickhouse_table} ADD COLUMN http_method LowCardinality(Nullable(String)) AFTER sdk_version"
        )

    if "http_referer" not in current_schema:
        ret.append(
            f"ALTER TABLE {clickhouse_table} ADD COLUMN http_referer Nullable(String) AFTER http_method"
        )

    return ret
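
# The two checks above follow the same check-then-ALTER pattern. A generic,
# hypothetical helper capturing it (not part of the original module):
from typing import Mapping, MutableSequence, Sequence


def add_column_if_missing(
    current_schema: Mapping[str, str],
    clickhouse_table: str,
    column_name: str,
    column_ddl: str,
) -> Sequence[str]:
    # Emit the ALTER statement only when the column is not already present.
    statements: MutableSequence[str] = []
    if column_name not in current_schema:
        statements.append(f"ALTER TABLE {clickhouse_table} ADD COLUMN {column_ddl}")
    return statements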


columns = ColumnSet(
    [
        ("project_id", UInt(64)),
        ("event_id", UUID()),
        ("trace_id", UUID()),
        ("span_id", UInt(64)),
        ("transaction_name", LowCardinality(String())),
        ("transaction_hash", Materialized(UInt(64), "cityHash64(transaction_name)",),),
        ("transaction_op", LowCardinality(String())),
        ("transaction_status", WithDefault(UInt(8), str(UNKNOWN_SPAN_STATUS))),
        ("start_ts", DateTime()),
        ("start_ms", UInt(16)),
        ("finish_ts", DateTime()),
        ("finish_ms", UInt(16)),
        ("duration", UInt(32)),
        ("platform", LowCardinality(String())),
        ("environment", LowCardinality(Nullable(String()))),
        ("release", LowCardinality(Nullable(String()))),
Example #30
from typing import Sequence

from snuba.clickhouse.columns import Column, DateTime, UInt
from snuba.clusters.storage_sets import StorageSetKey
from snuba.migrations import migration, operations, table_engines
from snuba.migrations.columns import MigrationModifiers as Modifiers

columns: Sequence[Column[Modifiers]] = [
    # Kafka topic offset
    Column("offset", UInt(64)),
    Column("record_deleted", UInt(8)),
    # PG columns
    Column("project_id", UInt(64)),
    Column("group_id", UInt(64)),
    Column("date_added", DateTime(Modifiers(nullable=True))),
    Column("user_id", UInt(64, Modifiers(nullable=True))),
    Column("team_id", UInt(64, Modifiers(nullable=True))),
]


class Migration(migration.ClickhouseNodeMigration):
    blocking = False

    def forwards_local(self) -> Sequence[operations.SqlOperation]:
        return [
            operations.CreateTable(
                storage_set=StorageSetKey.CDC,
                table_name="groupassignee_local",
                columns=columns,
                engine=table_engines.ReplacingMergeTree(
                    storage_set=StorageSetKey.CDC,