Example #1
    def __init__(
        self,
        schema: TableSchema,
        host,
        port,
        encoder: Callable[[WriterTableRow], bytes],
        options=None,
        table_name=None,
        chunk_size: int = 1,
    ):
        """
        Builds a writer to send a batch to Clickhouse.

        :param schema: The dataset schema to take the table name from
        :param host: Clickhouse host
        :param port: Clickhouse port
        :param encoder: A function that will be applied to each row to turn it into bytes
        :param options: options passed to Clickhouse
        :param table_name: Overrides the table coming from the schema (generally used for uploading
            on temporary tables)
        :param chunk_size: The chunk size (in rows).
            We send data to the server with Transfer-Encoding: chunked. If 0 we send the entire
            content in one chunk.
        """
        self.__pool = HTTPConnectionPool(host, port)
        self.__options = options if options is not None else {}
        self.__table_name = table_name or schema.get_table_name()
        self.__chunk_size = chunk_size
        self.__encoder = encoder
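
As the docstring notes, rows are streamed to the server with Transfer-Encoding: chunked, and chunk_size = 0 sends the entire batch in a single chunk. A minimal sketch of that grouping logic, assuming the rows have already been passed through the encoder (the helper name chunked_rows is hypothetical and not part of the snippet above):

from typing import Iterable, Iterator, List


def chunked_rows(rows: Iterable[bytes], chunk_size: int) -> Iterator[bytes]:
    # chunk_size == 0: the whole batch becomes one chunk, as the
    # docstring describes.
    if chunk_size == 0:
        yield b"".join(rows)
        return
    buffer: List[bytes] = []
    for row in rows:
        buffer.append(row)
        if len(buffer) == chunk_size:
            yield b"".join(buffer)
            buffer = []
    if buffer:  # trailing partial chunk
        yield b"".join(buffer)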
Example #2
        ("sdk_name", String(Modifiers(nullable=True))),
        ("sdk_version", String(Modifiers(nullable=True))),
        ("http_method", String(Modifiers(nullable=True))),
        ("http_referer", String(Modifiers(nullable=True))),
        ("tags", Nested([("key", String()), ("value", String())])),
        ("_tags_hash_map", Array(UInt(64))),
        ("contexts", Nested([("key", String()), ("value", String())])),
        ("trace_id", UUID(Modifiers(nullable=True))),
        ("deleted", UInt(8)),
    ]
)

schema = TableSchema(
    columns=columns,
    local_table_name="discover_local",
    dist_table_name="discover_dist",
    storage_set_key=StorageSetKey.DISCOVER,
    mandatory_conditions=mandatory_conditions,
)

storage = ReadableTableStorage(
    storage_key=StorageKey.DISCOVER,
    storage_set_key=StorageSetKey.DISCOVER,
    schema=schema,
    query_processors=[
        MappingColumnPromoter(
            mapping_specs={
                "tags": {
                    "environment": "environment",
                    "sentry:release": "release",
                    "sentry:dist": "dist",
Example #3
    ("http_method", String(Modifiers(nullable=True))),
    ("http_referer", String(Modifiers(nullable=True))),
    ("tags", Nested([("key", String()), ("value", String())])),
    ("_tags_hash_map", Array(UInt(64))),
    ("contexts", Nested([("key", String()), ("value", String())])),
    ("deleted", UInt(8)),
])

schema = TableSchema(
    columns=columns,
    local_table_name="discover_local",
    dist_table_name="discover_dist",
    storage_set_key=StorageSetKey.DISCOVER,
    mandatory_conditions=mandatory_conditions,
    prewhere_candidates=[
        "event_id",
        "release",
        "message",
        "transaction_name",
        "environment",
        "project_id",
    ],
)

storage = ReadableTableStorage(
    storage_key=StorageKey.DISCOVER,
    storage_set_key=StorageSetKey.DISCOVER,
    schema=schema,
    query_processors=[
        MappingOptimizer("tags", "_tags_hash_map", "tags_hash_map_enabled"),
        EventIdColumnProcessor(),
Example #4
        ("sessions_preaggr", AggregateFunction("sumIf", [UInt(32), UInt(8)])),
        ("sessions_crashed", AggregateFunction("countIf", [UUID(), UInt(8)])),
        ("sessions_crashed_preaggr", AggregateFunction("sumIf", [UInt(32), UInt(8)])),
        ("sessions_abnormal", AggregateFunction("countIf", [UUID(), UInt(8)])),
        ("sessions_abnormal_preaggr", AggregateFunction("sumIf", [UInt(32), UInt(8)])),
        ("sessions_errored", AggregateFunction("uniqIf", [UUID(), UInt(8)])),
        ("sessions_errored_preaggr", AggregateFunction("sumIf", [UInt(32), UInt(8)])),
        ("users", AggregateFunction("uniqIf", [UUID(), UInt(8)])),
        ("users_crashed", AggregateFunction("uniqIf", [UUID(), UInt(8)])),
        ("users_abnormal", AggregateFunction("uniqIf", [UUID(), UInt(8)])),
        ("users_errored", AggregateFunction("uniqIf", [UUID(), UInt(8)])),
    ]
)
read_schema = TableSchema(
    columns=read_columns,
    local_table_name=READ_LOCAL_TABLE_NAME,
    dist_table_name=READ_DIST_TABLE_NAME,
    storage_set_key=StorageSetKey.SESSIONS,
)
materialized_view_schema = TableSchema(
    local_table_name=READ_LOCAL_MV_NAME,
    dist_table_name=READ_DIST_MV_NAME,
    storage_set_key=StorageSetKey.SESSIONS,
    columns=read_columns,
)

# The raw table we write to; it can potentially be queried as well.
raw_storage = WritableTableStorage(
    storage_key=StorageKey.SESSIONS_RAW,
    storage_set_key=StorageSetKey.SESSIONS,
    schema=raw_schema,
Example #5
def process_delete_tag(
    message: Mapping[str, Any],
    schema: TableSchema,
    tag_column_map: Mapping[str, Mapping[str, str]],
    promoted_tags: Mapping[str, Sequence[str]],
) -> Optional[Replacement]:
    tag = message["tag"]
    if not tag:
        return None

    assert isinstance(tag, str)
    timestamp = datetime.strptime(message["datetime"], settings.PAYLOAD_DATETIME_FORMAT)
    tag_column_name = tag_column_map["tags"].get(tag, tag)
    is_promoted = tag in promoted_tags["tags"]

    where = """\
        WHERE project_id = %(project_id)s
        AND received <= CAST('%(timestamp)s' AS DateTime)
        AND NOT deleted
    """

    if is_promoted:
        prewhere = " PREWHERE %(tag_column)s IS NOT NULL "
    else:
        prewhere = " PREWHERE has(`tags.key`, %(tag_str)s) "

    insert_query_template = (
        """\
        INSERT INTO %(dist_write_table_name)s (%(all_columns)s)
        SELECT %(select_columns)s
        FROM %(dist_read_table_name)s FINAL
    """
        + prewhere + where
    )

    all_columns = [
        col
        for col in schema.get_columns()
        if Materialized not in col.type.get_all_modifiers()
    ]
    select_columns = []
    for col in all_columns:
        if is_promoted and col.flattened == tag_column_name:
            select_columns.append("NULL")
        elif col.flattened == "tags.key":
            select_columns.append(
                "arrayFilter(x -> (indexOf(`tags.key`, x) != indexOf(`tags.key`, %s)), `tags.key`)"
                % escape_string(tag)
            )
        elif col.flattened == "tags.value":
            select_columns.append(
                "arrayMap(x -> arrayElement(`tags.value`, x), arrayFilter(x -> x != indexOf(`tags.key`, %s), arrayEnumerate(`tags.value`)))"
                % escape_string(tag)
            )
        elif col.flattened == "_tags_flattened":
            select_columns.append(FLATTENED_COLUMN_TEMPLATE % escape_string(tag))
        else:
            select_columns.append(col.escaped)

    all_column_names = [col.escaped for col in all_columns]
    query_args = {
        "all_columns": ", ".join(all_column_names),
        "select_columns": ", ".join(select_columns),
        "project_id": message["project_id"],
        "tag_str": escape_string(tag),
        "tag_column": escape_identifier(tag_column_name),
        "timestamp": timestamp.strftime(DATETIME_FORMAT),
    }

    count_query_template = (
        """\
        SELECT count()
        FROM %(dist_read_table_name)s FINAL
    """
        + prewhere + where
    )

    query_time_flags = (NEEDS_FINAL, message["project_id"])

    return Replacement(
        count_query_template, insert_query_template, query_args, query_time_flags
    )
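
The arrayFilter / arrayMap expressions above rewrite the parallel tags.key and tags.value arrays so the deleted tag disappears from both. A rough Python equivalent of that logic, assuming tag keys are unique within a row (ClickHouse indexOf is 1-based and returns 0 when the element is missing; drop_tag is an illustrative helper, not part of the snippet):

from typing import List, Tuple


def drop_tag(
    keys: List[str], values: List[str], tag: str
) -> Tuple[List[str], List[str]]:
    # Position of the tag, 1-based like ClickHouse indexOf; 0 means
    # "not found", in which case nothing is filtered out.
    pos = keys.index(tag) + 1 if tag in keys else 0
    new_keys = [k for i, k in enumerate(keys, 1) if i != pos]
    new_values = [v for i, v in enumerate(values, 1) if i != pos]
    return new_keys, new_values


# drop_tag(["environment", "release"], ["prod", "1.0"], "release")
# -> (["environment"], ["prod"])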
Example #6
    Column("granularity", UInt(8)),
    Column("timestamp", DateTime()),
    Column("retention_days", UInt(16)),
    Column(
        "tags",
        Nested([("key", UInt(64)), ("indexed_value", UInt(64)),
                ("raw_value", String())]),
    ),
    Column("_raw_tags_hash", Array(UInt(64), SchemaModifiers(readonly=True))),
    Column("_indexed_tags_hash", Array(UInt(64),
                                       SchemaModifiers(readonly=True))),
]

sets_storage = ReadableTableStorage(
    storage_key=StorageKey.GENERIC_METRICS_SETS,
    storage_set_key=StorageSetKey.GENERIC_METRICS_SETS,
    schema=TableSchema(
        local_table_name="generic_metrics_sets_local",
        dist_table_name="generic_metrics_sets_dist",
        storage_set_key=StorageSetKey.GENERIC_METRICS_SETS,
        columns=ColumnSet([
            *aggregated_columns,
            Column("value", AggregateFunction("uniqCombined64", [UInt(64)])),
        ]),
    ),
    query_processors=[
        ArrayJoinKeyValueOptimizer("tags"),
        TableRateLimit(),
    ],
)
Example #7
        "sessions_crashed",
        AggregateFunction("countIf", UUID(), UInt(8)),
    ),
    (
        "sessions_abnormal",
        AggregateFunction("countIf", UUID(), UInt(8)),
    ),
    ("sessions_errored", AggregateFunction("uniqIf", UUID(), UInt(8))),
    ("users_crashed", AggregateFunction("uniqIf", UUID(), UInt(8))),
    ("users_abnormal", AggregateFunction("uniqIf", UUID(), UInt(8))),
    ("users_errored", AggregateFunction("uniqIf", UUID(), UInt(8))),
])
read_schema = TableSchema(
    columns=read_columns,
    local_table_name=READ_LOCAL_TABLE_NAME,
    dist_table_name=READ_DIST_TABLE_NAME,
    storage_set_key=StorageSetKey.SESSIONS,
    prewhere_candidates=["project_id", "org_id"],
)
materialized_view_schema = TableSchema(
    local_table_name=READ_LOCAL_MV_NAME,
    dist_table_name=READ_DIST_MV_NAME,
    storage_set_key=StorageSetKey.SESSIONS,
    prewhere_candidates=["project_id", "org_id"],
    columns=read_columns,
)

# The raw table we write to; it can potentially be queried as well.
raw_storage = WritableTableStorage(
    storage_key=StorageKey.SESSIONS_RAW,
Example #8
    storage_set_key=StorageSetKey.OUTCOMES,
)

read_columns = ColumnSet([
    ("org_id", UInt(64)),
    ("project_id", UInt(64)),
    ("key_id", UInt(64)),
    ("timestamp", DateTime()),
    ("outcome", UInt(8)),
    ("reason", String()),
    ("times_seen", UInt(64)),
])

read_schema = TableSchema(
    columns=read_columns,
    local_table_name=READ_LOCAL_TABLE_NAME,
    dist_table_name=READ_DIST_TABLE_NAME,
    storage_set_key=StorageSetKey.OUTCOMES,
)

materialized_view_columns = ColumnSet([
    ("org_id", UInt(64)),
    ("project_id", UInt(64)),
    ("key_id", UInt(64)),
    ("timestamp", DateTime()),
    ("outcome", UInt(8)),
    ("reason", String()),
    ("times_seen", UInt(64)),
])

materialized_view_schema = TableSchema(
    local_table_name="outcomes_mv_hourly_local",
Example #9
from snuba.clusters.storage_sets import StorageSetKey
from snuba.datasets.storage import ReadableTableStorage
from snuba.datasets.schemas.tables import TableSchema
from snuba.datasets.storages import StorageKey

from snuba.datasets.storages.events_common import (
    all_columns,
    mandatory_conditions,
    query_processors,
    query_splitters,
)

schema = TableSchema(
    columns=all_columns,
    local_table_name="sentry_local",
    dist_table_name="sentry_dist_ro",
    storage_set_key=StorageSetKey.EVENTS_RO,
    mandatory_conditions=mandatory_conditions,
)

storage = ReadableTableStorage(
    storage_key=StorageKey.EVENTS_RO,
    storage_set_key=StorageSetKey.EVENTS_RO,
    schema=schema,
    query_processors=query_processors,
    query_splitters=query_splitters,
)
Example #10
)
from snuba.clusters.storage_sets import StorageSetKey
from snuba.datasets.schemas.tables import TableSchema
from snuba.datasets.schemas.join import (
    JoinConditionExpression,
    JoinCondition,
    JoinClause,
    JoinType,
    TableJoinNode,
)

table1 = TableSchema(
    columns=ColumnSet([
        ("t1c1", UInt(64)),
        ("t1c2", String()),
        ("t1c3", Nested([("t11c4", UInt(64))])),
    ]),
    local_table_name="table1",
    dist_table_name="table1",
    storage_set_key=StorageSetKey.EVENTS,
).get_data_source()

table2 = TableSchema(
    columns=ColumnSet([
        ("t2c1", UInt(64)),
        ("t2c2", String()),
        ("t2c3", Nested([("t21c4", UInt(64))])),
    ]),
    local_table_name="table2",
    dist_table_name="table2",
    storage_set_key=StorageSetKey.EVENTS,
).get_data_source()
Example #11
from snuba.clusters.storage_sets import StorageSetKey
from snuba.datasets.storage import ReadableTableStorage
from snuba.datasets.schemas.tables import TableSchema
from snuba.datasets.storages import StorageKey

from snuba.datasets.storages.errors_common import (
    all_columns,
    mandatory_conditions,
    prewhere_candidates,
    query_processors,
    query_splitters,
)


schema = TableSchema(
    columns=all_columns,
    local_table_name="errors_local",
    dist_table_name="errors_dist_ro",
    storage_set_key=StorageSetKey.EVENTS_RO,
    mandatory_conditions=mandatory_conditions,
    prewhere_candidates=prewhere_candidates,
)

storage = ReadableTableStorage(
    storage_key=StorageKey.ERRORS_RO,
    storage_set_key=StorageSetKey.EVENTS_RO,
    schema=schema,
    query_processors=query_processors,
    query_splitters=query_splitters,
)
Example #12
from snuba.clusters.storage_sets import StorageSetKey
from snuba.datasets.schemas.tables import TableSchema
from snuba.datasets.storage import ReadableTableStorage
from snuba.datasets.storages import StorageKey
from snuba.datasets.storages.transactions_common import (
    columns,
    mandatory_condition_checkers,
    query_processors,
    query_splitters,
)

schema = TableSchema(
    columns=columns,
    local_table_name="transactions_local",
    dist_table_name="transactions_dist",
    storage_set_key=StorageSetKey.TRANSACTIONS_RO,
    mandatory_conditions=[],
)

storage = ReadableTableStorage(
    storage_key=StorageKey.TRANSACTIONS_RO,
    storage_set_key=StorageSetKey.TRANSACTIONS_RO,
    schema=schema,
    query_processors=query_processors,
    query_splitters=query_splitters,
    mandatory_condition_checkers=mandatory_condition_checkers,
)
Example #13
    Column("metric_id", UInt(64)),
    Column("granularity", UInt(32)),
    Column("timestamp", DateTime()),
    Column("retention_days", UInt(16)),
    Column("tags", Nested([("key", UInt(64)), ("value", UInt(64))])),
    Column("_tags_hash", Array(UInt(64), SchemaModifiers(readonly=True))),
]

sets_storage = ReadableTableStorage(
    storage_key=StorageKey.METRICS_SETS,
    storage_set_key=StorageSetKey.METRICS,
    schema=TableSchema(
        local_table_name="metrics_sets_local",
        dist_table_name="metrics_sets_dist",
        storage_set_key=StorageSetKey.METRICS,
        columns=ColumnSet([
            *aggregated_columns,
            Column("value", AggregateFunction("uniqCombined64", [UInt(64)])),
        ]),
    ),
    query_processors=[ArrayJoinKeyValueOptimizer("tags")],
)

counters_storage = ReadableTableStorage(
    storage_key=StorageKey.METRICS_COUNTERS,
    storage_set_key=StorageSetKey.METRICS,
    schema=TableSchema(
        local_table_name="metrics_counters_local",
        dist_table_name="metrics_counters_dist",
        storage_set_key=StorageSetKey.METRICS,
        columns=ColumnSet([
Example #14
from snuba.clusters.storage_sets import StorageSetKey
from snuba.datasets.schemas.tables import TableSchema
from snuba.datasets.storage import ReadableTableStorage
from snuba.datasets.storages import StorageKey
from snuba.datasets.storages.errors_common import (
    all_columns,
    mandatory_conditions,
    query_splitters,
)
from snuba.datasets.storages.errors_v2 import query_processors

schema = TableSchema(
    columns=all_columns,
    local_table_name="errors_local",
    dist_table_name="errors_dist_ro",
    storage_set_key=StorageSetKey.ERRORS_V2_RO,
    mandatory_conditions=mandatory_conditions,
)

storage = ReadableTableStorage(
    storage_key=StorageKey.ERRORS_V2_RO,
    storage_set_key=StorageSetKey.ERRORS_V2_RO,
    schema=schema,
    query_processors=query_processors,
    query_splitters=query_splitters,
)
Example #15
        CounterAggregateProcessor(),
        default_topic=Topic.METRICS,
        dead_letter_queue_policy_creator=produce_policy_creator,
    ),
    write_format=WriteFormat.VALUES,
)

org_counters_storage = ReadableTableStorage(
    storage_key=StorageKey.ORG_METRICS_COUNTERS,
    storage_set_key=StorageSetKey.METRICS,
    schema=TableSchema(
        local_table_name="metrics_counters_v2_local",
        dist_table_name="metrics_counters_v2_dist",
        storage_set_key=StorageSetKey.METRICS,
        columns=ColumnSet([
            Column("org_id", UInt(64)),
            Column("project_id", UInt(64)),
            Column("metric_id", UInt(64)),
            Column("granularity", UInt(32)),
            Column("timestamp", DateTime()),
        ]),
    ),
    query_processors=[TableRateLimit()],
)

distributions_storage = WritableTableStorage(
    storage_key=StorageKey.METRICS_DISTRIBUTIONS,
    storage_set_key=StorageSetKey.METRICS,
    schema=WritableTableSchema(
        local_table_name="metrics_distributions_v2_local",
        dist_table_name="metrics_distributions_v2_dist",
        storage_set_key=StorageSetKey.METRICS,