Beispiel #1
0
 def forwards_local(self) -> Sequence[operations.SqlOperation]:
     """Build the DDL for the local sessions tables.

     Returns the operations creating the raw ingest table, the hourly
     aggregate table, and the pre-built v1 materialized-view operation.
     """
     raw_table = operations.CreateTable(
         storage_set=StorageSetKey.SESSIONS,
         table_name="sessions_raw_local",
         columns=raw_columns,
         engine=table_engines.MergeTree(
             storage_set=StorageSetKey.SESSIONS,
             order_by="(org_id, project_id, release, environment, started)",
             partition_by="(toMonday(started))",
             settings={"index_granularity": "16384"},
         ),
     )
     hourly_table = operations.CreateTable(
         storage_set=StorageSetKey.SESSIONS,
         table_name="sessions_hourly_local",
         columns=aggregate_columns_v1,
         engine=table_engines.AggregatingMergeTree(
             storage_set=StorageSetKey.SESSIONS,
             order_by="(org_id, project_id, release, environment, started)",
             partition_by="(toMonday(started))",
             settings={"index_granularity": "256"},
         ),
     )
     return [raw_table, hourly_table, create_matview_v1]
Beispiel #2
0
 def forwards_local(self) -> Sequence[operations.Operation]:
     """Build the DDL for the local sessions tables and hourly matview.

     Creates the raw ingest table, the hourly aggregate table, and the
     materialized view that populates the aggregate table from the raw one.
     """
     raw_table = operations.CreateTable(
         storage_set=StorageSetKey.SESSIONS,
         table_name="sessions_raw_local",
         columns=raw_columns,
         engine=table_engines.MergeTree(
             storage_set=StorageSetKey.SESSIONS,
             order_by="(org_id, project_id, release, environment, started)",
             partition_by="(toMonday(started))",
             settings={"index_granularity": "16384"},
         ),
     )
     hourly_table = operations.CreateTable(
         storage_set=StorageSetKey.SESSIONS,
         table_name="sessions_hourly_local",
         columns=aggregate_columns,
         engine=table_engines.AggregatingMergeTree(
             storage_set=StorageSetKey.SESSIONS,
             order_by="(org_id, project_id, release, environment, started)",
             partition_by="(toMonday(started))",
             settings={"index_granularity": "256"},
         ),
     )
     return [
         raw_table,
         hourly_table,
         operations.CreateMaterializedView(
             storage_set=StorageSetKey.SESSIONS,
             view_name="sessions_hourly_mv_local",
             destination_table_name="sessions_hourly_local",
             columns=aggregate_columns,
             query=f"""
                 SELECT
                     org_id,
                     project_id,
                     toStartOfHour(started) as started,
                     release,
                     environment,
                     quantilesIfState(0.5, 0.9)(
                         duration,
                         duration <> {MAX_UINT32} AND status == 1
                     ) as duration_quantiles,
                     countIfState(session_id, seq == 0) as sessions,
                     uniqIfState(distinct_id, distinct_id != '{NIL_UUID}') as users,
                     countIfState(session_id, status == 2) as sessions_crashed,
                     countIfState(session_id, status == 3) as sessions_abnormal,
                     uniqIfState(session_id, errors > 0) as sessions_errored,
                     uniqIfState(distinct_id, status == 2) as users_crashed,
                     uniqIfState(distinct_id, status == 3) as users_abnormal,
                     uniqIfState(distinct_id, errors > 0) as users_errored
                 FROM
                     sessions_raw_local
                 GROUP BY
                     org_id, project_id, started, release, environment
             """,
         ),
     ]
Beispiel #3
0
 def forwards_local(self) -> Sequence[operations.Operation]:
     """Build the DDL creating the local querylog table."""
     engine = table_engines.MergeTree(
         storage_set=StorageSetKey.QUERYLOG,
         order_by="(toStartOfDay(timestamp), request_id)",
         partition_by="(toMonday(timestamp))",
         sample_by="request_id",
     )
     return [
         operations.CreateTable(
             storage_set=StorageSetKey.QUERYLOG,
             table_name="querylog_local",
             columns=columns,
             engine=engine,
         )
     ]
Beispiel #4
0
 def forwards_local(self) -> Sequence[operations.SqlOperation]:
     """Build the DDL creating the local metrics table (7-day TTL)."""
     engine = table_engines.MergeTree(
         storage_set=StorageSetKey.METRICS,
         order_by="(use_case_id, metric_type, org_id, project_id, metric_id, timestamp)",
         partition_by="(toStartOfDay(timestamp))",
         # Rows expire a week after their timestamp.
         ttl="timestamp + toIntervalDay(7)",
     )
     return [
         operations.CreateTable(
             storage_set=StorageSetKey.METRICS,
             table_name=self.local_table_name,
             columns=self.column_list,
             engine=engine,
         )
     ]
Beispiel #5
0
def test_zookeeper_path_override() -> None:
    """Verify CLICKHOUSE_ZOOKEEPER_OVERRIDE rewrites the replication path.

    The engine first renders its default ZooKeeper path; after installing
    an override mapping in the global settings, the same engine must render
    the overridden path instead.
    """
    orig_path = "/clickhouse/tables/events/all/default/test_table"
    override_path = "/clickhouse/tables/test_table"

    engine = table_engines.MergeTree(
        storage_set=StorageSetKey.EVENTS,
        order_by="timestamp",
        unsharded=True,
    )
    try:
        assert (
            engine.get_sql(multi_node_cluster, "test_table") ==
            f"ReplicatedMergeTree('{orig_path}', '{{replica}}') ORDER BY timestamp"
        )
        settings.CLICKHOUSE_ZOOKEEPER_OVERRIDE = {orig_path: override_path}
        assert (
            engine.get_sql(multi_node_cluster, "test_table") ==
            f"ReplicatedMergeTree('{override_path}', '{{replica}}') ORDER BY timestamp"
        )
    finally:
        # Always restore the global setting: without the finally a failing
        # assertion would leak the override into every subsequent test.
        settings.CLICKHOUSE_ZOOKEEPER_OVERRIDE = {}
Beispiel #6
0
 def forwards_local(self) -> Sequence[operations.Operation]:
     """Build the DDL for the local outcomes tables and hourly rollup.

     Creates the raw ingest table, the hourly summing table, and the
     materialized view that rolls raw outcomes up into the hourly table.
     """
     raw_table = operations.CreateTable(
         storage_set=StorageSetKey.OUTCOMES,
         table_name="outcomes_raw_local",
         columns=raw_columns,
         engine=table_engines.MergeTree(
             storage_set=StorageSetKey.OUTCOMES,
             order_by="(org_id, project_id, timestamp)",
             partition_by="(toMonday(timestamp))",
             settings={"index_granularity": "16384"},
         ),
     )
     hourly_table = operations.CreateTable(
         storage_set=StorageSetKey.OUTCOMES,
         table_name="outcomes_hourly_local",
         columns=hourly_columns,
         engine=table_engines.SummingMergeTree(
             storage_set=StorageSetKey.OUTCOMES,
             order_by="(org_id, project_id, key_id, outcome, reason, timestamp)",
             partition_by="(toMonday(timestamp))",
             settings={"index_granularity": "256"},
         ),
     )
     return [
         raw_table,
         hourly_table,
         operations.CreateMaterializedView(
             storage_set=StorageSetKey.OUTCOMES,
             view_name="outcomes_mv_hourly_local",
             destination_table_name="outcomes_hourly_local",
             columns=materialized_view_columns,
             query="""
                 SELECT
                     org_id,
                     project_id,
                     ifNull(key_id, 0) AS key_id,
                     toStartOfHour(timestamp) AS timestamp,
                     outcome,
                     ifNull(reason, 'none') AS reason,
                     count() AS times_seen
                 FROM outcomes_raw_local
                 GROUP BY org_id, project_id, key_id, timestamp, outcome, reason
             """,
         ),
     ]
Beispiel #7
0
def get_forward_bucket_table_local(
    table_name: str,
    value_cols: Sequence[Column[Modifiers]],
) -> Sequence[operations.SqlOperation]:
    """Build the DDL creating a local metrics bucket table.

    The table's columns are the shared pre/post bucket columns with the
    caller-supplied value columns spliced in between; rows carry a 14-day
    TTL.
    """
    bucket_columns = [
        *PRE_VALUE_BUCKETS_COLUMNS,
        *value_cols,
        *POST_VALUES_BUCKETS_COLUMNS,
    ]
    engine = table_engines.MergeTree(
        storage_set=StorageSetKey.METRICS,
        order_by="(org_id, project_id, metric_id, timestamp)",
        partition_by="toMonday(timestamp)",
        ttl="timestamp + toIntervalDay(14)",
    )
    return [
        operations.CreateTable(
            storage_set=StorageSetKey.METRICS,
            table_name=table_name,
            columns=bucket_columns,
            engine=engine,
        )
    ]
Beispiel #8
0
# Multi-node cluster fixture (single_node=False) used by the engine tests
# to render replicated/distributed engine SQL against cluster_1/dist_hosts.
multi_node_cluster = ClickhouseCluster(
    host="host_2",
    port=9000,
    http_port=8123,
    user="******",
    password="",
    database="default",
    storage_sets={"events"},
    single_node=False,
    cluster_name="cluster_1",
    distributed_cluster_name="dist_hosts",
)

merge_test_cases = [
    pytest.param(
        table_engines.MergeTree(storage_set=StorageSetKey.EVENTS,
                                order_by="timestamp"),
        "MergeTree() ORDER BY timestamp",
        "ReplicatedMergeTree('/clickhouse/tables/events/{shard}/test_table', '{replica}') ORDER BY timestamp",
        id="Merge tree",
    ),
    pytest.param(
        table_engines.MergeTree(
            storage_set=StorageSetKey.TRANSACTIONS,
            order_by="date",
            settings={"index_granularity": "256"},
        ),
        "MergeTree() ORDER BY date SETTINGS index_granularity=256",
        "ReplicatedMergeTree('/clickhouse/tables/transactions/{shard}/test_table', '{replica}') ORDER BY date SETTINGS index_granularity=256",
        id="Merge tree with settings",
    ),
    pytest.param(