PLUGIN_LOG_ENTRIES_TABLE_BASE_SQL + """PARTITION BY plugin_id ORDER BY (team_id, id) {ttl_period} SETTINGS index_granularity=512 """).format( table_name=PLUGIN_LOG_ENTRIES_TABLE, cluster=CLICKHOUSE_CLUSTER, extra_fields=KAFKA_COLUMNS, engine=PLUGIN_LOG_ENTRIES_TABLE_ENGINE(), ttl_period=ttl_period("timestamp", TTL_WEEKS), ) KAFKA_PLUGIN_LOG_ENTRIES_TABLE_SQL = lambda: PLUGIN_LOG_ENTRIES_TABLE_BASE_SQL.format( table_name="kafka_" + PLUGIN_LOG_ENTRIES_TABLE, cluster=CLICKHOUSE_CLUSTER, engine=kafka_engine(topic=KAFKA_PLUGIN_LOG_ENTRIES), extra_fields="", ) PLUGIN_LOG_ENTRIES_TABLE_MV_SQL = """ CREATE MATERIALIZED VIEW {table_name}_mv ON CLUSTER '{cluster}' TO {database}.{table_name} AS SELECT id, team_id, plugin_id, plugin_config_id, timestamp, source, type, message,
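
# Pipeline note (applies to each table family in this file): every logical
# table is created three times. The plain table stores the rows; its
# `kafka_`-prefixed twin uses the Kafka table engine to consume a topic; and
# the `_mv` materialized view continuously copies rows from the Kafka table
# into the storage table. Only the storage table carries the ORDER BY / TTL /
# SETTINGS clauses, which is why the Kafka variants format the bare BASE_SQL
# without them.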
+ """ORDER BY (id, event_uuid, distinct_id, team_id) {ttl_period} SETTINGS index_granularity=512 """ ).format( table_name=DEAD_LETTER_QUEUE_TABLE, cluster=CLICKHOUSE_CLUSTER, extra_fields=KAFKA_COLUMNS, engine=DEAD_LETTER_QUEUE_TABLE_ENGINE(), ttl_period=ttl_period("_timestamp", 4), # 4 weeks ) KAFKA_DEAD_LETTER_QUEUE_TABLE_SQL = lambda: DEAD_LETTER_QUEUE_TABLE_BASE_SQL.format( table_name="kafka_" + DEAD_LETTER_QUEUE_TABLE, cluster=CLICKHOUSE_CLUSTER, engine=kafka_engine(topic=KAFKA_DEAD_LETTER_QUEUE), extra_fields="", ) DEAD_LETTER_QUEUE_TABLE_MV_SQL = """ CREATE MATERIALIZED VIEW IF NOT EXISTS {table_name}_mv ON CLUSTER {cluster} TO {database}.{table_name} AS SELECT id, event_uuid, event, properties, distinct_id, team_id, elements_chain, created_at,
GROUPS_TABLE_ENGINE = lambda: ReplacingMergeTree(GROUPS_TABLE, ver="_timestamp")

GROUPS_TABLE_SQL = lambda: (
    GROUPS_TABLE_BASE_SQL
    + """ORDER BY (team_id, group_type_index, group_key)
{storage_policy}
"""
).format(
    table_name=GROUPS_TABLE,
    cluster=CLICKHOUSE_CLUSTER,
    engine=GROUPS_TABLE_ENGINE(),
    extra_fields=KAFKA_COLUMNS,
    storage_policy=STORAGE_POLICY(),
)

KAFKA_GROUPS_TABLE_SQL = lambda: GROUPS_TABLE_BASE_SQL.format(
    table_name="kafka_" + GROUPS_TABLE,
    cluster=CLICKHOUSE_CLUSTER,
    engine=kafka_engine(KAFKA_GROUPS),
    extra_fields="",
)

# You must include the database here because of a bug in ClickHouse
# related to https://github.com/ClickHouse/ClickHouse/issues/10471
GROUPS_TABLE_MV_SQL = f"""
CREATE MATERIALIZED VIEW {GROUPS_TABLE}_mv ON CLUSTER '{CLICKHOUSE_CLUSTER}'
TO {CLICKHOUSE_DATABASE}.{GROUPS_TABLE}
AS SELECT
group_type_index,
group_key,
created_at,
team_id,
group_properties,
_timestamp,
_offset
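
# KAFKA_COLUMNS, spliced into the storage table via extra_fields, is assumed
# to declare columns roughly like:
#
#     , _timestamp DateTime
#     , _offset UInt64
#
# mirroring the Kafka engine's virtual columns. That is why the materialized
# view selects _timestamp and _offset explicitly, and why ReplacingMergeTree
# can use _timestamp as its version column.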
ver="_timestamp") GROUPS_TABLE_SQL = lambda: (GROUPS_TABLE_BASE_SQL + """Order By (team_id, group_type_index, group_key) {storage_policy} """).format( table_name=GROUPS_TABLE, cluster=CLICKHOUSE_CLUSTER, engine=GROUPS_TABLE_ENGINE(), extra_fields=KAFKA_COLUMNS, storage_policy=STORAGE_POLICY(), ) KAFKA_GROUPS_TABLE_SQL = lambda: GROUPS_TABLE_BASE_SQL.format( table_name="kafka_" + GROUPS_TABLE, cluster=CLICKHOUSE_CLUSTER, engine=kafka_engine(KAFKA_GROUPS), extra_fields="", ) # You must include the database here because of a bug in clickhouse # related to https://github.com/ClickHouse/ClickHouse/issues/10471 GROUPS_TABLE_MV_SQL = f""" CREATE MATERIALIZED VIEW {GROUPS_TABLE}_mv ON CLUSTER '{CLICKHOUSE_CLUSTER}' TO {CLICKHOUSE_DATABASE}.{GROUPS_TABLE} AS SELECT group_type_index, group_key, created_at, team_id, group_properties, _timestamp,
SESSION_RECORDING_EVENTS_TABLE_SQL = lambda: (
    SESSION_RECORDING_EVENTS_TABLE_BASE_SQL
    + """ORDER BY (team_id, toHour(timestamp), session_id, timestamp, uuid)
{ttl_period}
SETTINGS index_granularity=512
"""
).format(
    table_name=SESSION_RECORDING_EVENTS_DATA_TABLE(),
    cluster=settings.CLICKHOUSE_CLUSTER,
    materialized_columns=SESSION_RECORDING_EVENTS_MATERIALIZED_COLUMNS,
    extra_fields=KAFKA_COLUMNS,
    engine=SESSION_RECORDING_EVENTS_DATA_TABLE_ENGINE(),
    ttl_period=ttl_period(),
)

KAFKA_SESSION_RECORDING_EVENTS_TABLE_SQL = lambda: SESSION_RECORDING_EVENTS_TABLE_BASE_SQL.format(
    table_name="kafka_session_recording_events",
    cluster=settings.CLICKHOUSE_CLUSTER,
    engine=kafka_engine(topic=KAFKA_SESSION_RECORDING_EVENTS),
    materialized_columns="",
    extra_fields=KAFKA_COLUMNS,
)

SESSION_RECORDING_EVENTS_TABLE_MV_SQL = lambda: """
CREATE MATERIALIZED VIEW session_recording_events_mv ON CLUSTER {cluster}
TO {database}.{target_table}
AS SELECT
uuid,
timestamp,
team_id,
distinct_id,
session_id,
window_id,
snapshot_data,
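
# The ORDER BY key (team_id, toHour(timestamp), session_id, timestamp, uuid)
# clusters a team's recordings by hour and then by session, so reading one
# session's snapshots scans a narrow range. ttl_period() is called with no
# arguments here, so this table falls back to whatever default field and
# retention window the helper defines.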
{storage_policy} """).format( table_name=EVENTS_DATA_TABLE(), cluster=settings.CLICKHOUSE_CLUSTER, engine=EVENTS_DATA_TABLE_ENGINE(), extra_fields=KAFKA_COLUMNS, materialized_columns=EVENTS_TABLE_MATERIALIZED_COLUMNS, sample_by="SAMPLE BY cityHash64(distinct_id)", storage_policy=STORAGE_POLICY(), ) KAFKA_EVENTS_TABLE_SQL = lambda: EVENTS_TABLE_BASE_SQL.format( table_name="kafka_events", cluster=settings.CLICKHOUSE_CLUSTER, engine=kafka_engine(topic=KAFKA_EVENTS, serialization="Protobuf", proto_schema="events:Event"), extra_fields="", materialized_columns="", ) # You must include the database here because of a bug in clickhouse # related to https://github.com/ClickHouse/ClickHouse/issues/10471 EVENTS_TABLE_MV_SQL = lambda: """ CREATE MATERIALIZED VIEW events_mv ON CLUSTER {cluster} TO {database}.{target_table} AS SELECT uuid, event, properties, timestamp,
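
# Hypothetical usage sketch (sync_execute is assumed for illustration; it is
# not defined in this file): each lambda yields a plain DDL string once
# called, so a migration can execute the three statements in order:
#
#     sync_execute(EVENTS_TABLE_SQL())
#     sync_execute(KAFKA_EVENTS_TABLE_SQL())
#     sync_execute(EVENTS_TABLE_MV_SQL().format(cluster=..., database=..., target_table=...))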
PERSONS_TABLE_ENGINE = lambda: ReplacingMergeTree(PERSONS_TABLE, ver="_timestamp")

PERSONS_TABLE_SQL = lambda: (
    PERSONS_TABLE_BASE_SQL
    + """ORDER BY (team_id, id)
{storage_policy}
"""
).format(
    table_name=PERSONS_TABLE,
    cluster=CLICKHOUSE_CLUSTER,
    engine=PERSONS_TABLE_ENGINE(),
    extra_fields=KAFKA_COLUMNS,
    storage_policy=STORAGE_POLICY(),
)

KAFKA_PERSONS_TABLE_SQL = lambda: PERSONS_TABLE_BASE_SQL.format(
    table_name="kafka_" + PERSONS_TABLE,
    cluster=CLICKHOUSE_CLUSTER,
    engine=kafka_engine(KAFKA_PERSON),
    extra_fields="",
)

# You must include the database here because of a bug in ClickHouse
# related to https://github.com/ClickHouse/ClickHouse/issues/10471
PERSONS_TABLE_MV_SQL = """
CREATE MATERIALIZED VIEW {table_name}_mv ON CLUSTER '{cluster}'
TO {database}.{table_name}
AS SELECT
id,
created_at,
team_id,
properties,
is_identified,
is_deleted,
_timestamp,
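
# ReplacingMergeTree(PERSONS_TABLE, ver="_timestamp") deduplicates rows that
# share the ORDER BY key (team_id, id), keeping the row with the greatest
# _timestamp -- but only when parts merge, which happens asynchronously.
# Readers must therefore still collapse duplicates themselves; a common
# pattern (a sketch, not necessarily what the surrounding codebase does) is:
#
#     SELECT id, argMax(properties, _timestamp) AS properties
#     FROM person  -- assuming PERSONS_TABLE == "person"
#     GROUP BY team_id, id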