def test_accepted_services(self):
    v1 = ServiceVersionRequirement(service="postgresql", supported_version="==14.0.0")
    v2 = ServiceVersionRequirement(service="clickhouse", supported_version="==21.6.0")
    v3 = ServiceVersionRequirement(service="redis", supported_version="==6.2.6")

    self.assertEqual(v1.service, "postgresql")
    self.assertEqual(v2.service, "clickhouse")
    self.assertEqual(v3.service, "redis")

    self.assertEqual(type(v1.supported_version), SimpleSpec)
    self.assertEqual(type(v2.supported_version), SimpleSpec)
    self.assertEqual(type(v3.supported_version), SimpleSpec)

    self.assertEqual(str(v1.supported_version), "==14.0.0")
    self.assertEqual(str(v2.supported_version), "==21.6.0")
    self.assertEqual(str(v3.supported_version), "==6.2.6")

    # assertRaises ensures the test fails if no exception is raised,
    # which a bare try/except would silently allow
    with self.assertRaises(Exception) as context:
        ServiceVersionRequirement(service="kea", supported_version="==2.5.0")
    self.assertEqual(
        str(context.exception),
        "service kea cannot be used to specify a version requirement. service should be one of clickhouse, postgresql, redis",
    )
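The test pins down the constructor's contract: validate the service name and parse the range string into a SimpleSpec. A minimal sketch of a class satisfying these assertions, assuming the semantic_version package (where SimpleSpec comes from); the real PostHog implementation may differ in details such as how it discovers each service's running version.

# Hypothetical sketch of the constructor contract exercised above.
from semantic_version import SimpleSpec


class ServiceVersionRequirement:
    SUPPORTED_SERVICES = ("clickhouse", "postgresql", "redis")

    def __init__(self, service: str, supported_version: str):
        if service not in self.SUPPORTED_SERVICES:
            raise Exception(
                f"service {service} cannot be used to specify a version requirement. "
                f"service should be one of {', '.join(self.SUPPORTED_SERVICES)}"
            )
        self.service = service
        # SimpleSpec parses range strings like "==14.0.0" or ">=11.0.0,<=14.1.0"
        self.supported_version = SimpleSpec(supported_version)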
class Migration(AsyncMigrationDefinition):

    description = (
        "Schema change to the events table ensuring our SAMPLE BY clause is compatible with ClickHouse >=21.7.0."
    )

    depends_on = "0001_events_sample_by"

    posthog_min_version = "1.30.0"
    posthog_max_version = "1.33.9"

    service_version_requirements = [
        ServiceVersionRequirement(service="clickhouse", supported_version=">=21.6.0"),
    ]

    @cached_property
    def operations(self):
        if self._events_table_engine() == "Distributed":
            # Note: This _should_ be impossible but hard to ensure.
            raise RuntimeError("Cannot run the migration as `events` table is already Distributed engine.")

        create_table_op = [
            AsyncMigrationOperation.simple_op(
                database=AnalyticsDBMS.CLICKHOUSE,
                sql=f"""
                CREATE TABLE IF NOT EXISTS {TEMPORARY_TABLE_NAME} ON CLUSTER '{CLICKHOUSE_CLUSTER}' AS {EVENTS_TABLE_NAME}
                ENGINE = ReplacingMergeTree(_timestamp)
                PARTITION BY toYYYYMM(timestamp)
                ORDER BY (team_id, toDate(timestamp), event, cityHash64(distinct_id), cityHash64(uuid))
                SAMPLE BY cityHash64(distinct_id)
                """,
                rollback=f"DROP TABLE IF EXISTS {TEMPORARY_TABLE_NAME} ON CLUSTER '{CLICKHOUSE_CLUSTER}'",
            )
        ]

        old_partition_ops = []
        previous_partition = self._partitions[0] if len(self._partitions) > 0 else None
        for partition in self._partitions[1:]:
            old_partition_ops.append(generate_insert_into_op(previous_partition, partition))
            previous_partition = partition

        detach_mv_ops = [
            AsyncMigrationOperation(
                fn=lambda _: setattr(config, "COMPUTE_MATERIALIZED_COLUMNS_ENABLED", False),
                rollback_fn=lambda _: setattr(config, "COMPUTE_MATERIALIZED_COLUMNS_ENABLED", True),
            ),
            AsyncMigrationOperation.simple_op(
                database=AnalyticsDBMS.CLICKHOUSE,
                sql=f"DETACH TABLE {EVENTS_TABLE_NAME}_mv ON CLUSTER '{CLICKHOUSE_CLUSTER}'",
                rollback=f"ATTACH TABLE {EVENTS_TABLE_NAME}_mv ON CLUSTER '{CLICKHOUSE_CLUSTER}'",
            ),
        ]

        last_partition_op = [
            generate_insert_into_op(self._partitions[-1] if len(self._partitions) > 0 else 0)
        ]

        def optimize_table_fn(query_id):
            default_timeout = ASYNC_MIGRATIONS_DEFAULT_TIMEOUT_SECONDS
            try:
                execute_op_clickhouse(
                    f"OPTIMIZE TABLE {EVENTS_TABLE_NAME} FINAL",
                    query_id,
                    settings={
                        "max_execution_time": default_timeout,
                        "send_timeout": default_timeout,
                        "receive_timeout": default_timeout,
                    },
                )
            except Exception:
                # TODO: only swallow timeout errors here, not every exception
                pass

        post_insert_ops = [
            AsyncMigrationOperation.simple_op(
                database=AnalyticsDBMS.CLICKHOUSE,
                sql=f"""
                RENAME TABLE
                    {EVENTS_TABLE_NAME} to {BACKUP_TABLE_NAME},
                    {TEMPORARY_TABLE_NAME} to {EVENTS_TABLE_NAME}
                ON CLUSTER '{CLICKHOUSE_CLUSTER}'
                """,
                rollback=f"""
                RENAME TABLE
                    {EVENTS_TABLE_NAME} to {FAILED_EVENTS_TABLE_NAME},
                    {BACKUP_TABLE_NAME} to {EVENTS_TABLE_NAME}
                ON CLUSTER '{CLICKHOUSE_CLUSTER}'
                """,
            ),
            AsyncMigrationOperation.simple_op(
                database=AnalyticsDBMS.CLICKHOUSE,
                sql=f"ATTACH TABLE {EVENTS_TABLE_NAME}_mv ON CLUSTER '{CLICKHOUSE_CLUSTER}'",
                rollback=f"DETACH TABLE {EVENTS_TABLE_NAME}_mv ON CLUSTER '{CLICKHOUSE_CLUSTER}'",
            ),
            AsyncMigrationOperation(
                fn=lambda _: setattr(config, "COMPUTE_MATERIALIZED_COLUMNS_ENABLED", True),
                rollback_fn=lambda _: setattr(config, "COMPUTE_MATERIALIZED_COLUMNS_ENABLED", False),
            ),
            AsyncMigrationOperation(fn=optimize_table_fn),
        ]

        _operations = create_table_op + old_partition_ops + detach_mv_ops + last_partition_op + post_insert_ops
        return _operations

    def is_required(self):
        if settings.MULTI_TENANCY:
            return False
        res = sync_execute(f"SHOW CREATE TABLE {EVENTS_TABLE_NAME}")
        return (
            "ORDER BY (team_id, toDate(timestamp), event, cityHash64(distinct_id), cityHash64(uuid))" not in res[0][0]
        )

    def precheck(self):
        events_failed_table_exists = sync_execute(f"EXISTS {FAILED_EVENTS_TABLE_NAME}")[0][0]
        if events_failed_table_exists:
            return (
                False,
                f"{FAILED_EVENTS_TABLE_NAME} already exists. We use this table as a backup if the migration fails. You can delete or rename it and restart the migration.",
            )

        events_table = "sharded_events" if CLICKHOUSE_REPLICATION else "events"
        result = sync_execute(
            f"""
            SELECT (free_space.size / greatest(event_table_size.size, 1)) FROM
                (SELECT 1 as jc, 'event_table_size', sum(bytes) as size FROM system.parts WHERE table = '{events_table}' AND database='{CLICKHOUSE_DATABASE}') event_table_size
            JOIN
                (SELECT 1 as jc, 'free_disk_space', free_space as size FROM system.disks WHERE name = 'default') free_space
            ON event_table_size.jc=free_space.jc
            """
        )
        event_size_to_free_space_ratio = result[0][0]

        # Require 1.5x the events table in free space to be available
        if event_size_to_free_space_ratio > 1.5:
            return (True, None)
        else:
            result = sync_execute(
                f"""
                SELECT formatReadableSize(free_space.size - (free_space.free_space - (1.5 * event_table_size.size ))) as required FROM
                    (SELECT 1 as jc, 'event_table_size', sum(bytes) as size FROM system.parts WHERE table = '{events_table}' AND database='{CLICKHOUSE_DATABASE}') event_table_size
                JOIN
                    (SELECT 1 as jc, 'free_disk_space', free_space, total_space as size FROM system.disks WHERE name = 'default') free_space
                ON event_table_size.jc=free_space.jc
                """
            )
            required_space = result[0][0]
            return (False, f"Upgrade your ClickHouse storage to at least {required_space}.")

    def healthcheck(self):
        result = sync_execute("SELECT free_space FROM system.disks")
        # Fail if less than 100MB of free space is left
        if int(result[0][0]) < 100000000:
            return (False, "ClickHouse available storage below 100MB")

        return (True, None)

    @cached_property
    def _partitions(self):
        return list(
            sorted(
                row[0]
                for row in sync_execute(
                    f"SELECT DISTINCT toUInt32(partition) FROM system.parts WHERE database = %(database)s AND table='{EVENTS_TABLE_NAME}'",
                    {"database": CLICKHOUSE_DATABASE},
                )
            )
        )

    def _events_table_engine(self) -> str:
        rows = sync_execute(
            "SELECT engine FROM system.tables WHERE database = %(database)s AND name = 'events'",
            {"database": CLICKHOUSE_DATABASE},
        )
        return rows[0][0]
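The partition-copying operations above rely on a generate_insert_into_op helper that is not shown in this excerpt. A minimal sketch of what it might look like, assuming partitions are keyed by toYYYYMM(timestamp) as in the CREATE TABLE statement and reusing the migration's names; the rollback choice here is a guess.

# Hypothetical sketch of the generate_insert_into_op helper used above:
# copy one partition range from the old events table into the temporary one.
def generate_insert_into_op(partition_gte: int, partition_lt=None) -> AsyncMigrationOperation:
    # Omitting the upper bound covers everything from partition_gte onward,
    # which matches the single-argument call for the last partition.
    lt_expression = f"AND toYYYYMM(timestamp) < {partition_lt}" if partition_lt else ""
    return AsyncMigrationOperation.simple_op(
        database=AnalyticsDBMS.CLICKHOUSE,
        sql=f"""
        INSERT INTO {TEMPORARY_TABLE_NAME}
        SELECT * FROM {EVENTS_TABLE_NAME}
        WHERE toYYYYMM(timestamp) >= {partition_gte} {lt_expression}
        """,
        # Assumed rollback: drop the partition that was just copied
        rollback=f"ALTER TABLE {TEMPORARY_TABLE_NAME} DROP PARTITION {partition_gte}",
    )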
class Migration(AsyncMigrationDefinition):

    description = "An example async migration."

    posthog_min_version = "1.29.0"
    posthog_max_version = "1.30.0"

    service_version_requirements = [
        ServiceVersionRequirement(service="clickhouse", supported_version=">=21.6.0,<21.7.0"),
    ]

    operations = [
        AsyncMigrationOperation.simple_op(
            database=AnalyticsDBMS.CLICKHOUSE,
            sql=PERSONS_DISTINCT_ID_TABLE_SQL().replace(PERSONS_DISTINCT_ID_TABLE, TEMPORARY_TABLE_NAME, 1),
            rollback=f"DROP TABLE IF EXISTS {TEMPORARY_TABLE_NAME} ON CLUSTER '{CLICKHOUSE_CLUSTER}'",
        ),
        AsyncMigrationOperation.simple_op(
            database=AnalyticsDBMS.CLICKHOUSE,
            sql=f"DROP TABLE person_distinct_id_mv ON CLUSTER '{CLICKHOUSE_CLUSTER}'",
            rollback=PERSONS_DISTINCT_ID_TABLE_MV_SQL,
        ),
        AsyncMigrationOperation.simple_op(
            database=AnalyticsDBMS.CLICKHOUSE,
            sql=f"DROP TABLE kafka_person_distinct_id ON CLUSTER '{CLICKHOUSE_CLUSTER}'",
            rollback=KAFKA_PERSONS_DISTINCT_ID_TABLE_SQL(),
        ),
        AsyncMigrationOperation.simple_op(
            database=AnalyticsDBMS.CLICKHOUSE,
            sql=f"""
                INSERT INTO {TEMPORARY_TABLE_NAME} (distinct_id, person_id, team_id, _sign, _timestamp, _offset)
                SELECT
                    distinct_id,
                    person_id,
                    team_id,
                    if(is_deleted==0, 1, -1) as _sign,
                    _timestamp,
                    _offset
                FROM {PERSONS_DISTINCT_ID_TABLE}
            """,
            rollback=f"DROP TABLE IF EXISTS {TEMPORARY_TABLE_NAME}",
        ),
        AsyncMigrationOperation.simple_op(
            database=AnalyticsDBMS.CLICKHOUSE,
            sql=f"""
                RENAME TABLE
                    {CLICKHOUSE_DATABASE}.{PERSONS_DISTINCT_ID_TABLE} to {CLICKHOUSE_DATABASE}.person_distinct_id_async_migration_backup,
                    {CLICKHOUSE_DATABASE}.{TEMPORARY_TABLE_NAME} to {CLICKHOUSE_DATABASE}.{PERSONS_DISTINCT_ID_TABLE}
                ON CLUSTER '{CLICKHOUSE_CLUSTER}'
            """,
            rollback=f"""
                RENAME TABLE
                    {CLICKHOUSE_DATABASE}.{PERSONS_DISTINCT_ID_TABLE} to {CLICKHOUSE_DATABASE}.{TEMPORARY_TABLE_NAME},
                    {CLICKHOUSE_DATABASE}.person_distinct_id_async_migration_backup to {CLICKHOUSE_DATABASE}.{PERSONS_DISTINCT_ID_TABLE}
                ON CLUSTER '{CLICKHOUSE_CLUSTER}'
            """,
        ),
        AsyncMigrationOperation.simple_op(
            database=AnalyticsDBMS.CLICKHOUSE,
            sql=KAFKA_PERSONS_DISTINCT_ID_TABLE_SQL(),
            rollback=f"DROP TABLE IF EXISTS kafka_person_distinct_id ON CLUSTER '{CLICKHOUSE_CLUSTER}'",
        ),
        AsyncMigrationOperation.simple_op(
            database=AnalyticsDBMS.CLICKHOUSE,
            sql=PERSONS_DISTINCT_ID_TABLE_MV_SQL,
            rollback=f"DROP TABLE IF EXISTS person_distinct_id_mv ON CLUSTER '{CLICKHOUSE_CLUSTER}'",
        ),
        AsyncMigrationOperation(fn=example_fn, rollback_fn=example_rollback_fn),
    ]

    def healthcheck(self):
        result = sync_execute("SELECT total_space, free_space FROM system.disks")
        total_space = result[0][0]
        free_space = result[0][1]

        if free_space > total_space / 3:
            return (True, None)
        else:
            return (False, "Upgrade your ClickHouse storage.")

    def progress(self, _):
        result = sync_execute(f"SELECT COUNT(1) FROM {TEMPORARY_TABLE_NAME}")
        result2 = sync_execute(f"SELECT COUNT(1) FROM {PERSONS_DISTINCT_ID_TABLE}")

        total_events_to_move = result2[0][0]
        total_events_moved = result[0][0]

        progress = 100 * total_events_moved / total_events_to_move
        return progress

    def is_required(self):
        res = sync_execute("SHOW CREATE TABLE person_distinct_id")
        return "ReplacingMergeTree" in res[0][0]
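example_fn and example_rollback_fn are referenced above but not defined in this excerpt. Since the migration is illustrative, hypothetical stand-ins only need to accept the query id the framework passes to operation functions:

# Hypothetical stand-ins for the example_fn / example_rollback_fn referenced above.
import logging

logger = logging.getLogger(__name__)


def example_fn(query_id: str):
    logger.info("example operation ran with query_id %s", query_id)


def example_rollback_fn(query_id: str):
    logger.info("example operation rolled back with query_id %s", query_id)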
def test_ranges(self):
    v1 = ServiceVersionRequirement(service="postgresql", supported_version="==14.0.0")
    in_range, service_version = v1.is_service_in_accepted_version()
    self.assertEqual(in_range, False)
    self.assertEqual(str(service_version), "12.1.2")

    v2 = ServiceVersionRequirement(service="postgresql", supported_version="==12.1.2")
    in_range, _ = v2.is_service_in_accepted_version()
    self.assertEqual(in_range, True)

    v3 = ServiceVersionRequirement(service="postgresql", supported_version=">=12.0.0,<12.1.2")
    in_range, _ = v3.is_service_in_accepted_version()
    self.assertEqual(in_range, False)

    v4 = ServiceVersionRequirement(service="postgresql", supported_version=">=12.0.0,<=12.1.2")
    in_range, _ = v4.is_service_in_accepted_version()
    self.assertEqual(in_range, True)

    v5 = ServiceVersionRequirement(service="postgresql", supported_version=">=11.0.0,<=13.0.0")
    in_range, _ = v5.is_service_in_accepted_version()
    self.assertEqual(in_range, True)
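The expected "12.1.2" result implies the test environment's Postgres reports that version. A minimal sketch of the check itself, assuming semantic_version.Version and a hypothetical get_service_version helper that queries the running service (e.g. SELECT version() for postgresql):

# Hypothetical sketch of is_service_in_accepted_version.
from semantic_version import Version


def is_service_in_accepted_version(self):
    # get_service_version is an assumed helper returning e.g. "12.1.2"
    service_version = Version(get_service_version(self.service))
    # SimpleSpec supports membership tests, so this checks the range directly
    return service_version in self.supported_version, service_version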
from posthog.settings.base_variables import DEBUG, IS_COLLECT_STATIC, TEST
from posthog.settings.utils import get_from_env, print_warning, str_to_bool
from posthog.version_requirement import ServiceVersionRequirement

SKIP_SERVICE_VERSION_REQUIREMENTS = get_from_env(
    "SKIP_SERVICE_VERSION_REQUIREMENTS", TEST or IS_COLLECT_STATIC or DEBUG, type_cast=str_to_bool
)

if SKIP_SERVICE_VERSION_REQUIREMENTS and not (TEST or DEBUG):
    print_warning(["Skipping service version requirements. This is dangerous and PostHog might not work as expected!"])

SERVICE_VERSION_REQUIREMENTS = [
    ServiceVersionRequirement(service="postgresql", supported_version=">=11.0.0,<=14.1.0"),
    ServiceVersionRequirement(service="redis", supported_version=">=5.0.0,<=6.3.0"),
    ServiceVersionRequirement(service="clickhouse", supported_version=">=21.6.0,<21.12.0"),
]
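With this list in place, startup code can iterate the requirements and fail fast when a service is out of range, unless SKIP_SERVICE_VERSION_REQUIREMENTS is set. A minimal sketch of that loop; the actual call site and error handling in PostHog may differ.

# Hypothetical startup check built on the settings above.
from posthog.settings import SERVICE_VERSION_REQUIREMENTS, SKIP_SERVICE_VERSION_REQUIREMENTS


def check_service_versions():
    if SKIP_SERVICE_VERSION_REQUIREMENTS:
        return
    for requirement in SERVICE_VERSION_REQUIREMENTS:
        in_range, version = requirement.is_service_in_accepted_version()
        if not in_range:
            raise Exception(
                f"Service {requirement.service} is at version {version}, "
                f"expected a version matching {requirement.supported_version}"
            )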