def test_single_transaction(started_cluster): conn = get_postgres_conn( ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True, auto_commit=False, ) cursor = conn.cursor() table_name = "postgresql_replica_0" create_postgres_table(cursor, table_name) conn.commit() pg_manager.create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) assert_nested_table_is_created(instance, table_name) for query in queries: print("query {}".format(query)) cursor.execute(query.format(0)) time.sleep(5) result = instance.query(f"select count() from test_database.{table_name}") # no commit yet assert int(result) == 0 conn.commit() check_tables_are_synchronized(instance, table_name)
def test_predefined_connection_configuration(started_cluster): cursor = pg_manager.get_db_cursor() cursor.execute(f'DROP TABLE IF EXISTS test_table') cursor.execute(f'CREATE TABLE test_table (key integer PRIMARY KEY, value integer)') cursor.execute(f'INSERT INTO test_table SELECT 1, 2') instance.query("CREATE DATABASE test_database ENGINE = MaterializedPostgreSQL(postgres1) SETTINGS materialized_postgresql_tables_list='test_table'") check_tables_are_synchronized(instance, "test_table"); pg_manager.drop_materialized_db()
def check_all_tables_are_synchronized(): for i in range(NUM_TABLES): print('checking table', i) check_tables_are_synchronized( instance, f"postgresql_replica_{i}", postgres_database=clickhouse_postgres_db) print('synchronization Ok')
def test_quoting_2(started_cluster): table_name = 'user' pg_manager.create_and_fill_postgres_table(table_name) pg_manager.create_materialized_db( ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, settings=[f"materialized_postgresql_tables_list = '{table_name}'"]) check_tables_are_synchronized(instance, table_name)
def check_all_tables_are_synchronized(): for i in range(NUM_TABLES): print('checking table', i) check_tables_are_synchronized( instance, "postgresql_replica_{}".format(i), schema_name=schema_name, postgres_database=clickhouse_postgres_db) print('synchronization Ok')
def check_all_tables_are_synchronized(): for i in range(schemas_num): schema_name = f'schema{i}' clickhouse_postgres_db = f'clickhouse_postgres_db{i}' for ti in range(NUM_TABLES): table_name = f'postgresql_replica_{ti}' print(f'checking table {schema_name}.{table_name}') check_tables_are_synchronized(instance, f'{table_name}', schema_name=schema_name, postgres_database=clickhouse_postgres_db); print('synchronized Ok')
def test_predefined_connection_configuration(started_cluster): if instance.is_built_with_sanitizer() or instance.is_debug_build(): pytest.skip("Temporary disabled (FIXME)") cursor = pg_manager.get_db_cursor() cursor.execute(f"DROP TABLE IF EXISTS test_table") cursor.execute( f"CREATE TABLE test_table (key integer PRIMARY KEY, value integer)") cursor.execute(f"INSERT INTO test_table SELECT 1, 2") instance.query( "CREATE DATABASE test_database ENGINE = MaterializedPostgreSQL(postgres1) SETTINGS materialized_postgresql_tables_list='test_table'" ) check_tables_are_synchronized(instance, "test_table") pg_manager.drop_materialized_db()
def check_all_tables_are_synchronized(): for i in range(schemas_num): schema_name = f"schema{i}" clickhouse_postgres_db = f"clickhouse_postgres_db{i}" for ti in range(NUM_TABLES): table_name = f"postgresql_replica_{ti}" print(f"checking table {schema_name}.{table_name}") check_tables_are_synchronized( instance, f"{table_name}", schema_name=schema_name, postgres_database=clickhouse_postgres_db, ) print("synchronized Ok")
def test_remove_table_from_replication(started_cluster): NUM_TABLES = 5 pg_manager.create_and_fill_postgres_tables(NUM_TABLES, 10000) pg_manager.create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) check_several_tables_are_synchronized(instance, NUM_TABLES) result = instance.query("SHOW TABLES FROM test_database") assert(result == "postgresql_replica_0\npostgresql_replica_1\npostgresql_replica_2\npostgresql_replica_3\npostgresql_replica_4\n") result = instance.query('SHOW CREATE DATABASE test_database') assert(result[:63] == "CREATE DATABASE test_database\\nENGINE = MaterializedPostgreSQL(") assert(result[-59:] == "\\'postgres_database\\', \\'postgres\\', \\'mysecretpassword\\')\n") table_name = 'postgresql_replica_4' instance.query(f'DETACH TABLE test_database.{table_name} PERMANENTLY'); result = instance.query_and_get_error(f'SELECT * FROM test_database.{table_name}') assert("doesn't exist" in result) result = instance.query("SHOW TABLES FROM test_database") assert(result == "postgresql_replica_0\npostgresql_replica_1\npostgresql_replica_2\npostgresql_replica_3\n") result = instance.query('SHOW CREATE DATABASE test_database') assert(result[:63] == "CREATE DATABASE test_database\\nENGINE = MaterializedPostgreSQL(") assert(result[-138:] == ")\\nSETTINGS materialized_postgresql_tables_list = \\'postgresql_replica_0,postgresql_replica_1,postgresql_replica_2,postgresql_replica_3\\'\n") instance.query(f'ATTACH TABLE test_database.{table_name}'); check_tables_are_synchronized(instance, table_name); check_several_tables_are_synchronized(instance, NUM_TABLES) instance.query(f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers(10000, 10000)") check_tables_are_synchronized(instance, table_name); result = instance.query('SHOW CREATE DATABASE test_database') assert(result[:63] == "CREATE DATABASE test_database\\nENGINE = MaterializedPostgreSQL(") assert(result[-159:] == ")\\nSETTINGS materialized_postgresql_tables_list = \\'postgresql_replica_0,postgresql_replica_1,postgresql_replica_2,postgresql_replica_3,postgresql_replica_4\\'\n") table_name = 'postgresql_replica_1' instance.query(f'DETACH TABLE test_database.{table_name} PERMANENTLY'); result = instance.query('SHOW CREATE DATABASE test_database') assert(result[:63] == "CREATE DATABASE test_database\\nENGINE = MaterializedPostgreSQL(") assert(result[-138:] == ")\\nSETTINGS materialized_postgresql_tables_list = \\'postgresql_replica_0,postgresql_replica_2,postgresql_replica_3,postgresql_replica_4\\'\n") cursor = pg_manager.get_db_cursor() cursor.execute(f'drop table if exists postgresql_replica_0;') # Removing from replication table which does not exist in PostgreSQL must be ok. instance.query('DETACH TABLE test_database.postgresql_replica_0 PERMANENTLY'); assert instance.contains_in_log("from publication, because table does not exist in PostgreSQL")
def test_load_and_sync_subset_of_database_tables(started_cluster): NUM_TABLES = 10 pg_manager.create_and_fill_postgres_tables(NUM_TABLES) publication_tables = "" for i in range(NUM_TABLES): if i < int(NUM_TABLES / 2): if publication_tables != "": publication_tables += ", " publication_tables += f"postgresql_replica_{i}" pg_manager.create_materialized_db( ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, settings=[ "materialized_postgresql_tables_list = '{}'".format( publication_tables) ], ) time.sleep(1) for i in range(int(NUM_TABLES / 2)): table_name = f"postgresql_replica_{i}" assert_nested_table_is_created(instance, table_name) result = instance.query( """SELECT count() FROM system.tables WHERE database = 'test_database';""" ) assert int(result) == int(NUM_TABLES / 2) database_tables = instance.query("SHOW TABLES FROM test_database") for i in range(NUM_TABLES): table_name = "postgresql_replica_{}".format(i) if i < int(NUM_TABLES / 2): assert table_name in database_tables else: assert table_name not in database_tables instance.query( "INSERT INTO postgres_database.{} SELECT 50 + number, {} from numbers(100)" .format(table_name, i)) for i in range(NUM_TABLES): table_name = f"postgresql_replica_{i}" if i < int(NUM_TABLES / 2): check_tables_are_synchronized(instance, table_name)
def test_changing_replica_identity_value(started_cluster): conn = get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True) cursor = conn.cursor() create_postgres_table(cursor, 'postgresql_replica') instance.query( "INSERT INTO postgres_database.postgresql_replica SELECT 50 + number, number from numbers(50)" ) pg_manager.create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) instance.query( "INSERT INTO postgres_database.postgresql_replica SELECT 100 + number, number from numbers(50)" ) check_tables_are_synchronized(instance, 'postgresql_replica') cursor.execute("UPDATE postgresql_replica SET key=key-25 WHERE key<100 ") check_tables_are_synchronized(instance, 'postgresql_replica')
def test_concurrent_transactions(started_cluster): def transaction(thread_id): conn = get_postgres_conn( ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True, auto_commit=False, ) cursor = conn.cursor() for query in queries: cursor.execute(query.format(thread_id)) print("thread {}, query {}".format(thread_id, query)) conn.commit() NUM_TABLES = 6 pg_manager.create_and_fill_postgres_tables(NUM_TABLES, numbers=0) threads = [] threads_num = 6 for i in range(threads_num): threads.append(threading.Thread(target=transaction, args=(i, ))) pg_manager.create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) for thread in threads: time.sleep(random.uniform(0, 0.5)) thread.start() for thread in threads: thread.join() for i in range(NUM_TABLES): check_tables_are_synchronized(instance, f"postgresql_replica_{i}") count1 = instance.query( f"SELECT count() FROM postgres_database.postgresql_replica_{i}") count2 = instance.query( f"SELECT count() FROM (SELECT * FROM test_database.postgresql_replica_{i})" ) print(int(count1), int(count2), sep=" ") assert int(count1) == int(count2)
def test_virtual_columns(started_cluster): conn = get_postgres_conn( ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True, ) cursor = conn.cursor() table_name = "postgresql_replica_0" create_postgres_table(cursor, table_name) pg_manager.create_materialized_db( ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, settings=["materialized_postgresql_allow_automatic_update = 1"], ) assert_nested_table_is_created(instance, table_name) instance.query( f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers(10)" ) check_tables_are_synchronized(instance, table_name) # just check that it works, no check with `expected` because _version is taken as LSN, which will be different each time. result = instance.query( f"SELECT key, value, _sign, _version FROM test_database.{table_name};") print(result) cursor.execute(f"ALTER TABLE {table_name} ADD COLUMN value2 integer") instance.query( f"INSERT INTO postgres_database.{table_name} SELECT number, number, number from numbers(10, 10)" ) assert_number_of_columns(instance, 3, table_name) check_tables_are_synchronized(instance, table_name) result = instance.query( "SELECT key, value, value2, _sign, _version FROM test_database.postgresql_replica_0;" ) print(result) instance.query( f"INSERT INTO postgres_database.{table_name} SELECT number, number, number from numbers(20, 10)" ) check_tables_are_synchronized(instance, table_name) result = instance.query( f"SELECT key, value, value2, _sign, _version FROM test_database.{table_name};" ) print(result)
def test_replica_identity_index(started_cluster): conn = get_postgres_conn( ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True, ) cursor = conn.cursor() create_postgres_table(cursor, "postgresql_replica", template=postgres_table_template_3) cursor.execute( "CREATE unique INDEX idx on postgresql_replica(key1, key2);") cursor.execute( "ALTER TABLE postgresql_replica REPLICA IDENTITY USING INDEX idx") instance.query( "INSERT INTO postgres_database.postgresql_replica SELECT number, number, number, number from numbers(50, 10)" ) pg_manager.create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) instance.query( "INSERT INTO postgres_database.postgresql_replica SELECT number, number, number, number from numbers(100, 10)" ) check_tables_are_synchronized(instance, "postgresql_replica", order_by="key1") cursor.execute( "UPDATE postgresql_replica SET key1=key1-25 WHERE key1<100 ") cursor.execute( "UPDATE postgresql_replica SET key2=key2-25 WHERE key2>100 ") cursor.execute( "UPDATE postgresql_replica SET value1=value1+100 WHERE key1<100 ") cursor.execute( "UPDATE postgresql_replica SET value2=value2+200 WHERE key2>100 ") check_tables_are_synchronized(instance, "postgresql_replica", order_by="key1") cursor.execute("DELETE FROM postgresql_replica WHERE key2<75;") check_tables_are_synchronized(instance, "postgresql_replica", order_by="key1")
def test_user_managed_slots(started_cluster): slot_name = "user_slot" table_name = "test_table" pg_manager.create_and_fill_postgres_table(table_name) replication_connection = get_postgres_conn( ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True, replication=True, auto_commit=True, ) snapshot = create_replication_slot(replication_connection, slot_name=slot_name) pg_manager.create_materialized_db( ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, settings=[ f"materialized_postgresql_replication_slot = '{slot_name}'", f"materialized_postgresql_snapshot = '{snapshot}'", ], ) check_tables_are_synchronized(instance, table_name) instance.query( "INSERT INTO postgres_database.{} SELECT number, number from numbers(10000, 10000)" .format(table_name)) check_tables_are_synchronized(instance, table_name) instance.restart_clickhouse() instance.query( "INSERT INTO postgres_database.{} SELECT number, number from numbers(20000, 10000)" .format(table_name)) check_tables_are_synchronized(instance, table_name) pg_manager.drop_materialized_db() drop_replication_slot(replication_connection, slot_name) replication_connection.close()
def test_table_schema_changes_2(started_cluster): cursor = pg_manager.get_db_cursor() table_name = "test_table" create_postgres_table(cursor, table_name, template=postgres_table_template_2) instance.query( f"INSERT INTO postgres_database.{table_name} SELECT number, number, number, number from numbers(25)" ) pg_manager.create_materialized_db( ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, settings=[ "materialized_postgresql_allow_automatic_update = 1, materialized_postgresql_tables_list='test_table'" ], ) instance.query( f"INSERT INTO postgres_database.{table_name} SELECT number, number, number, number from numbers(25, 25)" ) check_tables_are_synchronized(instance, table_name) cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN value1") cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN value2") cursor.execute(f"ALTER TABLE {table_name} ADD COLUMN value1 Text") cursor.execute(f"ALTER TABLE {table_name} ADD COLUMN value2 Text") cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN value3") cursor.execute(f"ALTER TABLE {table_name} ADD COLUMN value3 Text") cursor.execute(f"ALTER TABLE {table_name} ADD COLUMN value4 Text") cursor.execute(f"UPDATE {table_name} SET value3 = 'kek' WHERE key%2=0") check_tables_are_synchronized(instance, table_name) instance.query( f"INSERT INTO postgres_database.{table_name} SELECT number, toString(number), toString(number), toString(number), toString(number) from numbers(50, 25)" ) cursor.execute(f"ALTER TABLE {table_name} ADD COLUMN value5 Integer") cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN value2") instance.query( f"INSERT INTO postgres_database.{table_name} SELECT number, toString(number), toString(number), toString(number), number from numbers(75, 25)" ) check_tables_are_synchronized(instance, table_name) instance.restart_clickhouse() check_tables_are_synchronized(instance, table_name) cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN value5") cursor.execute(f"ALTER TABLE {table_name} ADD COLUMN value5 Text") instance.query( f"INSERT INTO postgres_database.{table_name} SELECT number, toString(number), toString(number), toString(number), toString(number) from numbers(100, 25)" ) check_tables_are_synchronized(instance, table_name) cursor.execute(f"ALTER TABLE {table_name} ADD COLUMN value6 Text") cursor.execute(f"ALTER TABLE {table_name} ADD COLUMN value7 Integer") cursor.execute(f"ALTER TABLE {table_name} ADD COLUMN value8 Integer") cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN value5") instance.query( f"INSERT INTO postgres_database.{table_name} SELECT number, toString(number), toString(number), toString(number), toString(number), number, number from numbers(125, 25)" ) check_tables_are_synchronized(instance, table_name)
def test_database_with_single_non_default_schema(started_cluster): cursor = pg_manager.get_db_cursor() NUM_TABLES = 5 schema_name = 'test_schema' materialized_db = 'test_database' clickhouse_postgres_db = 'postgres_database_with_schema' global insert_counter insert_counter = 0 def insert_into_tables(): global insert_counter clickhouse_postgres_db = 'postgres_database_with_schema' for i in range(NUM_TABLES): table_name = f'postgresql_replica_{i}' instance.query( f"INSERT INTO {clickhouse_postgres_db}.{table_name} SELECT number, number from numbers(1000 * {insert_counter}, 1000)" ) insert_counter += 1 def assert_show_tables(expected): result = instance.query('SHOW TABLES FROM test_database') assert (result == expected) print('assert show tables Ok') def check_all_tables_are_synchronized(): for i in range(NUM_TABLES): print('checking table', i) check_tables_are_synchronized( instance, f"postgresql_replica_{i}", postgres_database=clickhouse_postgres_db) print('synchronization Ok') create_postgres_schema(cursor, schema_name) pg_manager.create_clickhouse_postgres_db(ip=cluster.postgres_ip, port=cluster.postgres_port, name=clickhouse_postgres_db, schema_name=schema_name) for i in range(NUM_TABLES): create_postgres_table_with_schema(cursor, schema_name, f'postgresql_replica_{i}') insert_into_tables() pg_manager.create_materialized_db( ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, settings=[ f"materialized_postgresql_schema = '{schema_name}'", "materialized_postgresql_allow_automatic_update = 1" ]) insert_into_tables() check_all_tables_are_synchronized() assert_show_tables( "postgresql_replica_0\npostgresql_replica_1\npostgresql_replica_2\npostgresql_replica_3\npostgresql_replica_4\n" ) instance.restart_clickhouse() check_all_tables_are_synchronized() assert_show_tables( "postgresql_replica_0\npostgresql_replica_1\npostgresql_replica_2\npostgresql_replica_3\npostgresql_replica_4\n" ) insert_into_tables() check_all_tables_are_synchronized() print('ALTER') altered_table = random.randint(0, NUM_TABLES - 1) cursor.execute( "ALTER TABLE test_schema.postgresql_replica_{} ADD COLUMN value2 integer" .format(altered_table)) instance.query( f"INSERT INTO {clickhouse_postgres_db}.postgresql_replica_{altered_table} SELECT number, number, number from numbers(5000, 1000)" ) assert_number_of_columns(instance, 3, f'postgresql_replica_{altered_table}') check_tables_are_synchronized(instance, f"postgresql_replica_{altered_table}", postgres_database=clickhouse_postgres_db) print('DETACH-ATTACH') detached_table_name = "postgresql_replica_1" instance.query(f"DETACH TABLE {materialized_db}.{detached_table_name}") assert not instance.contains_in_log( "from publication, because table does not exist in PostgreSQL") instance.query(f"ATTACH TABLE {materialized_db}.{detached_table_name}") check_tables_are_synchronized(instance, detached_table_name, postgres_database=clickhouse_postgres_db)
def test_quoting_1(started_cluster): table_name = "user" pg_manager.create_and_fill_postgres_table(table_name) pg_manager.create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) check_tables_are_synchronized(instance, table_name)
def test_database_with_multiple_non_default_schemas_1(started_cluster): cursor = pg_manager.get_db_cursor() NUM_TABLES = 5 schema_name = "test_schema" clickhouse_postgres_db = "postgres_database_with_schema" materialized_db = "test_database" publication_tables = "" global insert_counter insert_counter = 0 def insert_into_tables(): global insert_counter clickhouse_postgres_db = "postgres_database_with_schema" for i in range(NUM_TABLES): table_name = f"postgresql_replica_{i}" instance.query( f"INSERT INTO {clickhouse_postgres_db}.{table_name} SELECT number, number from numbers(1000 * {insert_counter}, 1000)" ) insert_counter += 1 def assert_show_tables(expected): result = instance.query("SHOW TABLES FROM test_database") assert result == expected print("assert show tables Ok") def check_all_tables_are_synchronized(): for i in range(NUM_TABLES): print("checking table", i) check_tables_are_synchronized( instance, "postgresql_replica_{}".format(i), schema_name=schema_name, postgres_database=clickhouse_postgres_db, ) print("synchronization Ok") create_postgres_schema(cursor, schema_name) pg_manager.create_clickhouse_postgres_db( ip=cluster.postgres_ip, port=cluster.postgres_port, name=clickhouse_postgres_db, schema_name=schema_name, ) for i in range(NUM_TABLES): table_name = "postgresql_replica_{}".format(i) create_postgres_table_with_schema(cursor, schema_name, table_name) if publication_tables != "": publication_tables += ", " publication_tables += schema_name + "." + table_name insert_into_tables() pg_manager.create_materialized_db( ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, settings=[ f"materialized_postgresql_tables_list = '{publication_tables}'", "materialized_postgresql_tables_list_with_schema=1", "materialized_postgresql_allow_automatic_update = 1", ], ) check_all_tables_are_synchronized() assert_show_tables( "test_schema.postgresql_replica_0\ntest_schema.postgresql_replica_1\ntest_schema.postgresql_replica_2\ntest_schema.postgresql_replica_3\ntest_schema.postgresql_replica_4\n" ) instance.restart_clickhouse() check_all_tables_are_synchronized() assert_show_tables( "test_schema.postgresql_replica_0\ntest_schema.postgresql_replica_1\ntest_schema.postgresql_replica_2\ntest_schema.postgresql_replica_3\ntest_schema.postgresql_replica_4\n" ) insert_into_tables() check_all_tables_are_synchronized() print("ALTER") altered_table = random.randint(0, NUM_TABLES - 1) cursor.execute( "ALTER TABLE test_schema.postgresql_replica_{} ADD COLUMN value2 integer" .format(altered_table)) instance.query( f"INSERT INTO {clickhouse_postgres_db}.postgresql_replica_{altered_table} SELECT number, number, number from numbers(5000, 1000)" ) assert_number_of_columns( instance, 3, f"{schema_name}.postgresql_replica_{altered_table}") check_tables_are_synchronized( instance, f"postgresql_replica_{altered_table}", schema_name=schema_name, postgres_database=clickhouse_postgres_db, ) print("DETACH-ATTACH") detached_table_name = "postgresql_replica_1" instance.query( f"DETACH TABLE {materialized_db}.`{schema_name}.{detached_table_name}` PERMANENTLY" ) assert not instance.contains_in_log( "from publication, because table does not exist in PostgreSQL") instance.query( f"ATTACH TABLE {materialized_db}.`{schema_name}.{detached_table_name}`" ) assert_show_tables( "test_schema.postgresql_replica_0\ntest_schema.postgresql_replica_1\ntest_schema.postgresql_replica_2\ntest_schema.postgresql_replica_3\ntest_schema.postgresql_replica_4\n" ) check_tables_are_synchronized( instance, detached_table_name, schema_name=schema_name, postgres_database=clickhouse_postgres_db, )
def test_multiple_databases(started_cluster): NUM_TABLES = 5 conn = get_postgres_conn( ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=False, ) cursor = conn.cursor() pg_manager.create_postgres_db(cursor, "postgres_database_1") pg_manager.create_postgres_db(cursor, "postgres_database_2") conn1 = get_postgres_conn( ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True, database_name="postgres_database_1", ) conn2 = get_postgres_conn( ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True, database_name="postgres_database_2", ) cursor1 = conn1.cursor() cursor2 = conn2.cursor() pg_manager.create_clickhouse_postgres_db( cluster.postgres_ip, cluster.postgres_port, "postgres_database_1", "postgres_database_1", ) pg_manager.create_clickhouse_postgres_db( cluster.postgres_ip, cluster.postgres_port, "postgres_database_2", "postgres_database_2", ) cursors = [cursor1, cursor2] for cursor_id in range(len(cursors)): for i in range(NUM_TABLES): table_name = "postgresql_replica_{}".format(i) create_postgres_table(cursors[cursor_id], table_name) instance.query( "INSERT INTO postgres_database_{}.{} SELECT number, number from numbers(50)" .format(cursor_id + 1, table_name)) print( "database 1 tables: ", instance.query( """SELECT name FROM system.tables WHERE database = 'postgres_database_1';""" ), ) print( "database 2 tables: ", instance.query( """SELECT name FROM system.tables WHERE database = 'postgres_database_2';""" ), ) pg_manager.create_materialized_db( started_cluster.postgres_ip, started_cluster.postgres_port, "test_database_1", "postgres_database_1", ) pg_manager.create_materialized_db( started_cluster.postgres_ip, started_cluster.postgres_port, "test_database_2", "postgres_database_2", ) cursors = [cursor1, cursor2] for cursor_id in range(len(cursors)): for i in range(NUM_TABLES): table_name = "postgresql_replica_{}".format(i) instance.query( "INSERT INTO postgres_database_{}.{} SELECT 50 + number, number from numbers(50)" .format(cursor_id + 1, table_name)) for cursor_id in range(len(cursors)): for i in range(NUM_TABLES): table_name = "postgresql_replica_{}".format(i) check_tables_are_synchronized( instance, table_name, "key", "postgres_database_{}".format(cursor_id + 1), "test_database_{}".format(cursor_id + 1), )
def test_database_with_multiple_non_default_schemas_2(started_cluster): cursor = pg_manager.get_db_cursor() NUM_TABLES = 2 schemas_num = 2 schema_list = "schema0, schema1" materialized_db = "test_database" global insert_counter insert_counter = 0 def check_all_tables_are_synchronized(): for i in range(schemas_num): schema_name = f"schema{i}" clickhouse_postgres_db = f"clickhouse_postgres_db{i}" for ti in range(NUM_TABLES): table_name = f"postgresql_replica_{ti}" print(f"checking table {schema_name}.{table_name}") check_tables_are_synchronized( instance, f"{table_name}", schema_name=schema_name, postgres_database=clickhouse_postgres_db, ) print("synchronized Ok") def insert_into_tables(): global insert_counter for i in range(schemas_num): clickhouse_postgres_db = f"clickhouse_postgres_db{i}" for ti in range(NUM_TABLES): table_name = f"postgresql_replica_{ti}" instance.query( f"INSERT INTO {clickhouse_postgres_db}.{table_name} SELECT number, number from numbers(1000 * {insert_counter}, 1000)" ) insert_counter += 1 def assert_show_tables(expected): result = instance.query("SHOW TABLES FROM test_database") assert result == expected print("assert show tables Ok") for i in range(schemas_num): schema_name = f"schema{i}" clickhouse_postgres_db = f"clickhouse_postgres_db{i}" create_postgres_schema(cursor, schema_name) pg_manager.create_clickhouse_postgres_db( ip=cluster.postgres_ip, port=cluster.postgres_port, name=clickhouse_postgres_db, schema_name=schema_name, ) for ti in range(NUM_TABLES): table_name = f"postgresql_replica_{ti}" create_postgres_table_with_schema(cursor, schema_name, table_name) insert_into_tables() pg_manager.create_materialized_db( ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, settings=[ f"materialized_postgresql_schema_list = '{schema_list}'", "materialized_postgresql_allow_automatic_update = 1", ], ) check_all_tables_are_synchronized() insert_into_tables() assert_show_tables( "schema0.postgresql_replica_0\nschema0.postgresql_replica_1\nschema1.postgresql_replica_0\nschema1.postgresql_replica_1\n" ) instance.restart_clickhouse() assert_show_tables( "schema0.postgresql_replica_0\nschema0.postgresql_replica_1\nschema1.postgresql_replica_0\nschema1.postgresql_replica_1\n" ) check_all_tables_are_synchronized() insert_into_tables() check_all_tables_are_synchronized() print("ALTER") altered_schema = random.randint(0, schemas_num - 1) altered_table = random.randint(0, NUM_TABLES - 1) clickhouse_postgres_db = f"clickhouse_postgres_db{altered_schema}" cursor.execute( f"ALTER TABLE schema{altered_schema}.postgresql_replica_{altered_table} ADD COLUMN value2 integer" ) instance.query( f"INSERT INTO clickhouse_postgres_db{altered_schema}.postgresql_replica_{altered_table} SELECT number, number, number from numbers(1000 * {insert_counter}, 1000)" ) assert_number_of_columns( instance, 3, f"schema{altered_schema}.postgresql_replica_{altered_table}") check_tables_are_synchronized( instance, f"postgresql_replica_{altered_table}", schema_name=f"schema{altered_schema}", postgres_database=clickhouse_postgres_db, ) print("DETACH-ATTACH") detached_table_name = "postgresql_replica_1" detached_table_schema = "schema0" clickhouse_postgres_db = f"clickhouse_postgres_db0" instance.query( f"DETACH TABLE {materialized_db}.`{detached_table_schema}.{detached_table_name}` PERMANENTLY" ) assert not instance.contains_in_log( "from publication, because table does not exist in PostgreSQL") instance.query( f"ATTACH TABLE {materialized_db}.`{detached_table_schema}.{detached_table_name}`" ) assert_show_tables( "schema0.postgresql_replica_0\nschema0.postgresql_replica_1\nschema1.postgresql_replica_0\nschema1.postgresql_replica_1\n" ) check_tables_are_synchronized( instance, f"postgresql_replica_{altered_table}", schema_name=detached_table_schema, postgres_database=clickhouse_postgres_db, )
def test_many_concurrent_queries(started_cluster): query_pool = [ "DELETE FROM postgresql_replica_{} WHERE (value*value) % 3 = 0;", "UPDATE postgresql_replica_{} SET value = value - 125 WHERE key % 2 = 0;", "DELETE FROM postgresql_replica_{} WHERE key % 10 = 0;", "UPDATE postgresql_replica_{} SET value = value*5 WHERE key % 2 = 1;", "DELETE FROM postgresql_replica_{} WHERE value % 2 = 0;", "UPDATE postgresql_replica_{} SET value = value + 2000 WHERE key % 5 = 0;", "DELETE FROM postgresql_replica_{} WHERE value % 3 = 0;", "UPDATE postgresql_replica_{} SET value = value * 2 WHERE key % 3 = 0;", "DELETE FROM postgresql_replica_{} WHERE value % 9 = 2;", "UPDATE postgresql_replica_{} SET value = value + 2 WHERE key % 3 = 1;", "DELETE FROM postgresql_replica_{} WHERE value%5 = 0;", ] NUM_TABLES = 5 conn = get_postgres_conn( ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True, ) cursor = conn.cursor() pg_manager.create_and_fill_postgres_tables_from_cursor(cursor, NUM_TABLES, numbers=10000) def attack(thread_id): print("thread {}".format(thread_id)) k = 10000 for i in range(20): query_id = random.randrange(0, len(query_pool) - 1) table_id = random.randrange(0, 5) # num tables # random update / delete query cursor.execute(query_pool[query_id].format(table_id)) print("table {} query {} ok".format(table_id, query_id)) # allow some thread to do inserts (not to violate key constraints) if thread_id < 5: print("try insert table {}".format(thread_id)) instance.query( "INSERT INTO postgres_database.postgresql_replica_{} SELECT {}*10000*({} + number), number from numbers(1000)" .format(i, thread_id, k)) k += 1 print("insert table {} ok".format(thread_id)) if i == 5: # also change primary key value print("try update primary key {}".format(thread_id)) cursor.execute( "UPDATE postgresql_replica_{} SET key=key%100000+100000*{} WHERE key%{}=0" .format(thread_id, i + 1, i + 1)) print("update primary key {} ok".format(thread_id)) n = [10000] threads = [] threads_num = 16 for i in range(threads_num): threads.append(threading.Thread(target=attack, args=(i, ))) pg_manager.create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) for thread in threads: time.sleep(random.uniform(0, 1)) thread.start() n[0] = 50000 for table_id in range(NUM_TABLES): n[0] += 1 instance.query( "INSERT INTO postgres_database.postgresql_replica_{} SELECT {} + number, number from numbers(5000)" .format(table_id, n[0])) # cursor.execute("UPDATE postgresql_replica_{} SET key=key%100000+100000*{} WHERE key%{}=0".format(table_id, table_id+1, table_id+1)) for thread in threads: thread.join() for i in range(NUM_TABLES): check_tables_are_synchronized(instance, "postgresql_replica_{}".format(i)) count1 = instance.query( "SELECT count() FROM postgres_database.postgresql_replica_{}". format(i)) count2 = instance.query( "SELECT count() FROM (SELECT * FROM test_database.postgresql_replica_{})" .format(i)) assert int(count1) == int(count2) print(count1, count2)
def test_table_schema_changes(started_cluster): conn = get_postgres_conn( ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True, ) cursor = conn.cursor() NUM_TABLES = 5 for i in range(NUM_TABLES): create_postgres_table( cursor, "postgresql_replica_{}".format(i), template=postgres_table_template_2, ) instance.query( "INSERT INTO postgres_database.postgresql_replica_{} SELECT number, {}, {}, {} from numbers(25)" .format(i, i, i, i)) pg_manager.create_materialized_db( ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, settings=["materialized_postgresql_allow_automatic_update = 1"], ) for i in range(NUM_TABLES): instance.query( "INSERT INTO postgres_database.postgresql_replica_{} SELECT 25 + number, {}, {}, {} from numbers(25)" .format(i, i, i, i)) check_several_tables_are_synchronized(instance, NUM_TABLES) expected = instance.query( "SELECT key, value1, value3 FROM test_database.postgresql_replica_3 ORDER BY key" ) altered_idx = random.randint(0, 4) altered_table = f"postgresql_replica_{altered_idx}" cursor.execute(f"ALTER TABLE {altered_table} DROP COLUMN value2") for i in range(NUM_TABLES): cursor.execute( f"INSERT INTO postgresql_replica_{i} VALUES (50, {i}, {i})") cursor.execute(f"UPDATE {altered_table} SET value3 = 12 WHERE key%2=0") time.sleep(2) assert_nested_table_is_created(instance, altered_table) assert_number_of_columns(instance, 3, altered_table) check_tables_are_synchronized(instance, altered_table) print("check1 OK") check_several_tables_are_synchronized(instance, NUM_TABLES) for i in range(NUM_TABLES): if i != altered_idx: instance.query( "INSERT INTO postgres_database.postgresql_replica_{} SELECT 51 + number, {}, {}, {} from numbers(49)" .format(i, i, i, i)) else: instance.query( "INSERT INTO postgres_database.postgresql_replica_{} SELECT 51 + number, {}, {} from numbers(49)" .format(i, i, i)) check_tables_are_synchronized(instance, altered_table) print("check2 OK") check_several_tables_are_synchronized(instance, NUM_TABLES)
def test_add_new_table_to_replication(started_cluster): cursor = pg_manager.get_db_cursor() cursor.execute("DROP TABLE IF EXISTS test_table") NUM_TABLES = 5 pg_manager.create_and_fill_postgres_tables_from_cursor( cursor, NUM_TABLES, 10000) pg_manager.create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) check_several_tables_are_synchronized(instance, NUM_TABLES) result = instance.query("SHOW TABLES FROM test_database") assert ( result == "postgresql_replica_0\npostgresql_replica_1\npostgresql_replica_2\npostgresql_replica_3\npostgresql_replica_4\n" ) table_name = "postgresql_replica_5" pg_manager.create_and_fill_postgres_table_from_cursor(cursor, table_name) result = instance.query("SHOW CREATE DATABASE test_database") assert (result[:63] == "CREATE DATABASE test_database\\nENGINE = MaterializedPostgreSQL(" ) # Check without ip assert ( result[-59:] == "\\'postgres_database\\', \\'postgres\\', \\'mysecretpassword\\')\n") result = instance.query_and_get_error( "ALTER DATABASE test_database MODIFY SETTING materialized_postgresql_tables_list='tabl1'" ) assert ( "Changing setting `materialized_postgresql_tables_list` is not allowed" in result) result = instance.query_and_get_error( "ALTER DATABASE test_database MODIFY SETTING materialized_postgresql_tables='tabl1'" ) assert "Database engine MaterializedPostgreSQL does not support setting" in result instance.query(f"ATTACH TABLE test_database.{table_name}") result = instance.query("SHOW TABLES FROM test_database") assert ( result == "postgresql_replica_0\npostgresql_replica_1\npostgresql_replica_2\npostgresql_replica_3\npostgresql_replica_4\npostgresql_replica_5\n" ) check_tables_are_synchronized(instance, table_name) instance.query( f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers(10000, 10000)" ) check_tables_are_synchronized(instance, table_name) result = instance.query_and_get_error( f"ATTACH TABLE test_database.{table_name}") assert "Table test_database.postgresql_replica_5 already exists" in result result = instance.query_and_get_error( "ATTACH TABLE test_database.unknown_table") assert "PostgreSQL table unknown_table does not exist" in result result = instance.query("SHOW CREATE DATABASE test_database") assert (result[:63] == "CREATE DATABASE test_database\\nENGINE = MaterializedPostgreSQL(") assert ( result[-180:] == ")\\nSETTINGS materialized_postgresql_tables_list = \\'postgresql_replica_0,postgresql_replica_1,postgresql_replica_2,postgresql_replica_3,postgresql_replica_4,postgresql_replica_5\\'\n" ) table_name = "postgresql_replica_6" create_postgres_table(cursor, table_name) instance.query( "INSERT INTO postgres_database.{} SELECT number, number from numbers(10000)" .format(table_name)) instance.query(f"ATTACH TABLE test_database.{table_name}") instance.restart_clickhouse() table_name = "postgresql_replica_7" create_postgres_table(cursor, table_name) instance.query( "INSERT INTO postgres_database.{} SELECT number, number from numbers(10000)" .format(table_name)) instance.query(f"ATTACH TABLE test_database.{table_name}") result = instance.query("SHOW CREATE DATABASE test_database") assert (result[:63] == "CREATE DATABASE test_database\\nENGINE = MaterializedPostgreSQL(") assert ( result[-222:] == ")\\nSETTINGS materialized_postgresql_tables_list = \\'postgresql_replica_0,postgresql_replica_1,postgresql_replica_2,postgresql_replica_3,postgresql_replica_4,postgresql_replica_5,postgresql_replica_6,postgresql_replica_7\\'\n" ) instance.query( f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers(10000, 10000)" ) result = instance.query("SHOW TABLES FROM test_database") assert ( result == "postgresql_replica_0\npostgresql_replica_1\npostgresql_replica_2\npostgresql_replica_3\npostgresql_replica_4\npostgresql_replica_5\npostgresql_replica_6\npostgresql_replica_7\n" ) check_several_tables_are_synchronized(instance, NUM_TABLES + 3)
def test_different_data_types(started_cluster): conn = get_postgres_conn( ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True, ) cursor = conn.cursor() cursor.execute("drop table if exists test_data_types;") cursor.execute("drop table if exists test_array_data_type;") cursor.execute("""CREATE TABLE test_data_types ( id integer PRIMARY KEY, a smallint, b integer, c bigint, d real, e double precision, f serial, g bigserial, h timestamp, i date, j decimal(5, 5), k numeric(5, 5))""") cursor.execute("""CREATE TABLE test_array_data_type ( key Integer NOT NULL PRIMARY KEY, a Date[] NOT NULL, -- Date b Timestamp[] NOT NULL, -- DateTime64(6) c real[][] NOT NULL, -- Float32 d double precision[][] NOT NULL, -- Float64 e decimal(5, 5)[][][] NOT NULL, -- Decimal32 f integer[][][] NOT NULL, -- Int32 g Text[][][][][] NOT NULL, -- String h Integer[][][], -- Nullable(Int32) i Char(2)[][][][], -- Nullable(String) k Char(2)[] -- Nullable(String) )""") pg_manager.create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) for i in range(10): instance.query(""" INSERT INTO postgres_database.test_data_types VALUES ({}, -32768, -2147483648, -9223372036854775808, 1.12345, 1.1234567890, 2147483647, 9223372036854775807, '2000-05-12 12:12:12.012345', '2000-05-12', 0.2, 0.2)""" .format(i)) check_tables_are_synchronized(instance, "test_data_types", "id") result = instance.query( "SELECT * FROM test_database.test_data_types ORDER BY id LIMIT 1;") assert ( result == "0\t-32768\t-2147483648\t-9223372036854775808\t1.12345\t1.123456789\t2147483647\t9223372036854775807\t2000-05-12 12:12:12.012345\t2000-05-12\t0.2\t0.2\n" ) for i in range(10): col = random.choice(["a", "b", "c"]) cursor.execute("UPDATE test_data_types SET {} = {};".format(col, i)) cursor.execute( """UPDATE test_data_types SET i = '2020-12-12';""".format(col, i)) check_tables_are_synchronized(instance, "test_data_types", "id") instance.query( "INSERT INTO postgres_database.test_array_data_type " "VALUES (" "0, " "['2000-05-12', '2000-05-12'], " "['2000-05-12 12:12:12.012345', '2000-05-12 12:12:12.012345'], " "[[1.12345], [1.12345], [1.12345]], " "[[1.1234567891], [1.1234567891], [1.1234567891]], " "[[[0.11111, 0.11111]], [[0.22222, 0.22222]], [[0.33333, 0.33333]]], " "[[[1, 1], [1, 1]], [[3, 3], [3, 3]], [[4, 4], [5, 5]]], " "[[[[['winx', 'winx', 'winx']]]]], " "[[[1, NULL], [NULL, 1]], [[NULL, NULL], [NULL, NULL]], [[4, 4], [5, 5]]], " "[[[[NULL]]]], " "[]" ")") expected = ( "0\t" + "['2000-05-12','2000-05-12']\t" + "['2000-05-12 12:12:12.012345','2000-05-12 12:12:12.012345']\t" + "[[1.12345],[1.12345],[1.12345]]\t" + "[[1.1234567891],[1.1234567891],[1.1234567891]]\t" + "[[[0.11111,0.11111]],[[0.22222,0.22222]],[[0.33333,0.33333]]]\t" "[[[1,1],[1,1]],[[3,3],[3,3]],[[4,4],[5,5]]]\t" "[[[[['winx','winx','winx']]]]]\t" "[[[1,NULL],[NULL,1]],[[NULL,NULL],[NULL,NULL]],[[4,4],[5,5]]]\t" "[[[[NULL]]]]\t" "[]\n") check_tables_are_synchronized(instance, "test_array_data_type") result = instance.query( "SELECT * FROM test_database.test_array_data_type ORDER BY key;") assert result == expected pg_manager.drop_materialized_db() cursor.execute("drop table if exists test_data_types;") cursor.execute("drop table if exists test_array_data_type;")