Exemple #1
0
def test_virtual_columns(started_cluster):
    conn = get_postgres_conn(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        database=True,
    )
    cursor = conn.cursor()
    table_name = "postgresql_replica_0"
    create_postgres_table(cursor, table_name)

    pg_manager.create_materialized_db(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        settings=["materialized_postgresql_allow_automatic_update = 1"],
    )

    assert_nested_table_is_created(instance, table_name)
    instance.query(
        f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers(10)"
    )
    check_tables_are_synchronized(instance, table_name)

    # just check that it works, no check with `expected` because _version is taken as LSN, which will be different each time.
    result = instance.query(
        f"SELECT key, value, _sign, _version FROM test_database.{table_name};")
    print(result)

    cursor.execute(f"ALTER TABLE {table_name} ADD COLUMN value2 integer")
    instance.query(
        f"INSERT INTO postgres_database.{table_name} SELECT number, number, number from numbers(10, 10)"
    )
    assert_number_of_columns(instance, 3, table_name)
    check_tables_are_synchronized(instance, table_name)

    result = instance.query(
        "SELECT key, value, value2,  _sign, _version FROM test_database.postgresql_replica_0;"
    )
    print(result)

    instance.query(
        f"INSERT INTO postgres_database.{table_name} SELECT number, number, number from numbers(20, 10)"
    )
    check_tables_are_synchronized(instance, table_name)

    result = instance.query(
        f"SELECT key, value, value2,  _sign, _version FROM test_database.{table_name};"
    )
    print(result)
Exemple #2
0
def test_table_schema_changes(started_cluster):
    conn = get_postgres_conn(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        database=True,
    )
    cursor = conn.cursor()
    NUM_TABLES = 5

    for i in range(NUM_TABLES):
        create_postgres_table(
            cursor,
            "postgresql_replica_{}".format(i),
            template=postgres_table_template_2,
        )
        instance.query(
            "INSERT INTO postgres_database.postgresql_replica_{} SELECT number, {}, {}, {} from numbers(25)"
            .format(i, i, i, i))

    pg_manager.create_materialized_db(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        settings=["materialized_postgresql_allow_automatic_update = 1"],
    )

    for i in range(NUM_TABLES):
        instance.query(
            "INSERT INTO postgres_database.postgresql_replica_{} SELECT 25 + number, {}, {}, {} from numbers(25)"
            .format(i, i, i, i))

    check_several_tables_are_synchronized(instance, NUM_TABLES)

    expected = instance.query(
        "SELECT key, value1, value3 FROM test_database.postgresql_replica_3 ORDER BY key"
    )

    altered_idx = random.randint(0, 4)
    altered_table = f"postgresql_replica_{altered_idx}"
    cursor.execute(f"ALTER TABLE {altered_table} DROP COLUMN value2")

    for i in range(NUM_TABLES):
        cursor.execute(
            f"INSERT INTO postgresql_replica_{i} VALUES (50, {i}, {i})")
        cursor.execute(f"UPDATE {altered_table} SET value3 = 12 WHERE key%2=0")

    time.sleep(2)
    assert_nested_table_is_created(instance, altered_table)
    assert_number_of_columns(instance, 3, altered_table)
    check_tables_are_synchronized(instance, altered_table)
    print("check1 OK")

    check_several_tables_are_synchronized(instance, NUM_TABLES)

    for i in range(NUM_TABLES):
        if i != altered_idx:
            instance.query(
                "INSERT INTO postgres_database.postgresql_replica_{} SELECT 51 + number, {}, {}, {} from numbers(49)"
                .format(i, i, i, i))
        else:
            instance.query(
                "INSERT INTO postgres_database.postgresql_replica_{} SELECT 51 + number, {}, {} from numbers(49)"
                .format(i, i, i))

    check_tables_are_synchronized(instance, altered_table)
    print("check2 OK")
    check_several_tables_are_synchronized(instance, NUM_TABLES)
Exemple #3
0
def test_database_with_multiple_non_default_schemas_1(started_cluster):
    cursor = pg_manager.get_db_cursor()

    NUM_TABLES = 5
    schema_name = "test_schema"
    clickhouse_postgres_db = "postgres_database_with_schema"
    materialized_db = "test_database"
    publication_tables = ""
    global insert_counter
    insert_counter = 0

    def insert_into_tables():
        global insert_counter
        clickhouse_postgres_db = "postgres_database_with_schema"
        for i in range(NUM_TABLES):
            table_name = f"postgresql_replica_{i}"
            instance.query(
                f"INSERT INTO {clickhouse_postgres_db}.{table_name} SELECT number, number from numbers(1000 * {insert_counter}, 1000)"
            )
        insert_counter += 1

    def assert_show_tables(expected):
        result = instance.query("SHOW TABLES FROM test_database")
        assert result == expected
        print("assert show tables Ok")

    def check_all_tables_are_synchronized():
        for i in range(NUM_TABLES):
            print("checking table", i)
            check_tables_are_synchronized(
                instance,
                "postgresql_replica_{}".format(i),
                schema_name=schema_name,
                postgres_database=clickhouse_postgres_db,
            )
        print("synchronization Ok")

    create_postgres_schema(cursor, schema_name)
    pg_manager.create_clickhouse_postgres_db(
        ip=cluster.postgres_ip,
        port=cluster.postgres_port,
        name=clickhouse_postgres_db,
        schema_name=schema_name,
    )

    for i in range(NUM_TABLES):
        table_name = "postgresql_replica_{}".format(i)
        create_postgres_table_with_schema(cursor, schema_name, table_name)
        if publication_tables != "":
            publication_tables += ", "
        publication_tables += schema_name + "." + table_name

    insert_into_tables()
    pg_manager.create_materialized_db(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        settings=[
            f"materialized_postgresql_tables_list = '{publication_tables}'",
            "materialized_postgresql_tables_list_with_schema=1",
            "materialized_postgresql_allow_automatic_update = 1",
        ],
    )

    check_all_tables_are_synchronized()
    assert_show_tables(
        "test_schema.postgresql_replica_0\ntest_schema.postgresql_replica_1\ntest_schema.postgresql_replica_2\ntest_schema.postgresql_replica_3\ntest_schema.postgresql_replica_4\n"
    )

    instance.restart_clickhouse()
    check_all_tables_are_synchronized()
    assert_show_tables(
        "test_schema.postgresql_replica_0\ntest_schema.postgresql_replica_1\ntest_schema.postgresql_replica_2\ntest_schema.postgresql_replica_3\ntest_schema.postgresql_replica_4\n"
    )

    insert_into_tables()
    check_all_tables_are_synchronized()

    print("ALTER")
    altered_table = random.randint(0, NUM_TABLES - 1)
    cursor.execute(
        "ALTER TABLE test_schema.postgresql_replica_{} ADD COLUMN value2 integer"
        .format(altered_table))

    instance.query(
        f"INSERT INTO {clickhouse_postgres_db}.postgresql_replica_{altered_table} SELECT number, number, number from numbers(5000, 1000)"
    )
    assert_number_of_columns(
        instance, 3, f"{schema_name}.postgresql_replica_{altered_table}")
    check_tables_are_synchronized(
        instance,
        f"postgresql_replica_{altered_table}",
        schema_name=schema_name,
        postgres_database=clickhouse_postgres_db,
    )

    print("DETACH-ATTACH")
    detached_table_name = "postgresql_replica_1"
    instance.query(
        f"DETACH TABLE {materialized_db}.`{schema_name}.{detached_table_name}` PERMANENTLY"
    )
    assert not instance.contains_in_log(
        "from publication, because table does not exist in PostgreSQL")
    instance.query(
        f"ATTACH TABLE {materialized_db}.`{schema_name}.{detached_table_name}`"
    )
    assert_show_tables(
        "test_schema.postgresql_replica_0\ntest_schema.postgresql_replica_1\ntest_schema.postgresql_replica_2\ntest_schema.postgresql_replica_3\ntest_schema.postgresql_replica_4\n"
    )
    check_tables_are_synchronized(
        instance,
        detached_table_name,
        schema_name=schema_name,
        postgres_database=clickhouse_postgres_db,
    )
Exemple #4
0
def test_database_with_multiple_non_default_schemas_2(started_cluster):
    cursor = pg_manager.get_db_cursor()
    NUM_TABLES = 2
    schemas_num = 2
    schema_list = "schema0, schema1"
    materialized_db = "test_database"
    global insert_counter
    insert_counter = 0

    def check_all_tables_are_synchronized():
        for i in range(schemas_num):
            schema_name = f"schema{i}"
            clickhouse_postgres_db = f"clickhouse_postgres_db{i}"
            for ti in range(NUM_TABLES):
                table_name = f"postgresql_replica_{ti}"
                print(f"checking table {schema_name}.{table_name}")
                check_tables_are_synchronized(
                    instance,
                    f"{table_name}",
                    schema_name=schema_name,
                    postgres_database=clickhouse_postgres_db,
                )
        print("synchronized Ok")

    def insert_into_tables():
        global insert_counter
        for i in range(schemas_num):
            clickhouse_postgres_db = f"clickhouse_postgres_db{i}"
            for ti in range(NUM_TABLES):
                table_name = f"postgresql_replica_{ti}"
                instance.query(
                    f"INSERT INTO {clickhouse_postgres_db}.{table_name} SELECT number, number from numbers(1000 * {insert_counter}, 1000)"
                )
        insert_counter += 1

    def assert_show_tables(expected):
        result = instance.query("SHOW TABLES FROM test_database")
        assert result == expected
        print("assert show tables Ok")

    for i in range(schemas_num):
        schema_name = f"schema{i}"
        clickhouse_postgres_db = f"clickhouse_postgres_db{i}"
        create_postgres_schema(cursor, schema_name)
        pg_manager.create_clickhouse_postgres_db(
            ip=cluster.postgres_ip,
            port=cluster.postgres_port,
            name=clickhouse_postgres_db,
            schema_name=schema_name,
        )
        for ti in range(NUM_TABLES):
            table_name = f"postgresql_replica_{ti}"
            create_postgres_table_with_schema(cursor, schema_name, table_name)

    insert_into_tables()
    pg_manager.create_materialized_db(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        settings=[
            f"materialized_postgresql_schema_list = '{schema_list}'",
            "materialized_postgresql_allow_automatic_update = 1",
        ],
    )

    check_all_tables_are_synchronized()
    insert_into_tables()
    assert_show_tables(
        "schema0.postgresql_replica_0\nschema0.postgresql_replica_1\nschema1.postgresql_replica_0\nschema1.postgresql_replica_1\n"
    )

    instance.restart_clickhouse()
    assert_show_tables(
        "schema0.postgresql_replica_0\nschema0.postgresql_replica_1\nschema1.postgresql_replica_0\nschema1.postgresql_replica_1\n"
    )
    check_all_tables_are_synchronized()
    insert_into_tables()
    check_all_tables_are_synchronized()

    print("ALTER")
    altered_schema = random.randint(0, schemas_num - 1)
    altered_table = random.randint(0, NUM_TABLES - 1)
    clickhouse_postgres_db = f"clickhouse_postgres_db{altered_schema}"
    cursor.execute(
        f"ALTER TABLE schema{altered_schema}.postgresql_replica_{altered_table} ADD COLUMN value2 integer"
    )

    instance.query(
        f"INSERT INTO clickhouse_postgres_db{altered_schema}.postgresql_replica_{altered_table} SELECT number, number, number from numbers(1000 * {insert_counter}, 1000)"
    )
    assert_number_of_columns(
        instance, 3,
        f"schema{altered_schema}.postgresql_replica_{altered_table}")
    check_tables_are_synchronized(
        instance,
        f"postgresql_replica_{altered_table}",
        schema_name=f"schema{altered_schema}",
        postgres_database=clickhouse_postgres_db,
    )

    print("DETACH-ATTACH")
    detached_table_name = "postgresql_replica_1"
    detached_table_schema = "schema0"
    clickhouse_postgres_db = f"clickhouse_postgres_db0"
    instance.query(
        f"DETACH TABLE {materialized_db}.`{detached_table_schema}.{detached_table_name}` PERMANENTLY"
    )
    assert not instance.contains_in_log(
        "from publication, because table does not exist in PostgreSQL")
    instance.query(
        f"ATTACH TABLE {materialized_db}.`{detached_table_schema}.{detached_table_name}`"
    )
    assert_show_tables(
        "schema0.postgresql_replica_0\nschema0.postgresql_replica_1\nschema1.postgresql_replica_0\nschema1.postgresql_replica_1\n"
    )
    check_tables_are_synchronized(
        instance,
        f"postgresql_replica_{altered_table}",
        schema_name=detached_table_schema,
        postgres_database=clickhouse_postgres_db,
    )
Exemple #5
0
def test_database_with_single_non_default_schema(started_cluster):
    cursor = pg_manager.get_db_cursor()
    NUM_TABLES = 5
    schema_name = 'test_schema'
    materialized_db = 'test_database'
    clickhouse_postgres_db = 'postgres_database_with_schema'
    global insert_counter
    insert_counter = 0

    def insert_into_tables():
        global insert_counter
        clickhouse_postgres_db = 'postgres_database_with_schema'
        for i in range(NUM_TABLES):
            table_name = f'postgresql_replica_{i}'
            instance.query(
                f"INSERT INTO {clickhouse_postgres_db}.{table_name} SELECT number, number from numbers(1000 * {insert_counter}, 1000)"
            )
        insert_counter += 1

    def assert_show_tables(expected):
        result = instance.query('SHOW TABLES FROM test_database')
        assert (result == expected)
        print('assert show tables Ok')

    def check_all_tables_are_synchronized():
        for i in range(NUM_TABLES):
            print('checking table', i)
            check_tables_are_synchronized(
                instance,
                f"postgresql_replica_{i}",
                postgres_database=clickhouse_postgres_db)
        print('synchronization Ok')

    create_postgres_schema(cursor, schema_name)
    pg_manager.create_clickhouse_postgres_db(ip=cluster.postgres_ip,
                                             port=cluster.postgres_port,
                                             name=clickhouse_postgres_db,
                                             schema_name=schema_name)

    for i in range(NUM_TABLES):
        create_postgres_table_with_schema(cursor, schema_name,
                                          f'postgresql_replica_{i}')

    insert_into_tables()
    pg_manager.create_materialized_db(
        ip=started_cluster.postgres_ip,
        port=started_cluster.postgres_port,
        settings=[
            f"materialized_postgresql_schema = '{schema_name}'",
            "materialized_postgresql_allow_automatic_update = 1"
        ])

    insert_into_tables()
    check_all_tables_are_synchronized()
    assert_show_tables(
        "postgresql_replica_0\npostgresql_replica_1\npostgresql_replica_2\npostgresql_replica_3\npostgresql_replica_4\n"
    )

    instance.restart_clickhouse()
    check_all_tables_are_synchronized()
    assert_show_tables(
        "postgresql_replica_0\npostgresql_replica_1\npostgresql_replica_2\npostgresql_replica_3\npostgresql_replica_4\n"
    )
    insert_into_tables()
    check_all_tables_are_synchronized()

    print('ALTER')
    altered_table = random.randint(0, NUM_TABLES - 1)
    cursor.execute(
        "ALTER TABLE test_schema.postgresql_replica_{} ADD COLUMN value2 integer"
        .format(altered_table))

    instance.query(
        f"INSERT INTO {clickhouse_postgres_db}.postgresql_replica_{altered_table} SELECT number, number, number from numbers(5000, 1000)"
    )
    assert_number_of_columns(instance, 3,
                             f'postgresql_replica_{altered_table}')
    check_tables_are_synchronized(instance,
                                  f"postgresql_replica_{altered_table}",
                                  postgres_database=clickhouse_postgres_db)

    print('DETACH-ATTACH')
    detached_table_name = "postgresql_replica_1"
    instance.query(f"DETACH TABLE {materialized_db}.{detached_table_name}")
    assert not instance.contains_in_log(
        "from publication, because table does not exist in PostgreSQL")
    instance.query(f"ATTACH TABLE {materialized_db}.{detached_table_name}")
    check_tables_are_synchronized(instance,
                                  detached_table_name,
                                  postgres_database=clickhouse_postgres_db)