Example #1
def test_freeze_unfreeze(cluster, node_name):
    node = cluster.instances[node_name]
    create_table(node, "s3_test")
    minio = cluster.minio_client

    node.query("INSERT INTO s3_test VALUES {}".format(
        generate_values("2020-01-03", 4096)))
    node.query("ALTER TABLE s3_test FREEZE WITH NAME 'backup1'")
    node.query("INSERT INTO s3_test VALUES {}".format(
        generate_values("2020-01-04", 4096)))
    node.query("ALTER TABLE s3_test FREEZE WITH NAME 'backup2'")

    node.query("TRUNCATE TABLE s3_test")
    assert (len(list(minio.list_objects(cluster.minio_bucket,
                                        "data/"))) == FILES_OVERHEAD +
            FILES_OVERHEAD_PER_PART_WIDE * 2)

    # Unfreeze single partition from backup1.
    node.query(
        "ALTER TABLE s3_test UNFREEZE PARTITION '2020-01-03' WITH NAME 'backup1'"
    )
    # Unfreeze all partitions from backup2.
    node.query("ALTER TABLE s3_test UNFREEZE WITH NAME 'backup2'")

    # Data should be removed from S3.
    assert (len(list(minio.list_objects(cluster.minio_bucket,
                                        "data/"))) == FILES_OVERHEAD)
Example #2
def test_simple_insert_select(cluster, min_rows_for_wide_part, files_per_part):
    create_table(
        cluster,
        "hdfs_test",
        additional_settings="min_rows_for_wide_part={}".format(min_rows_for_wide_part),
    )

    node = cluster.instances["node"]

    values1 = generate_values("2020-01-03", 4096)
    node.query("INSERT INTO hdfs_test VALUES {}".format(values1))
    assert (
        node.query("SELECT * FROM hdfs_test order by dt, id FORMAT Values") == values1
    )

    fs = HdfsClient(hosts=cluster.hdfs_ip)

    hdfs_objects = fs.listdir("/clickhouse")
    assert len(hdfs_objects) == FILES_OVERHEAD + files_per_part

    values2 = generate_values("2020-01-04", 4096)
    node.query("INSERT INTO hdfs_test VALUES {}".format(values2))
    assert (
        node.query("SELECT * FROM hdfs_test ORDER BY dt, id FORMAT Values")
        == values1 + "," + values2
    )

    hdfs_objects = fs.listdir("/clickhouse")
    assert len(hdfs_objects) == FILES_OVERHEAD + files_per_part * 2

    assert (
        node.query("SELECT count(*) FROM hdfs_test where id = 1 FORMAT Values") == "(2)"
    )
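
The FILES_OVERHEAD* constants used in the object-count assertions are module-level values in each test file. The exact numbers depend on the server version and part format; the shape, roughly, is a fixed set of table-level files plus a per-part cost of one data file and one mark file per column plus part metadata. Illustrative values only:

# Not authoritative: table-level metadata files, plus per-part files
# (data + marks for each of the 3 columns, checksums, count, partition
# minmax, etc.) for a part stored in the wide format.
FILES_OVERHEAD = 11
FILES_OVERHEAD_PER_COLUMN = 2  # <column>.bin + <column>.mrk
FILES_OVERHEAD_PER_PART_WIDE = FILES_OVERHEAD_PER_COLUMN * 3 + 2 + 6 + 1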
Example #3
def test_move_partition_to_another_disk(cluster, node_name):
    node = cluster.instances[node_name]
    create_table(node, "s3_test")
    minio = cluster.minio_client

    node.query("INSERT INTO s3_test VALUES {}".format(
        generate_values("2020-01-03", 4096)))
    node.query("INSERT INTO s3_test VALUES {}".format(
        generate_values("2020-01-04", 4096)))
    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)"
    assert (len(list(minio.list_objects(cluster.minio_bucket,
                                        "data/"))) == FILES_OVERHEAD +
            FILES_OVERHEAD_PER_PART_WIDE * 2)

    node.query("ALTER TABLE s3_test MOVE PARTITION '2020-01-04' TO DISK 'hdd'")
    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)"
    assert (len(list(minio.list_objects(cluster.minio_bucket,
                                        "data/"))) == FILES_OVERHEAD +
            FILES_OVERHEAD_PER_PART_WIDE)

    node.query("ALTER TABLE s3_test MOVE PARTITION '2020-01-04' TO DISK 's3'")
    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)"
    assert (len(list(minio.list_objects(cluster.minio_bucket,
                                        "data/"))) == FILES_OVERHEAD +
            FILES_OVERHEAD_PER_PART_WIDE * 2)
Example #4
def test_s3_disk_apply_new_settings(cluster, node_name):
    node = cluster.instances[node_name]
    create_table(node, "s3_test")

    def get_s3_requests():
        node.query("SYSTEM FLUSH LOGS")
        return int(
            node.query(
                "SELECT value FROM system.events WHERE event='S3WriteRequestsCount'"
            ))

    s3_requests_before = get_s3_requests()
    node.query("INSERT INTO s3_test VALUES {}".format(
        generate_values("2020-01-03", 4096)))
    s3_requests_to_write_partition = get_s3_requests() - s3_requests_before

    # Force multi-part upload mode.
    replace_config(
        CONFIG_PATH,
        "<s3_max_single_part_upload_size>33554432</s3_max_single_part_upload_size>",
        "<s3_max_single_part_upload_size>0</s3_max_single_part_upload_size>",
    )

    node.query("SYSTEM RELOAD CONFIG")

    s3_requests_before = get_s3_requests()
    node.query("INSERT INTO s3_test VALUES {}".format(
        generate_values("2020-01-04", 4096, -1)))

    # There should be 3 times more S3 requests because multi-part upload mode uses 3 requests to upload an object.
    assert (
        get_s3_requests() - s3_requests_before
        == s3_requests_to_write_partition * 3
    )
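
replace_config in Example #4 is a harness helper that rewrites the mounted server config before SYSTEM RELOAD CONFIG. Assuming CONFIG_PATH points at the XML file on the host side of the mount, a plausible minimal implementation is plain text substitution:

def replace_config(config_path, old, new):
    # Sketch: swap one XML snippet for another in place; the server picks
    # the change up on the next SYSTEM RELOAD CONFIG.
    with open(config_path) as f:
        config = f.read()
    with open(config_path, "w") as f:
        f.write(config.replace(old, new))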
Example #5
def test_simple_insert_select(cluster, min_rows_for_wide_part, files_per_part,
                              node_name):
    node = cluster.instances[node_name]
    create_table(node,
                 "s3_test",
                 min_rows_for_wide_part=min_rows_for_wide_part)
    minio = cluster.minio_client

    values1 = generate_values("2020-01-03", 4096)
    node.query("INSERT INTO s3_test VALUES {}".format(values1))
    assert node.query(
        "SELECT * FROM s3_test order by dt, id FORMAT Values") == values1
    assert (len(list(minio.list_objects(
        cluster.minio_bucket, "data/"))) == FILES_OVERHEAD + files_per_part)

    values2 = generate_values("2020-01-04", 4096)
    node.query("INSERT INTO s3_test VALUES {}".format(values2))
    assert (node.query("SELECT * FROM s3_test ORDER BY dt, id FORMAT Values")
            == values1 + "," + values2)
    assert (len(list(minio.list_objects(cluster.minio_bucket,
                                        "data/"))) == FILES_OVERHEAD +
            files_per_part * 2)

    assert (node.query(
        "SELECT count(*) FROM s3_test where id = 1 FORMAT Values") == "(2)")
Example #6
def test_table_manipulations(cluster):
    create_table(cluster, "hdfs_test")

    node = cluster.instances["node"]
    fs = HdfsClient(hosts=cluster.hdfs_ip)

    node.query(
        "INSERT INTO hdfs_test VALUES {}".format(generate_values("2020-01-03", 4096))
    )
    node.query(
        "INSERT INTO hdfs_test VALUES {}".format(generate_values("2020-01-04", 4096))
    )

    node.query("RENAME TABLE hdfs_test TO hdfs_renamed")
    assert node.query("SELECT count(*) FROM hdfs_renamed FORMAT Values") == "(8192)"

    hdfs_objects = fs.listdir("/clickhouse")
    assert len(hdfs_objects) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2

    node.query("RENAME TABLE hdfs_renamed TO hdfs_test")
    assert node.query("CHECK TABLE hdfs_test FORMAT Values") == "(1)"

    node.query("DETACH TABLE hdfs_test")
    node.query("ATTACH TABLE hdfs_test")
    assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(8192)"

    hdfs_objects = fs.listdir("/clickhouse")
    assert len(hdfs_objects) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2

    node.query("TRUNCATE TABLE hdfs_test")
    assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(0)"

    hdfs_objects = fs.listdir("/clickhouse")
    assert len(hdfs_objects) == FILES_OVERHEAD
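
The HDFS variant of create_table has a different signature: it takes the cluster, resolves the node itself, and accepts extra settings as a preformatted string (see Example #2). A sketch under those assumptions:

def create_table(cluster, table_name, additional_settings=None):
    # HDFS flavor: same schema, 'hdfs' storage policy, extra settings
    # passed through as a raw "key=value" string.
    node = cluster.instances["node"]
    settings = "storage_policy='hdfs'"
    if additional_settings:
        settings += ", " + additional_settings

    node.query("DROP TABLE IF EXISTS {}".format(table_name))
    node.query(
        """
        CREATE TABLE {table} (
            dt Date,
            id Int64,
            data String
        ) ENGINE = MergeTree()
        PARTITION BY dt
        ORDER BY (dt, id)
        SETTINGS {settings}
        """.format(table=table_name, settings=settings)
    )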
Example #7
def test_alter_table_columns(cluster, node_name):
    node = cluster.instances[node_name]
    create_table(node, "s3_test")
    minio = cluster.minio_client

    node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-03', 4096)))
    node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-03', 4096, -1)))

    node.query("ALTER TABLE s3_test ADD COLUMN col1 UInt64 DEFAULT 1")
    # To ensure parts have merged
    node.query("OPTIMIZE TABLE s3_test")

    assert node.query("SELECT sum(col1) FROM s3_test FORMAT Values") == "(8192)"
    assert node.query("SELECT sum(col1) FROM s3_test WHERE id > 0 FORMAT Values") == "(4096)"
    wait_for_delete_s3_objects(cluster, FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE + FILES_OVERHEAD_PER_COLUMN)

    node.query("ALTER TABLE s3_test MODIFY COLUMN col1 String", settings={"mutations_sync": 2})

    assert node.query("SELECT distinct(col1) FROM s3_test FORMAT Values") == "('1')"
    # and one file with the mutation
    wait_for_delete_s3_objects(cluster, FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE + FILES_OVERHEAD_PER_COLUMN + 1)

    node.query("ALTER TABLE s3_test DROP COLUMN col1", settings={"mutations_sync": 2})

    # and 2 files with mutations
    wait_for_delete_s3_objects(cluster, FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE + 2)
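
Old parts, frozen backups, and mutation files are removed from object storage asynchronously, which is why Examples #7 and #17 poll through wait_for_delete_s3_objects instead of asserting a count directly. A minimal sketch, assuming the same MinIO client and data/ prefix as the rest of the tests:

import time


def wait_for_delete_s3_objects(cluster, expected, timeout=30):
    # Background cleanup is asynchronous; poll until the bucket converges
    # on the expected object count, then assert once the timeout expires.
    minio = cluster.minio_client
    while timeout > 0:
        if len(list(minio.list_objects(cluster.minio_bucket, "data/"))) == expected:
            return
        timeout -= 1
        time.sleep(1)
    assert len(list(minio.list_objects(cluster.minio_bucket, "data/"))) == expected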
Example #8
def test_move_partition_to_another_disk(cluster):
    create_table(cluster, "hdfs_test")

    node = cluster.instances["node"]
    fs = HdfsClient(hosts=cluster.hdfs_ip)

    node.query(
        "INSERT INTO hdfs_test VALUES {}".format(generate_values("2020-01-03", 4096))
    )
    node.query(
        "INSERT INTO hdfs_test VALUES {}".format(generate_values("2020-01-04", 4096))
    )
    assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(8192)"

    hdfs_objects = fs.listdir("/clickhouse")
    assert len(hdfs_objects) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2

    node.query("ALTER TABLE hdfs_test MOVE PARTITION '2020-01-04' TO DISK 'hdd'")
    assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(8192)"

    hdfs_objects = fs.listdir("/clickhouse")
    assert len(hdfs_objects) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE

    node.query("ALTER TABLE hdfs_test MOVE PARTITION '2020-01-04' TO DISK 'hdfs'")
    assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(8192)"

    hdfs_objects = fs.listdir("/clickhouse")
    assert len(hdfs_objects) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2
Example #9
def test_attach_detach_partition(cluster, node_name):
    node = cluster.instances[node_name]
    create_table(node, "s3_test")
    minio = cluster.minio_client

    node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-03', 4096)))
    node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-04', 4096)))
    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)"
    assert len(
        list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2

    node.query("ALTER TABLE s3_test DETACH PARTITION '2020-01-03'")
    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(4096)"
    assert len(
        list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2

    node.query("ALTER TABLE s3_test ATTACH PARTITION '2020-01-03'")
    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)"
    assert len(
        list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2

    node.query("ALTER TABLE s3_test DROP PARTITION '2020-01-03'")
    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(4096)"
    assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE

    node.query("ALTER TABLE s3_test DETACH PARTITION '2020-01-04'")
    node.query("ALTER TABLE s3_test DROP DETACHED PARTITION '2020-01-04'", settings={"allow_drop_detached": 1})
    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(0)"
    assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD
Example #10
def test_table_manipulations(cluster, node_name):
    node = cluster.instances[node_name]
    create_table(node, "s3_test")
    minio = cluster.minio_client

    node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-03', 4096)))
    node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-04', 4096)))

    node.query("RENAME TABLE s3_test TO s3_renamed")
    assert node.query("SELECT count(*) FROM s3_renamed FORMAT Values") == "(8192)"
    assert len(
        list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2
    node.query("RENAME TABLE s3_renamed TO s3_test")

    assert node.query("CHECK TABLE s3_test FORMAT Values") == "(1)"

    node.query("DETACH TABLE s3_test")
    node.query("ATTACH TABLE s3_test")
    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)"
    assert len(
        list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2

    node.query("TRUNCATE TABLE s3_test")
    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(0)"
    assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD
Example #11
def test_freeze_system_unfreeze(cluster, node_name):
    node = cluster.instances[node_name]
    create_table(node, "s3_test")
    create_table(node, "s3_test_removed")
    minio = cluster.minio_client

    node.query("INSERT INTO s3_test VALUES {}".format(
        generate_values("2020-01-04", 4096)))
    node.query("INSERT INTO s3_test VALUES {}".format(
        generate_values("2020-01-04", 4096)))
    node.query("ALTER TABLE s3_test FREEZE WITH NAME 'backup3'")
    node.query("ALTER TABLE s3_test_removed FREEZE WITH NAME 'backup3'")

    node.query("TRUNCATE TABLE s3_test")
    node.query("DROP TABLE s3_test_removed NO DELAY")
    assert (len(list(minio.list_objects(cluster.minio_bucket,
                                        "data/"))) == FILES_OVERHEAD +
            FILES_OVERHEAD_PER_PART_WIDE * 2)

    # Unfreeze all data from backup3.
    node.query("SYSTEM UNFREEZE WITH NAME 'backup3'")

    # Data should be removed from S3.
    assert (len(list(minio.list_objects(cluster.minio_bucket,
                                        "data/"))) == FILES_OVERHEAD)
Example #12
def test_s3_disk_restart_during_load(cluster, node_name):
    node = cluster.instances[node_name]
    create_table(node, "s3_test")

    node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-04', 1024 * 1024)))
    node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-05', 1024 * 1024, -1)))

    def read():
        for i in range(20):
            logging.info("Executing query %d", i)
            assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)"
            logging.info("Query %d executed", i)
            time.sleep(0.2)

    def restart_disk():
        for i in range(5):
            logging.info("Restarting disk, attempt %d", i)
            node.query("SYSTEM RESTART DISK s3")
            logging.info("Disk restarted, attempt %d", i)
            time.sleep(0.5)

    threads = []
    for _ in range(4):
        threads.append(SafeThread(target=read))

    threads.append(SafeThread(target=restart_disk))

    for thread in threads:
        thread.start()

    for thread in threads:
        thread.join()
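
A plain threading.Thread swallows exceptions raised in its target, so a failed assert inside read() would go unnoticed. SafeThread is presumably a wrapper that captures the exception and re-raises it on join(); a minimal sketch:

import threading


class SafeThread(threading.Thread):
    # Re-raises any exception from the target on join(), so assertion
    # failures in worker threads actually fail the test.
    def __init__(self, target):
        super().__init__()
        self.target = target
        self.exception = None

    def run(self):
        try:
            self.target()
        except Exception as e:
            self.exception = e

    def join(self, timeout=None):
        super().join(timeout)
        if self.exception is not None:
            raise self.exception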
Example #13
def test_inserts_selects(cluster):
    node = cluster.instances[NODE_NAME]
    create_table(node, TABLE_NAME)

    values1 = generate_values('2020-01-03', 4096)
    azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {values1}")
    assert azure_query(node, f"SELECT * FROM {TABLE_NAME} order by dt, id FORMAT Values") == values1

    values2 = generate_values('2020-01-04', 4096)
    azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {values2}")
    assert azure_query(node, f"SELECT * FROM {TABLE_NAME} ORDER BY dt, id FORMAT Values") == values1 + "," + values2

    assert azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} where id = 1 FORMAT Values") == "(2)"
Example #14
def test_alter_table_columns(cluster):
    create_table(cluster, "hdfs_test")

    node = cluster.instances["node"]
    fs = HdfsClient(hosts=cluster.hdfs_ip)

    node.query(
        "INSERT INTO hdfs_test VALUES {}".format(generate_values("2020-01-03", 4096))
    )
    node.query(
        "INSERT INTO hdfs_test VALUES {}".format(
            generate_values("2020-01-03", 4096, -1)
        )
    )

    node.query("ALTER TABLE hdfs_test ADD COLUMN col1 UInt64 DEFAULT 1")
    # To ensure parts have merged
    node.query("OPTIMIZE TABLE hdfs_test")

    assert node.query("SELECT sum(col1) FROM hdfs_test FORMAT Values") == "(8192)"
    assert (
        node.query("SELECT sum(col1) FROM hdfs_test WHERE id > 0 FORMAT Values")
        == "(4096)"
    )
    wait_for_delete_hdfs_objects(
        cluster,
        FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE + FILES_OVERHEAD_PER_COLUMN,
    )

    node.query(
        "ALTER TABLE hdfs_test MODIFY COLUMN col1 String",
        settings={"mutations_sync": 2},
    )

    assert node.query("SELECT distinct(col1) FROM hdfs_test FORMAT Values") == "('1')"
    # and one file with the mutation
    wait_for_delete_hdfs_objects(
        cluster,
        FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE + FILES_OVERHEAD_PER_COLUMN + 1,
    )

    node.query("ALTER TABLE hdfs_test DROP COLUMN col1", settings={"mutations_sync": 2})

    # and 2 files with mutations
    wait_for_delete_hdfs_objects(
        cluster, FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE + 2
    )
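
wait_for_delete_hdfs_objects is the HDFS counterpart of the S3 polling helper: it lists /clickhouse until the file count converges. A minimal sketch, assuming the pyhdfs HdfsClient used elsewhere in these tests:

import time

from pyhdfs import HdfsClient


def wait_for_delete_hdfs_objects(cluster, expected, timeout=30):
    # Cleanup on HDFS is asynchronous too; poll the directory listing
    # until it reaches the expected count.
    fs = HdfsClient(hosts=cluster.hdfs_ip)
    while timeout > 0:
        if len(fs.listdir("/clickhouse")) == expected:
            return
        timeout -= 1
        time.sleep(1)
    assert len(fs.listdir("/clickhouse")) == expected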
Example #15
def test_attach_detach_partition(cluster):
    create_table(cluster, "hdfs_test")

    node = cluster.instances["node"]
    fs = HdfsClient(hosts=cluster.hdfs_ip)

    node.query(
        "INSERT INTO hdfs_test VALUES {}".format(generate_values("2020-01-03", 4096))
    )
    node.query(
        "INSERT INTO hdfs_test VALUES {}".format(generate_values("2020-01-04", 4096))
    )
    assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(8192)"

    hdfs_objects = fs.listdir("/clickhouse")
    assert len(hdfs_objects) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2

    node.query("ALTER TABLE hdfs_test DETACH PARTITION '2020-01-03'")
    assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(4096)"

    hdfs_objects = fs.listdir("/clickhouse")
    assert len(hdfs_objects) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2

    node.query("ALTER TABLE hdfs_test ATTACH PARTITION '2020-01-03'")
    assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(8192)"

    hdfs_objects = fs.listdir("/clickhouse")
    assert len(hdfs_objects) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2

    node.query("ALTER TABLE hdfs_test DROP PARTITION '2020-01-03'")
    assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(4096)"

    hdfs_objects = fs.listdir("/clickhouse")
    assert len(hdfs_objects) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE

    node.query("ALTER TABLE hdfs_test DETACH PARTITION '2020-01-04'")
    node.query(
        "ALTER TABLE hdfs_test DROP DETACHED PARTITION '2020-01-04'",
        settings={"allow_drop_detached": 1},
    )
    assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(0)"

    hdfs_objects = fs.listdir("/clickhouse")
    assert len(hdfs_objects) == FILES_OVERHEAD
Example #16
def test_insert_same_partition_and_merge(cluster, merge_vertical, node_name):
    settings = {}
    if merge_vertical:
        settings["vertical_merge_algorithm_min_rows_to_activate"] = 0
        settings["vertical_merge_algorithm_min_columns_to_activate"] = 0

    node = cluster.instances[node_name]
    create_table(node, "s3_test", **settings)
    minio = cluster.minio_client

    node.query("SYSTEM STOP MERGES s3_test")
    node.query("INSERT INTO s3_test VALUES {}".format(
        generate_values("2020-01-03", 1024)))
    node.query("INSERT INTO s3_test VALUES {}".format(
        generate_values("2020-01-03", 2048)))
    node.query("INSERT INTO s3_test VALUES {}".format(
        generate_values("2020-01-03", 4096)))
    node.query("INSERT INTO s3_test VALUES {}".format(
        generate_values("2020-01-03", 1024, -1)))
    node.query("INSERT INTO s3_test VALUES {}".format(
        generate_values("2020-01-03", 2048, -1)))
    node.query("INSERT INTO s3_test VALUES {}".format(
        generate_values("2020-01-03", 4096, -1)))
    assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)"
    assert (node.query("SELECT count(distinct(id)) FROM s3_test FORMAT Values")
            == "(8192)")
    assert (len(list(minio.list_objects(
        cluster.minio_bucket,
        "data/"))) == FILES_OVERHEAD_PER_PART_WIDE * 6 + FILES_OVERHEAD)

    node.query("SYSTEM START MERGES s3_test")

    # Wait for merges and old parts deletion
    for attempt in range(10):
        parts_count = node.query(
            "SELECT COUNT(*) FROM system.parts WHERE table = 's3_test' AND active = 1 FORMAT Values"
        )

        if parts_count == "(1)":
            break

        if attempt == 9:
            assert parts_count == "(1)"

        time.sleep(1)

    assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)"
    assert (node.query("SELECT count(distinct(id)) FROM s3_test FORMAT Values")
            == "(8192)")
    wait_for_delete_s3_objects(cluster,
                               FILES_OVERHEAD_PER_PART_WIDE + FILES_OVERHEAD,
                               timeout=45)
Example #17
def test_move_replace_partition_to_another_table(cluster, node_name):
    node = cluster.instances[node_name]
    create_table(node, "s3_test")
    minio = cluster.minio_client

    node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-03', 4096)))
    node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-04', 4096)))
    node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-05', 4096, -1)))
    node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-06', 4096, -1)))
    assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)"
    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(16384)"
    assert len(
        list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4

    create_table(node, "s3_clone")

    node.query("ALTER TABLE s3_test MOVE PARTITION '2020-01-03' TO TABLE s3_clone")
    node.query("ALTER TABLE s3_test MOVE PARTITION '2020-01-05' TO TABLE s3_clone")
    assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)"
    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)"
    assert node.query("SELECT sum(id) FROM s3_clone FORMAT Values") == "(0)"
    assert node.query("SELECT count(*) FROM s3_clone FORMAT Values") == "(8192)"
    # Number of objects in S3 should be unchanged.
    assert len(list(
        minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 4

    # Add new partitions to the source table with different values, then replace them from the cloned table.
    node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-03', 4096, -1)))
    node.query("INSERT INTO s3_test VALUES {}".format(generate_values('2020-01-05', 4096)))
    assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)"
    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(16384)"
    assert len(list(
        minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 6

    node.query("ALTER TABLE s3_test REPLACE PARTITION '2020-01-03' FROM s3_clone")
    node.query("ALTER TABLE s3_test REPLACE PARTITION '2020-01-05' FROM s3_clone")
    assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)"
    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(16384)"
    assert node.query("SELECT sum(id) FROM s3_clone FORMAT Values") == "(0)"
    assert node.query("SELECT count(*) FROM s3_clone FORMAT Values") == "(8192)"

    # Wait for outdated partitions deletion.
    wait_for_delete_s3_objects(cluster, FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 4)

    node.query("DROP TABLE s3_clone NO DELAY")
    assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)"
    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(16384)"
    # Data should remain in S3
    assert len(
        list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4

    node.query("ALTER TABLE s3_test FREEZE")
    # The number of S3 objects should be unchanged.
    assert len(
        list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4

    node.query("DROP TABLE s3_test NO DELAY")
    # Backup data should remain in S3.

    wait_for_delete_s3_objects(cluster, FILES_OVERHEAD_PER_PART_WIDE * 4)

    for obj in list(minio.list_objects(cluster.minio_bucket, 'data/')):
        minio.remove_object(cluster.minio_bucket, obj.object_name)
Example #18
def test_move_replace_partition_to_another_table(cluster):
    create_table(cluster, "hdfs_test")

    node = cluster.instances["node"]
    fs = HdfsClient(hosts=cluster.hdfs_ip)

    node.query(
        "INSERT INTO hdfs_test VALUES {}".format(generate_values("2020-01-03", 4096))
    )
    node.query(
        "INSERT INTO hdfs_test VALUES {}".format(generate_values("2020-01-04", 4096))
    )
    node.query(
        "INSERT INTO hdfs_test VALUES {}".format(
            generate_values("2020-01-05", 4096, -1)
        )
    )
    node.query(
        "INSERT INTO hdfs_test VALUES {}".format(
            generate_values("2020-01-06", 4096, -1)
        )
    )
    assert node.query("SELECT sum(id) FROM hdfs_test FORMAT Values") == "(0)"
    assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(16384)"

    hdfs_objects = fs.listdir("/clickhouse")
    assert len(hdfs_objects) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4

    create_table(cluster, "hdfs_clone")

    node.query("ALTER TABLE hdfs_test MOVE PARTITION '2020-01-03' TO TABLE hdfs_clone")
    node.query("ALTER TABLE hdfs_test MOVE PARTITION '2020-01-05' TO TABLE hdfs_clone")
    assert node.query("SELECT sum(id) FROM hdfs_test FORMAT Values") == "(0)"
    assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(8192)"
    assert node.query("SELECT sum(id) FROM hdfs_clone FORMAT Values") == "(0)"
    assert node.query("SELECT count(*) FROM hdfs_clone FORMAT Values") == "(8192)"

    # Number of objects in HDFS should be unchanged.
    hdfs_objects = fs.listdir("/clickhouse")
    assert len(hdfs_objects) == FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 4

    # Add new partitions to the source table with different values, then replace them from the cloned table.
    node.query(
        "INSERT INTO hdfs_test VALUES {}".format(
            generate_values("2020-01-03", 4096, -1)
        )
    )
    node.query(
        "INSERT INTO hdfs_test VALUES {}".format(generate_values("2020-01-05", 4096))
    )
    assert node.query("SELECT sum(id) FROM hdfs_test FORMAT Values") == "(0)"
    assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(16384)"

    hdfs_objects = fs.listdir("/clickhouse")
    assert len(hdfs_objects) == FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 6

    node.query("ALTER TABLE hdfs_test REPLACE PARTITION '2020-01-03' FROM hdfs_clone")
    node.query("ALTER TABLE hdfs_test REPLACE PARTITION '2020-01-05' FROM hdfs_clone")
    assert node.query("SELECT sum(id) FROM hdfs_test FORMAT Values") == "(0)"
    assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(16384)"
    assert node.query("SELECT sum(id) FROM hdfs_clone FORMAT Values") == "(0)"
    assert node.query("SELECT count(*) FROM hdfs_clone FORMAT Values") == "(8192)"

    # Wait for outdated partitions deletion.
    wait_for_delete_hdfs_objects(
        cluster, FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 4
    )

    node.query("DROP TABLE hdfs_clone NO DELAY")
    assert node.query("SELECT sum(id) FROM hdfs_test FORMAT Values") == "(0)"
    assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(16384)"

    # Data should remain in hdfs
    hdfs_objects = fs.listdir("/clickhouse")
    assert len(hdfs_objects) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4