コード例 #1
0
ファイル: test.py プロジェクト: wangzhen11aaa/ClickHouse
def test_seekable_formats(started_cluster):
    bucket = started_cluster.minio_bucket
    instance = started_cluster.instances["dummy"]  # type: ClickHouseInstance

    table_function = f"s3(s3_parquet, structure='a Int32, b String', format='Parquet')"
    instance.query(
        f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(5000000) settings s3_truncate_on_insert=1"
    )

    result = instance.query(f"SELECT count() FROM {table_function}")
    assert (int(result) == 5000000)

    table_function = f"s3(s3_orc, structure='a Int32, b String', format='ORC')"
    exec_query_with_retry(
        instance,
        f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(5000000) settings s3_truncate_on_insert=1"
    )

    result = instance.query(f"SELECT count() FROM {table_function}")
    assert (int(result) == 5000000)

    instance.query("SYSTEM FLUSH LOGS")
    result = instance.query(
        f"SELECT formatReadableSize(memory_usage) FROM system.query_log WHERE startsWith(query, 'SELECT count() FROM s3') AND memory_usage > 0 ORDER BY event_time desc"
    )
    print(result[:3])
    assert (int(result[:3]) < 200)
コード例 #2
0
ファイル: test.py プロジェクト: zzsmdfj/ClickHouse
def test_in_memory_alters(start_cluster):
    def check_parts_type(parts_num):
        assert node9.query(
            "SELECT part_type, count() FROM system.parts WHERE table = 'alters_table' \
             AND active GROUP BY part_type") == "InMemory\t{}\n".format(
                parts_num)

    node9.query(
        "INSERT INTO alters_table (date, id, s) VALUES (toDate('2020-10-10'), 1, 'ab'), (toDate('2020-10-10'), 2, 'cd')"
    )
    node9.query("ALTER TABLE alters_table ADD COLUMN col1 UInt32")
    node9.restart_clickhouse(kill=True)

    expected = "1\tab\t0\n2\tcd\t0\n"
    assert node9.query(
        "SELECT id, s, col1 FROM alters_table ORDER BY id") == expected
    check_parts_type(1)
    # After hard restart table can be in readonly mode
    exec_query_with_retry(
        node9,
        "INSERT INTO alters_table (date, id, col1) VALUES (toDate('2020-10-10'), 3, 100)"
    )
    node9.query("ALTER TABLE alters_table MODIFY COLUMN col1 String")
    node9.query("ALTER TABLE alters_table DROP COLUMN s")
    node9.restart_clickhouse(kill=True)

    check_parts_type(2)
    with pytest.raises(Exception):
        node9.query("SELECT id, s, col1 FROM alters_table")

    # Values of col1 was not materialized as integers, so they have
    # default string values after alter
    expected = "1\t_foo\n2\t_foo\n3\t100_foo\n"
    assert node9.query(
        "SELECT id, col1 || '_foo' FROM alters_table ORDER BY id") == expected
コード例 #3
0
ファイル: test.py プロジェクト: wangzhen11aaa/ClickHouse
def test_seekable_formats_url(started_cluster):
    bucket = started_cluster.minio_bucket
    instance = started_cluster.instances["dummy"]

    table_function = f"s3(s3_parquet, structure='a Int32, b String', format='Parquet')"
    instance.query(
        f"insert into table function {table_function} select number, randomString(100) from numbers(5000000) settings s3_truncate_on_insert=1"
    )

    table_function = f"url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_parquet', 'Parquet', 'a Int32, b String')"
    result = instance.query(f"SELECT count() FROM {table_function}")
    assert (int(result) == 5000000)

    table_function = f"s3(s3_orc, structure='a Int32, b String', format='ORC')"
    exec_query_with_retry(
        instance,
        f"insert into table function {table_function} select number, randomString(100) from numbers(5000000) settings s3_truncate_on_insert=1"
    )

    table_function = f"url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_orc', 'ORC', 'a Int32, b String')"
    result = instance.query(f"SELECT count() FROM {table_function}")
    assert (int(result) == 5000000)

    instance.query("SYSTEM FLUSH LOGS")
    result = instance.query(
        f"SELECT formatReadableSize(memory_usage) FROM system.query_log WHERE startsWith(query, 'SELECT count() FROM url') AND memory_usage > 0 ORDER BY event_time desc"
    )
    print(result[:3])
    assert (int(result[:3]) < 200)
コード例 #4
0
ファイル: test.py プロジェクト: wwjiang007/ClickHouse
def test_upgrade_while_mutation(start_cluster):
    node3.query("DROP TABLE IF EXISTS mt1")

    node3.query(
        "CREATE TABLE mt1 (EventDate Date, id UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/t1', 'node3') ORDER BY tuple()"
    )

    node3.query(
        "INSERT INTO mt1 select '2020-02-13', number from numbers(100000)")

    node3.query("SYSTEM STOP MERGES mt1")
    node3.query("ALTER TABLE mt1 DELETE WHERE id % 2 == 0")

    node3.query("DETACH TABLE mt1")  # stop being leader
    node3.restart_with_latest_version(signal=9, fix_metadata=True)

    # checks for readonly
    exec_query_with_retry(node3,
                          "OPTIMIZE TABLE mt1",
                          sleep_time=5,
                          retry_count=60)

    node3.query("ALTER TABLE mt1 DELETE WHERE id > 100000",
                settings={"mutations_sync": "2"})
    # will delete nothing, but previous async mutation will finish with this query

    assert_eq_with_retry(node3, "SELECT COUNT() from mt1", "50000\n")

    node3.query("DROP TABLE mt1")
コード例 #5
0
ファイル: test.py プロジェクト: wwjiang007/ClickHouse
def test_mutate_and_upgrade(start_cluster):
    for node in [node1, node2]:
        node.query("DROP TABLE IF EXISTS mt")
        node.query(
            "CREATE TABLE mt (EventDate Date, id UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/t', '{}') ORDER BY tuple()"
            .format(node.name))

    node1.query("INSERT INTO mt VALUES ('2020-02-13', 1), ('2020-02-13', 2);")

    node1.query("ALTER TABLE mt DELETE WHERE id = 2",
                settings={"mutations_sync": "2"})
    node2.query("SYSTEM SYNC REPLICA mt", timeout=15)

    node2.query("DETACH TABLE mt")  # stop being leader
    node1.query("DETACH TABLE mt")  # stop being leader
    node1.restart_with_latest_version(signal=9, fix_metadata=True)
    node2.restart_with_latest_version(signal=9, fix_metadata=True)

    # After hard restart table can be in readonly mode
    exec_query_with_retry(node2,
                          "INSERT INTO mt VALUES ('2020-02-13', 3)",
                          retry_count=60)
    exec_query_with_retry(node1, "SYSTEM SYNC REPLICA mt", retry_count=60)

    assert node1.query("SELECT COUNT() FROM mt") == "2\n"
    assert node2.query("SELECT COUNT() FROM mt") == "2\n"

    node1.query("INSERT INTO mt VALUES ('2020-02-13', 4);")

    node2.query("SYSTEM SYNC REPLICA mt", timeout=15)

    assert node1.query("SELECT COUNT() FROM mt") == "3\n"
    assert node2.query("SELECT COUNT() FROM mt") == "3\n"

    node2.query("ALTER TABLE mt DELETE WHERE id = 3",
                settings={"mutations_sync": "2"})

    node1.query("SYSTEM SYNC REPLICA mt", timeout=15)

    assert node1.query("SELECT COUNT() FROM mt") == "2\n"
    assert node2.query("SELECT COUNT() FROM mt") == "2\n"

    node1.query(
        "ALTER TABLE mt MODIFY COLUMN id Int32 DEFAULT 0",
        settings={"replication_alter_partitions_sync": "2"},
    )

    node2.query("OPTIMIZE TABLE mt FINAL")

    assert node1.query("SELECT id FROM mt") == "1\n4\n"
    assert node2.query("SELECT id FROM mt") == "1\n4\n"

    for node in [node1, node2]:
        node.query("DROP TABLE mt")
コード例 #6
0
ファイル: test.py プロジェクト: zzsmdfj/ClickHouse
def test_in_memory_deduplication(start_cluster):
    for i in range(3):
        # table can be in readonly node
        exec_query_with_retry(
            node9,
            "INSERT INTO deduplication_table (date, id, s) VALUES (toDate('2020-03-03'), 1, 'foo')"
        )
        exec_query_with_retry(
            node10,
            "INSERT INTO deduplication_table (date, id, s) VALUES (toDate('2020-03-03'), 1, 'foo')"
        )

    node9.query("SYSTEM SYNC REPLICA deduplication_table", timeout=20)
    node10.query("SYSTEM SYNC REPLICA deduplication_table", timeout=20)

    assert node9.query("SELECT date, id, s FROM deduplication_table"
                       ) == "2020-03-03\t1\tfoo\n"
    assert node10.query("SELECT date, id, s FROM deduplication_table"
                        ) == "2020-03-03\t1\tfoo\n"
コード例 #7
0
ファイル: test.py プロジェクト: zhouruiapple/ClickHouse
def test_upgrade_while_mutation(start_cluster):
    node3.query(
        "CREATE TABLE mt1 (EventDate Date, id UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/t1', 'node3') ORDER BY tuple()"
    )

    node3.query(
        "INSERT INTO mt1 select '2020-02-13', number from numbers(100000)")

    node3.query("SYSTEM STOP MERGES mt1")
    node3.query("ALTER TABLE mt1 DELETE WHERE id % 2 == 0")

    node3.restart_with_latest_version(signal=9)

    exec_query_with_retry(node3,
                          "ALTER TABLE mt1 DELETE WHERE id > 100000",
                          settings={"mutations_sync": "2"})
    # will delete nothing, but previous async mutation will finish with this query

    assert_eq_with_retry(node3, "SELECT COUNT() from mt1", "50000\n")
コード例 #8
0
ファイル: test.py プロジェクト: zzsmdfj/ClickHouse
def test_ttl_compatibility(started_cluster, node_left, node_right, num_run):
    drop_table([node_left, node_right], "test_ttl_delete")
    drop_table([node_left, node_right], "test_ttl_group_by")
    drop_table([node_left, node_right], "test_ttl_where")

    for node in [node_left, node_right]:
        node.query('''
                CREATE TABLE test_ttl_delete(date DateTime, id UInt32)
                ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_delete_{suff}', '{replica}')
                ORDER BY id PARTITION BY toDayOfMonth(date)
                TTL date + INTERVAL 3 SECOND
                SETTINGS max_number_of_merges_with_ttl_in_pool=100, max_replicated_merges_with_ttl_in_queue=100
            '''.format(suff=num_run, replica=node.name))

        node.query('''
                CREATE TABLE test_ttl_group_by(date DateTime, id UInt32, val UInt64)
                ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_group_by_{suff}', '{replica}')
                ORDER BY id PARTITION BY toDayOfMonth(date)
                TTL date + INTERVAL 3 SECOND GROUP BY id SET val = sum(val)
                SETTINGS max_number_of_merges_with_ttl_in_pool=100, max_replicated_merges_with_ttl_in_queue=100
            '''.format(suff=num_run, replica=node.name))

        node.query('''
                CREATE TABLE test_ttl_where(date DateTime, id UInt32)
                ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_where_{suff}', '{replica}')
                ORDER BY id PARTITION BY toDayOfMonth(date)
                TTL date + INTERVAL 3 SECOND DELETE WHERE id % 2 = 1
                SETTINGS max_number_of_merges_with_ttl_in_pool=100, max_replicated_merges_with_ttl_in_queue=100
            '''.format(suff=num_run, replica=node.name))

    node_left.query("INSERT INTO test_ttl_delete VALUES (now(), 1)")
    node_left.query(
        "INSERT INTO test_ttl_delete VALUES (toDateTime('2100-10-11 10:00:00'), 2)"
    )
    node_right.query("INSERT INTO test_ttl_delete VALUES (now(), 3)")
    node_right.query(
        "INSERT INTO test_ttl_delete VALUES (toDateTime('2100-10-11 10:00:00'), 4)"
    )

    node_left.query("INSERT INTO test_ttl_group_by VALUES (now(), 0, 1)")
    node_left.query("INSERT INTO test_ttl_group_by VALUES (now(), 0, 2)")
    node_right.query("INSERT INTO test_ttl_group_by VALUES (now(), 0, 3)")
    node_right.query("INSERT INTO test_ttl_group_by VALUES (now(), 0, 4)")

    node_left.query("INSERT INTO test_ttl_where VALUES (now(), 1)")
    node_left.query("INSERT INTO test_ttl_where VALUES (now(), 2)")
    node_right.query("INSERT INTO test_ttl_where VALUES (now(), 3)")
    node_right.query("INSERT INTO test_ttl_where VALUES (now(), 4)")

    if node_left.with_installed_binary:
        node_left.restart_with_latest_version()

    if node_right.with_installed_binary:
        node_right.restart_with_latest_version()

    time.sleep(5)  # Wait for TTL

    # after restart table can be in readonly mode
    exec_query_with_retry(node_right, "OPTIMIZE TABLE test_ttl_delete FINAL")
    node_right.query("OPTIMIZE TABLE test_ttl_group_by FINAL")
    node_right.query("OPTIMIZE TABLE test_ttl_where FINAL")

    exec_query_with_retry(node_left, "OPTIMIZE TABLE test_ttl_delete FINAL")
    node_left.query("OPTIMIZE TABLE test_ttl_group_by FINAL", timeout=20)
    node_left.query("OPTIMIZE TABLE test_ttl_where FINAL", timeout=20)

    # After OPTIMIZE TABLE, it is not guaranteed that everything is merged.
    # Possible scenario (for test_ttl_group_by):
    # 1. Two independent merges assigned: [0_0, 1_1] -> 0_1 and [2_2, 3_3] -> 2_3
    # 2. Another one merge assigned: [0_1, 2_3] -> 0_3
    # 3. Merge to 0_3 is delayed:
    #    `Not executing log entry for part 0_3 because 2 merges with TTL already executing, maximum 2
    # 4. OPTIMIZE FINAL does nothing, cause there is an entry for 0_3
    #
    # So, let's also sync replicas for node_right (for now).
    exec_query_with_retry(node_right, "SYSTEM SYNC REPLICA test_ttl_delete")
    node_right.query("SYSTEM SYNC REPLICA test_ttl_group_by", timeout=20)
    node_right.query("SYSTEM SYNC REPLICA test_ttl_where", timeout=20)

    exec_query_with_retry(node_left, "SYSTEM SYNC REPLICA test_ttl_delete")
    node_left.query("SYSTEM SYNC REPLICA test_ttl_group_by", timeout=20)
    node_left.query("SYSTEM SYNC REPLICA test_ttl_where", timeout=20)

    assert node_left.query(
        "SELECT id FROM test_ttl_delete ORDER BY id") == "2\n4\n"
    assert node_right.query(
        "SELECT id FROM test_ttl_delete ORDER BY id") == "2\n4\n"

    assert node_left.query(
        "SELECT val FROM test_ttl_group_by ORDER BY id") == "10\n"
    assert node_right.query(
        "SELECT val FROM test_ttl_group_by ORDER BY id") == "10\n"

    assert node_left.query(
        "SELECT id FROM test_ttl_where ORDER BY id") == "2\n4\n"
    assert node_right.query(
        "SELECT id FROM test_ttl_where ORDER BY id") == "2\n4\n"
コード例 #9
0
ファイル: test.py プロジェクト: huaxz1986/ClickHouse
def test_ttl_compatibility(started_cluster, node_left, node_right, num_run):
    drop_table([node_left, node_right], "test_ttl_delete")
    drop_table([node_left, node_right], "test_ttl_group_by")
    drop_table([node_left, node_right], "test_ttl_where")

    for node in [node_left, node_right]:
        node.query(
            '''
                CREATE TABLE test_ttl_delete(date DateTime, id UInt32)
                ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_delete_{suff}', '{replica}')
                ORDER BY id PARTITION BY toDayOfMonth(date)
                TTL date + INTERVAL 3 SECOND
            '''.format(suff=num_run, replica=node.name))

        node.query(
            '''
                CREATE TABLE test_ttl_group_by(date DateTime, id UInt32, val UInt64)
                ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_group_by_{suff}', '{replica}')
                ORDER BY id PARTITION BY toDayOfMonth(date)
                TTL date + INTERVAL 3 SECOND GROUP BY id SET val = sum(val)
            '''.format(suff=num_run, replica=node.name))

        node.query(
            '''
                CREATE TABLE test_ttl_where(date DateTime, id UInt32)
                ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_where_{suff}', '{replica}')
                ORDER BY id PARTITION BY toDayOfMonth(date)
                TTL date + INTERVAL 3 SECOND DELETE WHERE id % 2 = 1
            '''.format(suff=num_run, replica=node.name))

    node_left.query("INSERT INTO test_ttl_delete VALUES (now(), 1)")
    node_left.query("INSERT INTO test_ttl_delete VALUES (toDateTime('2100-10-11 10:00:00'), 2)")
    node_right.query("INSERT INTO test_ttl_delete VALUES (now(), 3)")
    node_right.query("INSERT INTO test_ttl_delete VALUES (toDateTime('2100-10-11 10:00:00'), 4)")

    node_left.query("INSERT INTO test_ttl_group_by VALUES (now(), 0, 1)")
    node_left.query("INSERT INTO test_ttl_group_by VALUES (now(), 0, 2)")
    node_right.query("INSERT INTO test_ttl_group_by VALUES (now(), 0, 3)")
    node_right.query("INSERT INTO test_ttl_group_by VALUES (now(), 0, 4)")

    node_left.query("INSERT INTO test_ttl_where VALUES (now(), 1)")
    node_left.query("INSERT INTO test_ttl_where VALUES (now(), 2)")
    node_right.query("INSERT INTO test_ttl_where VALUES (now(), 3)")
    node_right.query("INSERT INTO test_ttl_where VALUES (now(), 4)")

    if node_left.with_installed_binary:
        node_left.restart_with_latest_version()

    if node_right.with_installed_binary:
        node_right.restart_with_latest_version()
    
    time.sleep(5) # Wait for TTL

    # after restart table can be in readonly mode
    exec_query_with_retry(node_right, "OPTIMIZE TABLE test_ttl_delete FINAL")

    node_right.query("OPTIMIZE TABLE test_ttl_group_by FINAL")
    node_right.query("OPTIMIZE TABLE test_ttl_where FINAL")
    exec_query_with_retry(node_left, "SYSTEM SYNC REPLICA test_ttl_delete")

    node_left.query("SYSTEM SYNC REPLICA test_ttl_group_by", timeout=20)
    node_left.query("SYSTEM SYNC REPLICA test_ttl_where", timeout=20)

    assert node_left.query("SELECT id FROM test_ttl_delete ORDER BY id") == "2\n4\n"
    assert node_right.query("SELECT id FROM test_ttl_delete ORDER BY id") == "2\n4\n"

    assert node_left.query("SELECT val FROM test_ttl_group_by ORDER BY id") == "10\n"
    assert node_right.query("SELECT val FROM test_ttl_group_by ORDER BY id") == "10\n"

    assert node_left.query("SELECT id FROM test_ttl_where ORDER BY id") == "2\n4\n"
    assert node_right.query("SELECT id FROM test_ttl_where ORDER BY id") == "2\n4\n"