Example #1
0
def check_all_hosts_sucesfully_executed(tsv_content, num_hosts):
    M = TSV.toMat(tsv_content)
    hosts = [(l[0], l[1]) for l in M] # (host, port)
    codes = [l[2] for l in M]
    messages = [l[3] for l in M]

    assert len(hosts) == num_hosts and len(set(hosts)) == num_hosts, "\n" + tsv_content
    assert len(set(codes)) == 1, "\n" + tsv_content
    assert codes[0] == "0", "\n" + tsv_content
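
These examples assume a TSV helper from the ClickHouse integration-test harness that normalizes tab-separated query output for comparison. Its real implementation (in the harness's helpers/test_tools.py) is not shown here; the sketch below is only a hypothetical approximation of the interface the examples rely on (construction from strings, file-like objects, or row lists, plus lines, equality, and TSV.toMat), not the actual helper.

# Hypothetical sketch of the TSV interface these examples assume; the real
# helper in the ClickHouse test harness may differ in details such as
# whitespace handling and supported input types.
class TSV:
    def __init__(self, contents):
        if isinstance(contents, TSV):
            self.lines = list(contents.lines)
        elif isinstance(contents, str):
            # Keep non-empty lines of tab-separated text, trimming surrounding whitespace.
            self.lines = [line.strip() for line in contents.splitlines() if line.strip()]
        elif hasattr(contents, "read"):
            # File-like object: read its text content and split into lines.
            self.lines = [line.strip() for line in contents.read().splitlines() if line.strip()]
        else:
            # Assume an iterable of rows; join each row's cells with tabs.
            self.lines = ["\t".join(str(cell) for cell in row) for row in contents]

    def __eq__(self, other):
        if not isinstance(other, TSV):
            other = TSV(other)
        return self.lines == other.lines

    def __len__(self):
        return len(self.lines)

    def __str__(self):
        return "\n".join(self.lines)

    @staticmethod
    def toMat(contents):
        # Parse tab-separated text into a matrix: a list of rows, each a list of cell strings.
        return [line.split("\t") for line in TSV(contents).lines]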
Example #2
0
def test_operation_id():
    create_and_fill_table(n=30)

    backup_name = new_backup_name()

    [id, status] = instance.query(
        f"BACKUP TABLE test.table TO {backup_name} SETTINGS id='first' ASYNC"
    ).split("\t")

    assert id == "first"
    assert status == "CREATING_BACKUP\n" or status == "BACKUP_CREATED\n"

    assert_eq_with_retry(
        instance,
        f"SELECT status, error FROM system.backups WHERE id='first'",
        TSV([["BACKUP_CREATED", ""]]),
    )

    instance.query("DROP TABLE test.table")

    [id, status] = instance.query(
        f"RESTORE TABLE test.table FROM {backup_name} SETTINGS id='second' ASYNC"
    ).split("\t")

    assert id == "second"
    assert status == "RESTORING\n" or status == "RESTORED\n"

    assert_eq_with_retry(
        instance,
        f"SELECT status, error FROM system.backups WHERE id='second'",
        TSV([["RESTORED", ""]]),
    )

    # Reuse the same ID again
    instance.query("DROP TABLE test.table")

    [id, status] = instance.query(
        f"RESTORE TABLE test.table FROM {backup_name} SETTINGS id='first'"
    ).split("\t")

    assert id == "first"
    assert status == "RESTORED\n"
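
Example #2 also calls assert_eq_with_retry, another harness helper that re-runs a query until its result matches the expectation. Below is a minimal sketch of what such a retry-and-compare helper presumably does; the defaults and parameter names are assumptions, not the actual test-harness implementation.

import time

# Hypothetical sketch of an assert_eq_with_retry-style helper: poll the query
# until its TSV-normalized result equals the expectation or retries run out.
def assert_eq_with_retry(instance, query, expectation, retry_count=20, sleep_time=0.5):
    expected = TSV(expectation)
    for _ in range(retry_count):
        if TSV(instance.query(query)) == expected:
            return
        time.sleep(sleep_time)
    result = instance.query(query)
    assert TSV(result) == expected, "Query '{}' returned:\n{}".format(query, result)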
Example #3
0
def test_ttl_many_columns(started_cluster):
    drop_table([node1, node2], "test_ttl_2")
    for node in [node1, node2]:
        node.query('''
            CREATE TABLE test_ttl_2(date DateTime, id UInt32,
                a Int32 TTL date,
                _idx Int32 TTL date,
                _offset Int32 TTL date,
                _partition Int32 TTL date)
            ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_2', '{replica}')
            ORDER BY id PARTITION BY toDayOfMonth(date) SETTINGS merge_with_ttl_timeout=0;
        '''.format(replica=node.name))

    node1.query("SYSTEM STOP MERGES test_ttl_2")
    node2.query("SYSTEM STOP MERGES test_ttl_2")

    node1.query(
        "INSERT INTO test_ttl_2 VALUES (toDateTime('2000-10-10 00:00:00'), 1, 2, 3, 4, 5)"
    )
    node1.query(
        "INSERT INTO test_ttl_2 VALUES (toDateTime('2100-10-10 10:00:00'), 6, 7, 8, 9, 10)"
    )

    # Check that the part will appear in the merge result
    node1.query("SYSTEM STOP FETCHES test_ttl_2")
    node2.query("SYSTEM STOP FETCHES test_ttl_2")

    node1.query("SYSTEM START MERGES test_ttl_2")
    node2.query("SYSTEM START MERGES test_ttl_2")

    time.sleep(1)  # sleep to allow the TTL merge selector to be used a second time
    node1.query("OPTIMIZE TABLE test_ttl_2 FINAL", timeout=5)

    expected = "1\t0\t0\t0\t0\n6\t7\t8\t9\t10\n"
    assert TSV(
        node1.query(
            "SELECT id, a, _idx, _offset, _partition FROM test_ttl_2 ORDER BY id"
        )) == TSV(expected)
    assert TSV(
        node2.query(
            "SELECT id, a, _idx, _offset, _partition FROM test_ttl_2 ORDER BY id"
        )) == TSV(expected)
Example #4
0
def test_system_graphite_retentions(graphite_table):
    expected = '''
graphite_rollup	\\\\.count$	sum	0	0	1	0	['test']	['graphite']
graphite_rollup	\\\\.max$	max	0	0	2	0	['test']	['graphite']
graphite_rollup	^five_min\\\\.		31536000	14400	3	0	['test']	['graphite']
graphite_rollup	^five_min\\\\.		5184000	3600	3	0	['test']	['graphite']
graphite_rollup	^five_min\\\\.		0	300	3	0	['test']	['graphite']
graphite_rollup	^one_min	avg	31536000	600	4	0	['test']	['graphite']
graphite_rollup	^one_min	avg	7776000	300	4	0	['test']	['graphite']
graphite_rollup	^one_min	avg	0	60	4	0	['test']	['graphite']
    '''
    result = q('SELECT * from system.graphite_retentions')

    assert TSV(result) == TSV(expected)

    q('''
DROP TABLE IF EXISTS test.graphite2;
CREATE TABLE test.graphite2
    (metric String, value Float64, timestamp UInt32, date Date, updated UInt32)
    ENGINE = GraphiteMergeTree('graphite_rollup')
    PARTITION BY toYYYYMM(date)
    ORDER BY (metric, timestamp)
    SETTINGS index_granularity=8192;
    ''')
    expected = '''
graphite_rollup	['test','test']	['graphite','graphite2']
graphite_rollup	['test','test']	['graphite','graphite2']
graphite_rollup	['test','test']	['graphite','graphite2']
graphite_rollup	['test','test']	['graphite','graphite2']
graphite_rollup	['test','test']	['graphite','graphite2']
graphite_rollup	['test','test']	['graphite','graphite2']
graphite_rollup	['test','test']	['graphite','graphite2']
graphite_rollup	['test','test']	['graphite','graphite2']
    '''
    result = q('''
    SELECT
        config_name,
        Tables.database,
        Tables.table
    FROM system.graphite_retentions
    ''')
    assert TSV(result) == TSV(expected)
Example #5
0
def partition_complex_assert_columns_txt():
    path_to_parts = path_to_data + "data/test/partition_complex/"
    parts = TSV(
        q("SELECT name FROM system.parts WHERE database='test' AND table='partition_complex'"
          ))
    assert len(parts) > 0
    for part_name in parts.lines:
        path_to_columns = path_to_parts + part_name + "/columns.txt"
        # 2 header lines + 3 columns
        assert (instance.exec_in_container(["wc", "-l", path_to_columns
                                            ]).split()[0] == "5")
Example #6
0
    def check(self):
        first = cluster.instances["first_of_two"]
        second = cluster.instances["second_of_two"]

        a = first.query("SELECT count() from db_ttl_columns.source")
        b = second.query("SELECT count() from db_ttl_columns.destination")
        assert a == b, "Count"

        a = TSV(
            first.query("""SELECT sipHash64(*) from db_ttl_columns.source
            ORDER BY (Column1, Column2, Column3, Column4, Column5, Column6, Column7, Column8)"""
                        ))
        b = TSV(
            second.query("""SELECT sipHash64(*) from db_ttl_columns.destination
            ORDER BY (Column1, Column2, Column3, Column4, Column5, Column6, Column7, Column8)"""
                         ))
        assert a == b, "Data"

        first.query("DROP DATABASE IF EXISTS db_ttl_columns SYNC")
        second.query("DROP DATABASE IF EXISTS db_ttl_columns SYNC")
Example #7
0
def count_ttl_merges_in_background_pool(node, table, level):
    result = TSV(
        node.query(
            "SELECT * FROM system.merges WHERE merge_type = 'TTL_DELETE' and table = '{}'"
            .format(table)))
    count = len(result)
    if count >= level:
        print(
            "count_ttl_merges_in_background_pool: merges more than warn level:\n{}"
            .format(result))
    return count
Example #8
0
def count_ttl_merges_in_background_pool(node, table, level):
    result = TSV(
        node.query(
            f"SELECT * FROM system.merges WHERE merge_type = 'TTL_DELETE' and table = '{table}'"
        ))
    count = len(result)
    if count >= level:
        logging.debug(
            f"count_ttl_merges_in_background_pool: merges more than warn level:\n{result}"
        )
    return count
Example #9
0
def test_count(started_cluster):
    node = started_cluster.instances["s0_0_0"]
    pure_s3 = node.query(
        """
    SELECT count(*) from s3(
        'http://minio1:9001/root/data/{clickhouse,database}/*', 
        'minio', 'minio123', 'CSV', 
        'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))')"""
    )
    # print(pure_s3)
    s3_distributed = node.query(
        """
    SELECT count(*) from s3Cluster(
        'cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', 
        'minio', 'minio123', 'CSV',
        'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))')"""
    )
    # print(s3_distributed)

    assert TSV(pure_s3) == TSV(s3_distributed)
Example #10
0
def test_system_functions():
    instance.query("CREATE FUNCTION linear_equation AS (x, k, b) -> k*x + b;")

    instance.query("CREATE FUNCTION parity_str AS (n) -> if(n % 2, 'odd', 'even');")

    backup_name = new_backup_name()
    instance.query(f"BACKUP TABLE system.functions TO {backup_name}")

    instance.query("DROP FUNCTION linear_equation")
    instance.query("DROP FUNCTION parity_str")

    instance.query(f"RESTORE TABLE system.functions FROM {backup_name}")

    assert instance.query(
        "SELECT number, linear_equation(number, 2, 1) FROM numbers(3)"
    ) == TSV([[0, 1], [1, 3], [2, 5]])

    assert instance.query("SELECT number, parity_str(number) FROM numbers(3)") == TSV(
        [[0, "even"], [1, "odd"], [2, "even"]]
    )
Example #11
0
def test_system_users_required_privileges():
    instance.query("CREATE ROLE r1")
    instance.query("CREATE USER u1 DEFAULT ROLE r1")
    instance.query("GRANT SELECT ON test.* TO u1")

    # SETTINGS allow_backup=false means the following user won't be included in backups.
    instance.query("CREATE USER u2 SETTINGS allow_backup=false")

    backup_name = new_backup_name()

    expected_error = "necessary to have grant BACKUP ON system.users"
    assert expected_error in instance.query_and_get_error(
        f"BACKUP TABLE system.users, TABLE system.roles TO {backup_name}", user="******"
    )

    instance.query("GRANT BACKUP ON system.users TO u2")

    expected_error = "necessary to have grant BACKUP ON system.roles"
    assert expected_error in instance.query_and_get_error(
        f"BACKUP TABLE system.users, TABLE system.roles TO {backup_name}", user="******"
    )

    instance.query("GRANT BACKUP ON system.roles TO u2")
    instance.query(
        f"BACKUP TABLE system.users, TABLE system.roles TO {backup_name}", user="******"
    )

    instance.query("DROP USER u1")
    instance.query("DROP ROLE r1")

    expected_error = (
        "necessary to have grant CREATE USER, CREATE ROLE, ROLE ADMIN ON *.*"
    )
    assert expected_error in instance.query_and_get_error(
        f"RESTORE ALL FROM {backup_name}", user="******"
    )

    instance.query("GRANT CREATE USER, CREATE ROLE, ROLE ADMIN ON *.* TO u2")

    expected_error = "necessary to have grant SELECT ON test.* WITH GRANT OPTION"
    assert expected_error in instance.query_and_get_error(
        f"RESTORE ALL FROM {backup_name}", user="******"
    )

    instance.query("GRANT SELECT ON test.* TO u2 WITH GRANT OPTION")
    instance.query(f"RESTORE ALL FROM {backup_name}", user="******")

    assert instance.query("SHOW CREATE USER u1") == "CREATE USER u1 DEFAULT ROLE r1\n"
    assert instance.query("SHOW GRANTS FOR u1") == TSV(
        ["GRANT SELECT ON test.* TO u1", "GRANT r1 TO u1"]
    )

    assert instance.query("SHOW CREATE ROLE r1") == "CREATE ROLE r1\n"
    assert instance.query("SHOW GRANTS FOR r1") == ""
Example #12
0
def test_deduplication_window_in_seconds(started_cluster):
    node = node1

    node.query("INSERT INTO simple2 VALUES (0, 0)")
    time.sleep(1)
    node.query("INSERT INTO simple2 VALUES (0, 0)")  # deduplication works here
    node.query("INSERT INTO simple2 VALUES (0, 1)")
    assert TSV(node.query("SELECT count() FROM simple2")) == TSV("2\n")

    # wait for the cleanup thread
    time.sleep(2)

    assert TSV.toMat(
        node.query(
            "SELECT count() FROM system.zookeeper WHERE path='/clickhouse/tables/0/simple2/blocks'"
        ))[0][0] == "1"
    node.query(
        "INSERT INTO simple2 VALUES (0, 0)"
    )  # deduplication doesn't work here, the first hash node was deleted
    assert TSV.toMat(node.query("SELECT count() FROM simple2"))[0][0] == "3"
Example #13
0
def test_polymorphic_parts_non_adaptive(start_cluster):
    node1.query("SYSTEM STOP MERGES")
    node2.query("SYSTEM STOP MERGES")

    insert_random_data('non_adaptive_table', node1, 100)
    node2.query("SYSTEM SYNC REPLICA non_adaptive_table", timeout=20)

    insert_random_data('non_adaptive_table', node2, 100)
    node1.query("SYSTEM SYNC REPLICA non_adaptive_table", timeout=20)

    assert TSV(node1.query("SELECT part_type, count() FROM system.parts " \
                           "WHERE table = 'non_adaptive_table' AND active GROUP BY part_type ORDER BY part_type")) == TSV(
        "Wide\t2\n")
    assert TSV(node2.query("SELECT part_type, count() FROM system.parts " \
                           "WHERE table = 'non_adaptive_table' AND active GROUP BY part_type ORDER BY part_type")) == TSV(
        "Wide\t2\n")

    assert node1.contains_in_log(
        "<Warning> default.non_adaptive_table ([0-9a-f-]*): Table can't create parts with adaptive granularity"
    )
Example #14
0
def test_return_real_values(started_cluster):
    assert dictionary_node.get_process_pid("clickhouse") is not None, "ClickHouse must be alive"

    first_batch = """
    SELECT count(*)
    FROM
    (
    SELECT
        arrayJoin(arrayMap(x -> (x + 1000000), range(100))) AS id,
        dictGetString('default_string', 'value', toUInt64(id)) AS value
    )
    WHERE value = '';
    """

    assert TSV("0") == TSV(main_node.query(first_batch))

    # Wait for the cache to expire
    time.sleep(5)

    assert TSV("0") == TSV(main_node.query(first_batch))
Example #15
0
def started_cluster():
    try:
        cluster.start()
        test_table.create_clickhouse_source(instance)
        for line in TSV(instance.query('select name from system.dictionaries')).lines:
            print(line, end=" ")

        yield cluster

    finally:
        cluster.shutdown()
Example #16
0
def test_on_server_fail(started_cluster):
    instance = cluster.instances['ch1']
    kill_instance = cluster.instances['ch2']

    ddl_check_query(instance, "DROP TABLE IF EXISTS test.test_server_fail ON CLUSTER 'cluster'")

    kill_instance.get_docker_handle().stop()
    request = instance.get_query_request("CREATE TABLE test.test_server_fail ON CLUSTER 'cluster' (i Int8) ENGINE=Null", timeout=30)
    kill_instance.get_docker_handle().start()

    ddl_check_query(instance, "DROP TABLE IF EXISTS test.__nope__ ON CLUSTER 'cluster'")

    # Check query itself
    check_all_hosts_sucesfully_executed(request.get_answer())

    # And check query artefacts
    contents = instance.query("SELECT hostName() AS h FROM all_tables WHERE database='test' AND name='test_server_fail' ORDER BY h")
    assert TSV(contents) == TSV("ch1\nch2\nch3\nch4\n")

    ddl_check_query(instance, "DROP TABLE IF EXISTS test.test_server_fail ON CLUSTER 'cluster'")
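
Examples #16 and #19 use ddl_check_query, which presumably runs an ON CLUSTER DDL statement and verifies that every host reported success. A hedged sketch, assuming it reuses check_all_hosts_sucesfully_executed from Example #33; the signature is an assumption, not the harness's actual definition.

# Hypothetical sketch of ddl_check_query: execute a distributed DDL statement
# and assert that every host in the cluster returned exit code 0.
def ddl_check_query(instance, query, num_hosts=None):
    result = instance.query(query)
    check_all_hosts_sucesfully_executed(result, num_hosts)
    return result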
Example #17
0
def backup_restore(started_cluster):
    q("DROP TABLE IF EXISTS test.tbl")
    q("CREATE TABLE test.tbl (p Date, k Int8) ENGINE = MergeTree PARTITION BY toYYYYMM(p) ORDER BY p"
      )
    for i in range(1, 4):
        q('INSERT INTO test.tbl (p, k) VALUES(toDate({}), {})'.format(i, i))
    for i in range(31, 34):
        q('INSERT INTO test.tbl (p, k) VALUES(toDate({}), {})'.format(i, i))

    expected = TSV(
        '1970-01-02\t1\n1970-01-03\t2\n1970-01-04\t3\n1970-02-01\t31\n1970-02-02\t32\n1970-02-03\t33'
    )
    res = q("SELECT * FROM test.tbl ORDER BY p")
    assert (TSV(res) == expected)

    q("ALTER TABLE test.tbl FREEZE")

    yield

    q("DROP TABLE IF EXISTS test.tbl")
Example #18
0
    def check(self):
        zk = cluster.get_kazoo_client("zoo1")
        status_data, _ = zk.get(self.zk_task_path + "/status")
        assert (
            status_data
            == b'{"hits":{"all_partitions_count":5,"processed_partitions_count":5}}'
        )

        source = cluster.instances["first_trivial"]
        destination = cluster.instances["second_trivial"]

        assert TSV(
            source.query("SELECT count() FROM trivial_without_arguments")
        ) == TSV("1002\n")
        assert TSV(
            destination.query("SELECT count() FROM trivial_without_arguments")
        ) == TSV("1002\n")

        for node in [source, destination]:
            node.query("DROP TABLE trivial_without_arguments")
Example #19
0
def test_default_database(started_cluster):
    instance = cluster.instances['ch3']

    ddl_check_query(
        instance, "CREATE DATABASE IF NOT EXISTS test2 ON CLUSTER 'cluster'")
    ddl_check_query(instance, "DROP TABLE IF EXISTS null ON CLUSTER 'cluster'")
    ddl_check_query(
        instance,
        "CREATE TABLE null ON CLUSTER 'cluster2' (s String DEFAULT 'escape\t\nme') ENGINE = Null"
    )

    contents = instance.query(
        "SELECT hostName() AS h, database FROM all_tables WHERE name = 'null' ORDER BY h"
    )
    assert TSV(contents) == TSV(
        "ch1\tdefault\nch2\ttest2\nch3\tdefault\nch4\ttest2\n")

    ddl_check_query(instance, "DROP TABLE IF EXISTS null ON CLUSTER cluster2")
    ddl_check_query(instance,
                    "DROP DATABASE IF EXISTS test2 ON CLUSTER 'cluster'")
Example #20
0
def kafka_check_json_numbers(instance):
    retries = 0
    while True:
        if kafka_is_available(started_cluster):
            break
        else:
            retries += 1
            if retries > 50:
                raise RuntimeError("Cannot connect to kafka.")
            print("Waiting for kafka to be available...")
            time.sleep(1)
    messages = ''
    for i in range(50):
        messages += json.dumps({'key': i, 'value': i}) + '\n'
    kafka_produce(started_cluster, 'json', messages)
    time.sleep(3)
    result = instance.query('SELECT * FROM test.kafka;')
    file = p.join(p.dirname(__file__), 'test_kafka_json.reference')
    with open(file) as reference:
        assert TSV(result) == TSV(reference)
Example #21
0
def test_jdbc_update(started_cluster):
    """Test update query using JDBC table function"""
    secrets = str(uuid.uuid1())
    instance.query("DROP TABLE IF EXISTS test.test_update")
    instance.query("""
        CREATE TABLE test.test_update ENGINE = Memory AS
        SELECT * FROM test.ClickHouseTable;
        SELECT * 
        FROM jdbc(
            '{}?mutation',
            'SET mutations_sync = 1; ALTER TABLE test.test_update UPDATE Str=''{}'' WHERE Num = {} - 1;'
        )
    """.format(datasource, secrets, records))

    actual = instance.query("""
        SELECT Str
        FROM jdbc('{}', 'SELECT * FROM test.test_update WHERE Num = {} - 1')
    """.format(datasource, records))
    assert TSV(actual) == TSV(secrets), "expecting {} but got {}".format(
        secrets, actual)
Example #22
0
def test_old_style():
    node.copy_file_to_container(
        os.path.join(SCRIPT_DIR, "configs/old_style.xml"),
        '/etc/clickhouse-server/config.d/z.xml')
    node.restart_clickhouse()
    assert node.query("SELECT * FROM system.user_directories") == TSV(
        [["users.xml", "users.xml", "/etc/clickhouse-server/users2.xml", 1, 1],
         [
             "local directory", "local directory",
             "/var/lib/clickhouse/access2/", 0, 2
         ]])
Example #23
0
def test_rollup_aggregation_2(graphite_table):
    result = q('''
INSERT INTO test.graphite
    SELECT 'one_min.x' AS metric,
           toFloat64(number) AS value,
           toUInt32(1111111111 - intDiv(number, 3)) AS timestamp,
           toDate('2017-02-02') AS date,
           toUInt32(100 - number) AS updated
    FROM (SELECT * FROM system.numbers LIMIT 50);

OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL;

SELECT * FROM test.graphite;
''')

    expected = '''\
one_min.x	24	1111110600	2017-02-02	100
'''

    assert TSV(result) == TSV(expected)
Example #24
0
def partition_complex_assert_columns_txt():
    path_to_parts = path_to_data + 'data/test/partition_complex/'
    parts = TSV(
        q("SELECT name FROM system.parts WHERE database='test' AND table='partition_complex'"
          ))
    assert len(parts) > 0
    for part_name in parts.lines:
        path_to_columns = path_to_parts + part_name + '/columns.txt'
        # 2 header lines + 3 columns
        assert instance.exec_in_container(['wc', '-l',
                                           path_to_columns]).split()[0] == '5'
Example #25
0
def test_insertion_sync(started_cluster):
    node1.query(
        '''SET insert_distributed_sync = 1, insert_distributed_timeout = 0;
    INSERT INTO distributed_table SELECT today() as date, number as val FROM system.numbers LIMIT 10000'''
    )

    assert node2.query("SELECT count() FROM local_table").rstrip() == '10000'

    node1.query('''
    SET insert_distributed_sync = 1, insert_distributed_timeout = 1;
    INSERT INTO distributed_table SELECT today() - 1 as date, number as val FROM system.numbers LIMIT 10000'''
                )

    assert node2.query("SELECT count() FROM local_table").rstrip() == '20000'

    # Insert with explicitly specified columns.
    node1.query('''
    SET insert_distributed_sync = 1, insert_distributed_timeout = 1;
    INSERT INTO distributed_table(date, val) VALUES ('2000-01-01', 100500)''')

    # Insert with columns specified in different order.
    node1.query('''
    SET insert_distributed_sync = 1, insert_distributed_timeout = 1;
    INSERT INTO distributed_table(val, date) VALUES (100500, '2000-01-01')''')

    # Insert with an incomplete list of columns.
    node1.query('''
    SET insert_distributed_sync = 1, insert_distributed_timeout = 1;
    INSERT INTO distributed_table(val) VALUES (100500)''')

    expected = TSV('''
1970-01-01	100500
2000-01-01	100500
2000-01-01	100500''')
    assert TSV(
        node2.query(
            'SELECT date, val FROM local_table WHERE val = 100500 ORDER BY date'
        )) == expected

    node1.query("TRUNCATE TABLE local_table SYNC")
    node2.query("TRUNCATE TABLE local_table SYNC")
Example #26
0
def partition_complex_assert_checksums():
    # Do not check increment.txt - it can be changed by other tests with FREEZE
    cmd = [
        "bash",
        "-c",
        f"cd {path_to_data} && find shadow -type f -exec" +
        " md5sum {} \\; | grep partition_complex"
        " | sed 's shadow/[0-9]*/data/[a-z0-9_-]*/ shadow/1/data/test/ g' | sort | uniq",
    ]

    checksums = (
        "082814b5aa5109160d5c0c5aff10d4df\tshadow/1/data/test/partition_complex/19700102_2_2_0/k.bin\n"
        "082814b5aa5109160d5c0c5aff10d4df\tshadow/1/data/test/partition_complex/19700201_1_1_0/v1.bin\n"
        "13cae8e658e0ca4f75c56b1fc424e150\tshadow/1/data/test/partition_complex/19700102_2_2_0/minmax_p.idx\n"
        "25daad3d9e60b45043a70c4ab7d3b1c6\tshadow/1/data/test/partition_complex/19700102_2_2_0/partition.dat\n"
        "3726312af62aec86b64a7708d5751787\tshadow/1/data/test/partition_complex/19700201_1_1_0/partition.dat\n"
        "37855b06a39b79a67ea4e86e4a3299aa\tshadow/1/data/test/partition_complex/19700102_2_2_0/checksums.txt\n"
        "38e62ff37e1e5064e9a3f605dfe09d13\tshadow/1/data/test/partition_complex/19700102_2_2_0/v1.bin\n"
        "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700102_2_2_0/k.mrk\n"
        "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700102_2_2_0/p.mrk\n"
        "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700102_2_2_0/v1.mrk\n"
        "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700201_1_1_0/k.mrk\n"
        "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700201_1_1_0/p.mrk\n"
        "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700201_1_1_0/v1.mrk\n"
        "55a54008ad1ba589aa210d2629c1df41\tshadow/1/data/test/partition_complex/19700201_1_1_0/primary.idx\n"
        "5f087cb3e7071bf9407e095821e2af8f\tshadow/1/data/test/partition_complex/19700201_1_1_0/checksums.txt\n"
        "77d5af402ada101574f4da114f242e02\tshadow/1/data/test/partition_complex/19700102_2_2_0/columns.txt\n"
        "77d5af402ada101574f4da114f242e02\tshadow/1/data/test/partition_complex/19700201_1_1_0/columns.txt\n"
        "88cdc31ded355e7572d68d8cde525d3a\tshadow/1/data/test/partition_complex/19700201_1_1_0/p.bin\n"
        "9e688c58a5487b8eaf69c9e1005ad0bf\tshadow/1/data/test/partition_complex/19700102_2_2_0/primary.idx\n"
        "c0904274faa8f3f06f35666cc9c5bd2f\tshadow/1/data/test/partition_complex/19700102_2_2_0/default_compression_codec.txt\n"
        "c0904274faa8f3f06f35666cc9c5bd2f\tshadow/1/data/test/partition_complex/19700201_1_1_0/default_compression_codec.txt\n"
        "c4ca4238a0b923820dcc509a6f75849b\tshadow/1/data/test/partition_complex/19700102_2_2_0/count.txt\n"
        "c4ca4238a0b923820dcc509a6f75849b\tshadow/1/data/test/partition_complex/19700201_1_1_0/count.txt\n"
        "cfcb770c3ecd0990dcceb1bde129e6c6\tshadow/1/data/test/partition_complex/19700102_2_2_0/p.bin\n"
        "e2af3bef1fd129aea73a890ede1e7a30\tshadow/1/data/test/partition_complex/19700201_1_1_0/k.bin\n"
        "f2312862cc01adf34a93151377be2ddf\tshadow/1/data/test/partition_complex/19700201_1_1_0/minmax_p.idx\n"
    )

    assert TSV(instance.exec_in_container(cmd).replace("  ",
                                                       "\t")) == TSV(checksums)
Example #27
0
def test_simple_alters(test_cluster):
    instance = test_cluster.instances['ch2']

    test_cluster.ddl_check_query(instance, "DROP TABLE IF EXISTS merge ON CLUSTER '{cluster}'")
    test_cluster.ddl_check_query(instance, "DROP TABLE IF EXISTS all_merge_32 ON CLUSTER '{cluster}'")
    test_cluster.ddl_check_query(instance, "DROP TABLE IF EXISTS all_merge_64 ON CLUSTER '{cluster}'")

    test_cluster.ddl_check_query(instance, """
CREATE TABLE IF NOT EXISTS merge ON CLUSTER '{cluster}' (p Date, i Int32)
ENGINE = MergeTree(p, p, 1)
""")
    test_cluster.ddl_check_query(instance, """
CREATE TABLE IF NOT EXISTS all_merge_32 ON CLUSTER '{cluster}' (p Date, i Int32)
ENGINE = Distributed('{cluster}', default, merge, i)
""")
    test_cluster.ddl_check_query(instance, """
CREATE TABLE IF NOT EXISTS all_merge_64 ON CLUSTER '{cluster}' (p Date, i Int64, s String)
ENGINE = Distributed('{cluster}', default, merge, i)
""")

    for i in range(0, 4, 2):
        k = (i // 2) * 2
        test_cluster.instances['ch{}'.format(i + 1)].query("INSERT INTO merge (i) VALUES ({})({})".format(k, k + 1))

    assert TSV(instance.query("SELECT i FROM all_merge_32 ORDER BY i")) == TSV(
        ''.join(['{}\n'.format(x) for x in range(4)]))

    time.sleep(5)
    test_cluster.ddl_check_query(instance, "ALTER TABLE merge ON CLUSTER '{cluster}' MODIFY COLUMN i Int64")
    time.sleep(5)
    test_cluster.ddl_check_query(instance,
                                 "ALTER TABLE merge ON CLUSTER '{cluster}' ADD COLUMN s String DEFAULT toString(i) FORMAT TSV")

    assert TSV(instance.query("SELECT i, s FROM all_merge_64 ORDER BY i")) == TSV(
        ''.join(['{}\t{}\n'.format(x, x) for x in range(4)]))

    for i in range(0, 4, 2):
        k = (i // 2) * 2 + 4
        test_cluster.instances['ch{}'.format(i + 1)].query(
            "INSERT INTO merge (p, i) VALUES (31, {})(31, {})".format(k, k + 1))

    assert TSV(instance.query("SELECT i, s FROM all_merge_64 ORDER BY i")) == TSV(
        ''.join(['{}\t{}\n'.format(x, x) for x in range(8)]))

    test_cluster.ddl_check_query(instance, "ALTER TABLE merge ON CLUSTER '{cluster}' DETACH PARTITION 197002")
    assert TSV(instance.query("SELECT i, s FROM all_merge_64 ORDER BY i")) == TSV(
        ''.join(['{}\t{}\n'.format(x, x) for x in range(4)]))

    test_cluster.ddl_check_query(instance, "DROP TABLE merge ON CLUSTER '{cluster}'")
    test_cluster.ddl_check_query(instance, "DROP TABLE all_merge_32 ON CLUSTER '{cluster}'")
    test_cluster.ddl_check_query(instance, "DROP TABLE all_merge_64 ON CLUSTER '{cluster}'")
Example #28
0
def test_select_all_from_cached(cached_dictionary_structure):
    name, keys, use_parent = cached_dictionary_structure
    query = instance.query

    structure = test_table.get_structure_for_keys(keys, use_parent)
    query('''
    DROP TABLE IF EXISTS test.{0}
    '''.format(name))

    create_query = "CREATE TABLE test.{0} ({1}) engine = Dictionary({0})".format(
        name, structure)
    TSV(query(create_query))

    for i in range(4):
        result = TSV(query('select * from test.{0}'.format(name)))
        diff = test_table.compare_by_keys(keys,
                                          result.lines,
                                          use_parent,
                                          add_not_found_rows=False)
        print(test_table.process_diff(diff))
        assert not diff

        key = []
        for key_name in keys:
            if key_name.endswith('str'):
                key.append("'" + str(i) + "'")
            else:
                key.append(str(i))
        if len(key) == 1:
            key = 'toUInt64(' + str(i) + ')'
        else:
            key = str('(' + ','.join(key) + ')')
        query("select dictGetUInt8('{0}', 'UInt8_', {1})".format(name, key))

    result = TSV(query('select * from test.{0}'.format(name)))
    diff = test_table.compare_by_keys(keys,
                                      result.lines,
                                      use_parent,
                                      add_not_found_rows=True)
    print(test_table.process_diff(diff))
    assert not diff
Example #29
0
    def check(self):
        instance = cluster.instances["first"]
        a = TSV(instance.query("SELECT count() from dailyhistory.yellow_tripdata"))
        b = TSV(instance.query("SELECT count() from monthlyhistory.yellow_tripdata"))
        assert a == b, "Distributed tables"

        for instance in cluster.instances.values():
            a = instance.query("SELECT count() from dailyhistory.yellow_tripdata_staging")
            b = instance.query("SELECT count() from monthlyhistory.yellow_tripdata_staging")
            assert a == b, "MergeTree tables on each shard"

            a = TSV(instance.query("SELECT sipHash64(*) from dailyhistory.yellow_tripdata_staging ORDER BY id"))
            b = TSV(instance.query("SELECT sipHash64(*) from monthlyhistory.yellow_tripdata_staging ORDER BY id"))

            assert a == b, "Data on each shard"

        for name in ["first", "second", "third"]:
            node = cluster.instances[name]
            node.query("DROP DATABASE IF EXISTS dailyhistory SYNC;")
            node.query("DROP DATABASE IF EXISTS monthlyhistory SYNC;")
Example #30
0
def test_select_all(dictionary_structure):
    name, keys, use_parent = dictionary_structure
    query = instance.query

    structure = test_table.get_structure_for_keys(keys, use_parent)
    query('''
    DROP TABLE IF EXISTS test.{0}
    '''.format(name))

    create_query = "CREATE TABLE test.{0} ({1}) engine = Dictionary({0})".format(
        name, structure)
    TSV(query(create_query))

    result = TSV(query('select * from test.{0}'.format(name)))

    diff = test_table.compare_by_keys(keys,
                                      result.lines,
                                      use_parent,
                                      add_not_found_rows=True)
    print(test_table.process_diff(diff))
    assert not diff
Example #31
0
def test_paths_not_matching_any_pattern(graphite_table):
    to_insert = '''\
one_min.x1	100	1000000000	2001-09-09	1
zzzzzzzz	100	1000000001	2001-09-09	1
zzzzzzzz	200	1000000001	2001-09-09	2
'''

    q('INSERT INTO test.graphite FORMAT TSV', to_insert)

    expected = '''\
one_min.x1	100	999999600	2001-09-09	1
zzzzzzzz	200	1000000001	2001-09-09	2
'''

    result = q('''
OPTIMIZE TABLE test.graphite PARTITION 200109 FINAL;

SELECT * FROM test.graphite;
''')

    assert TSV(result) == TSV(expected)
Example #32
0
def test_deduplication_window_in_seconds(started_cluster):
    node = node1

    node1.query("""
        CREATE TABLE simple ON CLUSTER test_cluster (date Date, id UInt32)
        ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/simple', '{replica}', date, id, 8192)""")

    node.query("INSERT INTO simple VALUES (0, 0)")
    time.sleep(1)
    node.query("INSERT INTO simple VALUES (0, 0)") # deduplication works here
    node.query("INSERT INTO simple VALUES (0, 1)")
    assert TSV(node.query("SELECT count() FROM simple")) == TSV("2\n")

    # wait for the cleanup thread
    time.sleep(2)

    assert TSV.toMat(node.query("SELECT count() FROM system.zookeeper WHERE path='/clickhouse/tables/0/simple/blocks'"))[0][0] == "1"
    node.query("INSERT INTO simple VALUES (0, 0)") # deduplication doesn't works here, the first hash node was deleted
    assert TSV.toMat(node.query("SELECT count() FROM simple"))[0][0] == "3"

    node1.query("""DROP TABLE simple ON CLUSTER test_cluster""")
Example #33
0
def check_all_hosts_sucesfully_executed(tsv_content, num_hosts=None):
    if num_hosts is None:
        num_hosts = len(cluster.instances)

    M = TSV.toMat(tsv_content)
    hosts = [l[0] for l in M]
    codes = [l[1] for l in M]
    messages = [l[2] for l in M]

    assert len(hosts) == num_hosts and len(set(hosts)) == num_hosts, tsv_content
    assert len(set(codes)) == 1, tsv_content
    assert codes[0] == "0", tsv_content