def attach_all_parts():
    detached_parts = clickhouse.query(chi["metadata"]["name"], "SELECT name FROM system.detached_parts WHERE database='default' AND table='test' AND reason=''", pod=detached_pod)
    all_parts = ""
    for part in detached_parts.splitlines():
        all_parts += f"ALTER TABLE default.test ATTACH PART '{part}';"
    if all_parts.strip() != "":
        clickhouse.query(chi["metadata"]["name"], all_parts, pod=detached_pod)


def create_part_and_detach():
    clickhouse.query(chi["metadata"]["name"], "INSERT INTO default.test SELECT now(), number FROM numbers(100)", pod=detached_pod)
    part_name = clickhouse.query(
        chi["metadata"]["name"],
        sql="SELECT name FROM system.parts WHERE database='default' AND table='test' ORDER BY modification_time DESC LIMIT 1",
        pod=detached_pod
    )
    clickhouse.query(chi["metadata"]["name"], f"ALTER TABLE default.test DETACH PART '{part_name}'", pod=detached_pod)
def prepare_table_for_backup(backup_pod, chi, rows=1000):
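    """
    Create default.test_backup on the 'all-sharded' cluster, fill it with `rows` rows
    and return a timestamped backup name to be passed to the clickhouse-backup REST API.
    """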
    backup_name = f'test_backup_{time.strftime("%Y-%m-%d_%H%M%S")}'
    clickhouse.query(
        chi['metadata']['name'],
        "CREATE TABLE IF NOT EXISTS default.test_backup ON CLUSTER 'all-sharded' (i UInt64) ENGINE MergeTree() ORDER BY tuple();"
        f"INSERT INTO default.test_backup SELECT number FROM numbers({rows})",
        pod=backup_pod)
    return backup_name
def run_queries_with_priority():
    sql = ""
    for i in range(50):
        sql += f"SET priority={i % 20};SELECT uniq(number) FROM numbers(20000000):"
    cmd = f"echo \\\"{sql} SELECT 1\\\" | xargs -i'{{}}' --no-run-if-empty -d ':' -P 20 clickhouse-client --time -m -n -q \\\"{{}}\\\""
    kubectl.launch(f"exec {priority_pod} -- bash -c \"{cmd}\"", timeout=120)
    clickhouse.query(
        chi["metadata"]["name"],
        "SELECT event_time, CurrentMetric_QueryPreempted FROM system.metric_log WHERE CurrentMetric_QueryPreempted > 0",
        host=priority_svc,
    )
def test_longest_running_query(self, prometheus_operator_spec, clickhouse_operator_spec, chi):
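    """
    Run a query that sleeps for ~660s (600s alert threshold plus two 30s Prometheus scrapes)
    and verify that ClickHouseLongestRunningQuery fires for that host and then resolves.
    """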
    long_running_pod, long_running_svc, _, _ = alerts.random_pod_choice_for_callbacks(chi)
    # 600s alert trigger threshold + 2*30s (double the Prometheus scrape interval)
    clickhouse.query(chi["metadata"]["name"], "SELECT now(),sleepEachRow(1),number FROM system.numbers LIMIT 660",
                     host=long_running_svc, timeout=670)
    with Then("check ClickHouseLongestRunningQuery firing"):
        fired = alerts.wait_alert_state("ClickHouseLongestRunningQuery", "firing", True, labels={"hostname": long_running_svc},
                                 time_range='30s')
        assert fired, error("can't get ClickHouseLongestRunningQuery alert in firing state")
    with Then("check ClickHouseLongestRunningQuery gone away"):
        resolved = alerts.wait_alert_state("ClickHouseLongestRunningQuery", "firing", False, labels={"hostname": long_running_svc})
        assert resolved, error("can't check ClickHouseLongestRunningQuery alert is gone away")
def test_backup_size(self, chi, minio_spec):
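    """
    On two pods take consecutive backups whose sizes differ by 10x (shrinking on one pod,
    growing on the other) and verify that ClickHouseBackupSizeChanged fires for each pod
    and then resolves.
    """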
    decrease_pod, _, increase_pod, _ = alerts.random_pod_choice_for_callbacks(
        chi)

    backup_cases = {
        decrease_pod: {
            'rows': (10000, 1000),
            'decrease': True,
        },
        increase_pod: {
            'rows': (1000, 10000),
            'decrease': False,
        },
    }
    for backup_pod in backup_cases:
        decrease = backup_cases[backup_pod]['decrease']
        for backup_rows in backup_cases[backup_pod]['rows']:
            backup_name = prepare_table_for_backup(backup_pod,
                                                   chi,
                                                   rows=backup_rows)
            exec_on_backup_container(
                backup_pod,
                f'curl -X POST -sL "http://127.0.0.1:7171/backup/create?name={backup_name}"'
            )
            wait_backup_command_status(backup_pod,
                                       f'create {backup_name}',
                                       expected_status='success')
            if decrease:
                clickhouse.query(chi['metadata']['name'],
                                 f"TRUNCATE TABLE default.test_backup",
                                 pod=backup_pod)
            time.sleep(15)
        fired = alerts.wait_alert_state(
            "ClickHouseBackupSizeChanged",
            "firing",
            expected_state=True,
            sleep_time=settings.prometheus_scrape_interval,
            labels={"pod_name": backup_pod},
            time_range='60s')
        assert fired, error(
            f"can't get ClickHouseBackupSizeChanged alert in firing state, decrease={decrease}"
        )

        with Then("check ClickHouseBackupSizeChanged gone away"):
            resolved = alerts.wait_alert_state("ClickHouseBackupSizeChanged",
                                               "firing",
                                               expected_state=False,
                                               labels={"pod_name": backup_pod})
            assert resolved, error(
                f"can't get ClickHouseBackupSizeChanged alert is gone away, decrease={decrease}"
            )
    def reboot_clickhouse_and_distributed_execution():
        # we need ~70 delayed files to trigger the alert
        insert_sql = 'INSERT INTO default.test_distr(event_time, test) SELECT now(), number FROM system.numbers LIMIT 10000'
        select_sql = 'SELECT count() FROM default.test_distr'
        with Then("reboot clickhouse-server pod"):
            kubectl.launch(
                f"exec -n {kubectl.namespace} {restarted_pod} -c clickhouse-pod -- kill 1",
                ok_to_fail=True,
            )
            with Then("Insert to distributed table"):
                clickhouse.query(chi["metadata"]["name"], insert_sql, host=delayed_pod, ns=kubectl.namespace)

            with Then("Select from distributed table"):
                clickhouse.query_with_error(chi["metadata"]["name"], select_sql, host=delayed_pod,
                                            ns=kubectl.namespace)

# Example #8
def insert_replicated_data(chi, create_tables, insert_tables):
    with When(f'create if not exists replicated tables {create_tables}'):
        for table in create_tables:
            clickhouse.create_table_on_cluster(
                chi,
                'all-sharded',
                f'default.{table}',
                f'(id UInt64) ENGINE=ReplicatedMergeTree(\'/clickhouse/tables/default.{table}/{{shard}}\',\'{{replica}}\') ORDER BY (id)',
                if_not_exists=True,
            )
    with When(f'insert tables data {insert_tables}'):
        for table in insert_tables:
            clickhouse.query(
                chi['metadata']['name'],
                f'INSERT INTO default.{table} SELECT rand()+number FROM numbers(1000)',
                pod="chi-test-cluster-for-zk-default-0-1-0")
    def insert_many_parts_to_clickhouse():
        stop_merges = "SYSTEM STOP MERGES default.test;"
        min_block = "SET max_block_size=1; SET max_insert_block_size=1; SET min_insert_block_size_rows=1;"
        with When(f"Insert to MergeTree table {parts_limits} parts"):
            r = parts_limits
            sql = stop_merges + min_block + f"INSERT INTO default.test(event_time, test) SELECT now(),number FROM system.numbers LIMIT {r};"
            clickhouse.query(chi_name, sql, host=selected_svc, ns=kubectl.namespace)

            sql = min_block + "INSERT INTO default.test(event_time, test) SELECT now(), number FROM system.numbers LIMIT 1;"
            clickhouse.query_with_error(chi_name, sql, host=selected_svc, ns=kubectl.namespace)
            with Then(f"wait prometheus_scrape_interval={prometheus_scrape_interval}*2 sec"):
                time.sleep(prometheus_scrape_interval * 2)

            with Then("after 21.8 InsertedRows include system.* rows"):
                for i in range(35):
                    sql = min_block + "INSERT INTO default.test(event_time, test) SELECT now(), number FROM system.numbers LIMIT 1;"
                    clickhouse.query_with_error(chi_name, sql, host=selected_svc, ns=kubectl.namespace)
def test_replicas_max_absolute_delay(self, prometheus_operator_spec, clickhouse_operator_spec, chi):
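    """
    Stop replica fetches on one replica while inserting into default.test_repl on another,
    verify that ClickHouseReplicasMaxAbsoluteDelay fires, then restart fetches, sync the
    replica and check that the alert resolves.
    """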
    stop_replica_pod, stop_replica_svc, insert_pod, insert_svc = alerts.random_pod_choice_for_callbacks(chi)
    clickhouse.create_table_on_cluster(
        chi,
        'all-replicated', 'default.test_repl',
        '(event_time DateTime, test UInt64) ' +
        'ENGINE ReplicatedMergeTree(\'/clickhouse/tables/{installation}-{shard}/test_repl\', \'{replica}\') ORDER BY tuple()'
    )
    prometheus_scrape_interval = 15

    def restart_clickhouse_and_insert_to_replicated_table():
        with When(f"stop replica fetches on {stop_replica_svc}"):
            sql = "SYSTEM STOP FETCHES default.test_repl"
            kubectl.launch(
                f"exec -n {kubectl.namespace} {stop_replica_pod} -c clickhouse-pod -- clickhouse-client -q \"{sql}\"",
                ok_to_fail=True, timeout=600,
            )
            sql = "INSERT INTO default.test_repl SELECT now(), number FROM numbers(100000)"
            kubectl.launch(
                f"exec -n {kubectl.namespace} {insert_pod} -c clickhouse-pod -- clickhouse-client -q \"{sql}\"",
            )

    with Then("check ClickHouseReplicasMaxAbsoluteDelay firing"):
        fired = alerts.wait_alert_state(
            "ClickHouseReplicasMaxAbsoluteDelay", "firing", True, labels={"hostname": stop_replica_svc},
            time_range='60s', sleep_time=prometheus_scrape_interval * 2,
            callback=restart_clickhouse_and_insert_to_replicated_table
        )
        assert fired, error("can't get ClickHouseReadonlyReplica alert in firing state")

    clickhouse.query(
        chi["metadata"]["name"],
        "SYSTEM START FETCHES; SYSTEM RESTART REPLICAS; SYSTEM SYNC REPLICA default.test_repl",
        host=stop_replica_svc, timeout=240
    )
    with Then("check ClickHouseReplicasMaxAbsoluteDelay gone away"):
        resolved = alerts.wait_alert_state("ClickHouseReplicasMaxAbsoluteDelay", "firing", False, labels={"hostname": stop_replica_svc})
        assert resolved, error("can't check ClickHouseReplicasMaxAbsoluteDelay alert is gone away")

    clickhouse.drop_table_on_cluster(chi, 'all-replicated', 'default.test_repl')
def test_distributed_files_to_insert(self, prometheus_operator_spec, clickhouse_operator_spec, chi):
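    """
    Stop distributed sends on one replica and repeatedly kill the other so that .bin files
    accumulate on disk, verify that ClickHouseDistributedFilesToInsertHigh fires, then
    resume distributed sends and check that the alert resolves.
    """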
    delayed_pod, delayed_svc, restarted_pod, restarted_svc = alerts.random_pod_choice_for_callbacks(chi)
    clickhouse.create_distributed_table_on_cluster(chi)

    insert_sql = 'INSERT INTO default.test_distr(event_time, test) SELECT now(), number FROM system.numbers LIMIT 1000'
    clickhouse.query(
        chi["metadata"]["name"], 'SYSTEM STOP DISTRIBUTED SENDS default.test_distr',
        pod=delayed_pod, ns=kubectl.namespace
    )

    files_to_insert_from_metrics = 0
    files_to_insert_from_disk = 0
    tries = 0
    # we need more than 50 delayed files to trigger the alert
    while files_to_insert_from_disk <= 55 and files_to_insert_from_metrics <= 55 and tries < 500:
        kubectl.launch(
            f"exec -n {kubectl.namespace} {restarted_pod} -c clickhouse-pod -- kill 1",
            ok_to_fail=True,
        )
        clickhouse.query(chi["metadata"]["name"], insert_sql, pod=delayed_pod, host=delayed_pod, ns=kubectl.namespace)
        files_to_insert_from_metrics = clickhouse.query(
            chi["metadata"]["name"], "SELECT value FROM system.metrics WHERE metric='DistributedFilesToInsert'",
            pod=delayed_pod, ns=kubectl.namespace
        )
        files_to_insert_from_metrics = int(files_to_insert_from_metrics)

        files_to_insert_from_disk = int(kubectl.launch(
            f"exec -n {kubectl.namespace} {delayed_pod} -c clickhouse-pod -- bash -c 'ls -la /var/lib/clickhouse/data/default/test_distr/*/*.bin 2>/dev/null | wc -l'",
            ok_to_fail=False,
        ))
        tries += 1

    with When("reboot clickhouse-server pod"):
        fired = alerts.wait_alert_state(
            "ClickHouseDistributedFilesToInsertHigh", "firing", True,
            labels={"hostname": delayed_svc, "chi": chi["metadata"]["name"]}
        )
        assert fired, error("can't get ClickHouseDistributedFilesToInsertHigh alert in firing state")

    kubectl.wait_pod_status(restarted_pod, "Running", ns=kubectl.namespace)

    clickhouse.query(
        chi["metadata"]["name"], 'SYSTEM START DISTRIBUTED SENDS default.test_distr',
        pod=delayed_pod, ns=kubectl.namespace
    )

    with Then("check ClickHouseDistributedFilesToInsertHigh gone away"):
        resolved = alerts.wait_alert_state("ClickHouseDistributedFilesToInsertHigh", "firing", False, labels={"hostname": delayed_svc})
        assert resolved, error("can't check ClickHouseDistributedFilesToInsertHigh alert is gone away")

    clickhouse.drop_distributed_table_on_cluster(chi)
    def check_zk_root_znode(chi, pod_count, retry_count=5):
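        """
        Check the keeper root znode on every keeper pod: run `ls /` (zkCli.sh or zookeepercli,
        depending on keeper_type) and compare the znode count reported by
        `SELECT count() FROM system.zookeeper WHERE path='/'` with the expected value.
        """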
        for pod_num in range(pod_count):
            out = ""
            expected_out = ""
            for i in range(retry_count):
                if keeper_type == "zookeeper-operator":
                    expected_out = "[clickhouse, zookeeper, zookeeper-operator]"
                    keeper_cmd = './bin/zkCli.sh ls /'
                    pod_prefix = "zookeeper"
                elif keeper_type == "zookeeper":
                    expected_out = "[clickhouse, zookeeper]"
                    keeper_cmd = './bin/zkCli.sh ls /'
                    pod_prefix = "zookeeper"
                else:
                    expected_out = "clickhouse"
                    keeper_cmd = "if [[ ! $(command -v zookeepercli) ]]; then "
                    keeper_cmd += "wget -q -O /tmp/zookeepercli.deb https://github.com/outbrain/zookeepercli/releases/download/v1.0.12/zookeepercli_1.0.12_amd64.deb; "
                    keeper_cmd += "dpkg -i /tmp/zookeepercli.deb; "
                    keeper_cmd += "fi; "
                    keeper_cmd += "zookeepercli -servers 127.0.0.1:2181 -c ls /"
                    pod_prefix = "clickhouse-keeper"

                out = kubectl.launch(
                    f"exec {pod_prefix}-{pod_num} -- bash -ce '{keeper_cmd}'",
                    ns=settings.test_namespace,
                    ok_to_fail=True)
                if expected_out in out:
                    break
                else:
                    with Then(
                            f"{keeper_type} ROOT NODE not ready, wait {(i + 1) * 3} sec"
                    ):
                        time.sleep((i + 1) * 3)
            assert expected_out in out, f"Unexpected {keeper_type} `ls /` output"

        out = clickhouse.query(
            chi["metadata"]["name"],
            "SELECT count() FROM system.zookeeper WHERE path='/'")
        expected_out = {
            "zookeeper": "2",
            "zookeeper-operator": "3",
            "clickhouse-keeper": "1",
        }
        assert expected_out[keeper_type] == out.strip(
            " \t\r\n"
        ), f"Unexpected `SELECT count() FROM system.zookeeper WHERE path='/'` output {out}"

# Example #13
def wait_clickhouse_cluster_ready(chi):
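    """
    Poll every pod of the installation until all FQDNs from the CHI status appear in the
    'all-sharded' cluster of system.clusters (reloading the config before each check).
    """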
    with Given("All expected pods present in system.clusters"):
        all_pods_ready = False
        while all_pods_ready is False:
            all_pods_ready = True

            for pod in chi['status']['pods']:
                cluster_response = clickhouse.query(
                    chi["metadata"]["name"],
                    "SYSTEM RELOAD CONFIG; SELECT host_name FROM system.clusters WHERE cluster='all-sharded'",
                    pod=pod
                )
                for host in chi['status']['fqdns']:
                    svc_short_name = host.replace(f'.{settings.test_namespace}.svc.cluster.local', '')
                    if svc_short_name not in cluster_response:
                        with Then("Not ready, sleep 5 seconds"):
                            all_pods_ready = False
                            time.sleep(5)

# Example #14
def wait_clickhouse_no_readonly_replicas(chi, retries=10):
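    """
    Poll the ReadonlyReplica metric across the 'all-sharded' cluster until no replica is
    read-only, failing after `retries` unsuccessful attempts.
    """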
    expected_replicas = chi["spec"]["configuration"]["clusters"][0]["layout"][
        "replicasCount"]
    expected_replicas = "[" + ",".join(["0"] * expected_replicas) + "]"
    for i in range(retries):
        readonly_replicas = clickhouse.query(
            chi['metadata']['name'],
            "SELECT groupArray(value) FROM cluster('all-sharded',system.metrics) WHERE metric='ReadonlyReplica'"
        )
        if readonly_replicas == expected_replicas:
            break
        else:
            with Then(
                    f"Clickhouse have readonly_replicas={readonly_replicas}, expected={expected_replicas}, Wait for {i*3} seconds"
            ):
                time.sleep(i * 3)
        if i == retries - 1:
            Fail(
                f"ClickHouse ZK failed, readonly_replicas={readonly_replicas}, expected={expected_replicas}"
            )

# Example #15
    def check_zk_root_znode(chi, pod_count, zk_retry=5):
        for pod_num in range(pod_count):
            out = ""
            for i in range(zk_retry):
                out = kubectl.launch(
                    f"exec zookeeper-{pod_num} -- bash -ce './bin/zkCli.sh ls /'",
                    ns=settings.test_namespace,
                    ok_to_fail=True)
                if "[clickhouse, zookeeper]" in out:
                    break
                else:
                    with Then(
                            f"Zookeeper ROOT NODE not ready, wait { (i+1)*3} sec"
                    ):
                        time.sleep((i + 1) * 3)
            assert "[clickhouse, zookeeper]" in out, "Unexpected `zkCli.sh ls /` output"

        out = clickhouse.query(
            chi["metadata"]["name"],
            "SELECT count() FROM system.zookeeper WHERE path='/'")
        assert "2" == out.strip(
            " \t\r\n"
        ), f"Unexpected `SELECT count() FROM system.zookeeper WHERE path='/'` output {out}"

# Example #16
def test_ch_002(self):
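    """
    Row-level security test: insert one row per team and verify that every user sees only
    the single row belonging to their own team.
    """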
    kubectl.create_and_check(
        "manifests/chi/test-ch-002-row-level.yaml", {
            "apply_templates": {"manifests/chit/tpl-clickhouse-21.8.yaml"},
            "do_not_delete": 1,
        })

    chi = "test-ch-002-row-level"
    create_table = """create table test (d Date default today(), team LowCardinality(String), user String) Engine = MergeTree() PARTITION BY d ORDER BY d;"""

    with When("Create test table"):
        clickhouse.query(chi, create_table)

    with And("Insert some data"):
        clickhouse.query(
            chi,
            "INSERT INTO test(team, user) values('team1', 'user1'),('team2', 'user2'),('team3', 'user3'),('team4', 'user4')"
        )

    with Then(
            "Make another query for different users. It should be restricted to corresponding team by row-level security"
    ):
        for user in ['user1', 'user2', 'user3', 'user4']:
            out = clickhouse.query(chi,
                                   "select user from test",
                                   user=user,
                                   pwd=user)
            assert out == user, error()

    with Then(
            "Make a count() query for different users. It should be restricted to corresponding team by row-level security"
    ):
        for user in ['user1', 'user2', 'user3', 'user4']:
            out = clickhouse.query(chi,
                                   "select count() from test",
                                   user=user,
                                   pwd=user)
            assert out == "1", error()

    kubectl.delete_chi(chi)

# Example #17
def test_zookeeper_rescale(self):
    """
    test scenario for ZK

    CH 1 -> 2 wait complete + ZK 1 -> 3 nowait
    CH 2 -> 1 wait complete + ZK 3 -> 1 nowait
    CH 1 -> 2 wait complete + ZK 1 -> 3 nowait
    """
    def insert_replicated_data(chi, create_tables, insert_tables):
        with When(f'create if not exists replicated tables {create_tables}'):
            for table in create_tables:
                clickhouse.create_table_on_cluster(
                    chi,
                    'all-sharded',
                    f'default.{table}',
                    f'(id UInt64) ENGINE=ReplicatedMergeTree(\'/clickhouse/tables/default.{table}/{{shard}}\',\'{{replica}}\') ORDER BY (id)',
                    if_not_exists=True,
                )
        with When(f'insert tables data {insert_tables}'):
            for table in insert_tables:
                clickhouse.query(
                    chi['metadata']['name'],
                    f'INSERT INTO default.{table} SELECT rand()+number FROM numbers(1000)',
                    pod="chi-test-cluster-for-zk-default-0-1-0")

    def check_zk_root_znode(chi, pod_count, zk_retry=5):
        for pod_num in range(pod_count):
            out = ""
            for i in range(zk_retry):
                out = kubectl.launch(
                    f"exec zookeeper-{pod_num} -- bash -ce './bin/zkCli.sh ls /'",
                    ns=settings.test_namespace,
                    ok_to_fail=True)
                if "[clickhouse, zookeeper]" in out:
                    break
                else:
                    with Then(
                            f"Zookeeper ROOT NODE not ready, wait { (i+1)*3} sec"
                    ):
                        time.sleep((i + 1) * 3)
            assert "[clickhouse, zookeeper]" in out, "Unexpected `zkCli.sh ls /` output"

        out = clickhouse.query(
            chi["metadata"]["name"],
            "SELECT count() FROM system.zookeeper WHERE path='/'")
        assert "2" == out.strip(
            " \t\r\n"
        ), f"Unexpected `SELECT count() FROM system.zookeeper WHERE path='/'` output {out}"

    def rescale_zk_and_clickhouse(ch_node_count,
                                  zk_node_count,
                                  first_install=False):
        zk_manifest = 'zookeeper-1-node-1GB-for-tests-only.yaml' if zk_node_count == 1 else 'zookeeper-3-nodes-1GB-for-tests-only.yaml'
        _, chi = util.install_clickhouse_and_zookeeper(
            chi_file=
            f'manifests/chi/test-cluster-for-zookeeper-{ch_node_count}.yaml',
            chi_template_file='manifests/chit/tpl-clickhouse-latest.yaml',
            chi_name='test-cluster-for-zk',
            zk_manifest=zk_manifest,
            clean_ns=first_install,
            force_zk_install=True,
            zk_install_first=first_install,
            make_object_count=False,
        )
        return chi

    with When("Clean exists ClickHouse and Zookeeper"):
        kubectl.delete_all_zookeeper(settings.test_namespace)
        kubectl.delete_all_chi(settings.test_namespace)

    with When("Install CH 1 node ZK 1 node"):
        chi = rescale_zk_and_clickhouse(ch_node_count=1,
                                        zk_node_count=1,
                                        first_install=True)
        util.wait_clickhouse_cluster_ready(chi)
        wait_zookeeper_ready(pod_count=1)
        check_zk_root_znode(chi, pod_count=1)

        util.wait_clickhouse_cluster_ready(chi)
        wait_clickhouse_no_readonly_replicas(chi)
        insert_replicated_data(chi,
                               create_tables=['test_repl1'],
                               insert_tables=['test_repl1'])

    total_iterations = 5
    for iteration in range(total_iterations):
        with When(f"ITERATION {iteration}"):
            with Then("CH 1 -> 2 wait complete + ZK 1 -> 3 nowait"):
                chi = rescale_zk_and_clickhouse(ch_node_count=2,
                                                zk_node_count=3)
                wait_zookeeper_ready(pod_count=3)
                check_zk_root_znode(chi, pod_count=3)

                util.wait_clickhouse_cluster_ready(chi)
                insert_replicated_data(
                    chi,
                    create_tables=['test_repl2'],
                    insert_tables=['test_repl1', 'test_repl2'])

            with Then("CH 2 -> 1 wait complete + ZK 3 -> 1 nowait"):
                chi = rescale_zk_and_clickhouse(ch_node_count=1,
                                                zk_node_count=1)
                wait_zookeeper_ready(pod_count=1)
                check_zk_root_znode(chi, pod_count=1)

                util.wait_clickhouse_cluster_ready(chi)
                insert_replicated_data(
                    chi,
                    create_tables=['test_repl3'],
                    insert_tables=['test_repl1', 'test_repl2', 'test_repl3'])

    with When("CH 1 -> 2 wait complete + ZK 1 -> 3 nowait"):
        chi = rescale_zk_and_clickhouse(ch_node_count=2, zk_node_count=3)
        check_zk_root_znode(chi, pod_count=3)

    with Then('check data in tables'):
        for table, expected_rows in {
                "test_repl1": str(1000 + 2000 * total_iterations),
                "test_repl2": str(2000 * total_iterations),
                "test_repl3": str(1000 * total_iterations)
        }.items():
            actual_rows = clickhouse.query(
                chi['metadata']['name'],
                f'SELECT count() FROM default.{table}',
                pod="chi-test-cluster-for-zk-default-0-1-0")
            assert actual_rows == expected_rows, f"Unexpected row count after inserts into {table}: expected={expected_rows}, actual={actual_rows}"

    with Then('drop all created tables'):
        for i in range(3):
            clickhouse.drop_table_on_cluster(chi, 'all-sharded',
                                             f'default.test_repl{i+1}')

# Example #18
def test_ch_001(self):
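    """
    Insert-quorum test: create replicated tables t1, t2, t3 with MVs t1->t2 and t1->t3,
    then stop fetches on one replica and verify that quorum inserts fail until the replica
    catches up and that duplicate blocks are handled as expected.
    """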
    util.require_zookeeper()
    quorum_template = "manifests/chit/tpl-clickhouse-21.8.yaml"
    chit_data = yaml_manifest.get_manifest_data(
        util.get_full_path(quorum_template))

    kubectl.launch(f"delete chit {chit_data['metadata']['name']}",
                   ns=settings.test_namespace,
                   ok_to_fail=True)
    kubectl.create_and_check("manifests/chi/test-ch-001-insert-quorum.yaml", {
        "apply_templates": {quorum_template},
        "pod_count": 2,
        "do_not_delete": 1,
    })

    chi = yaml_manifest.get_chi_name(
        util.get_full_path("manifests/chi/test-ch-001-insert-quorum.yaml"))
    chi_data = kubectl.get("chi", ns=settings.test_namespace, name=chi)
    util.wait_clickhouse_cluster_ready(chi_data)

    host0 = "chi-test-ch-001-insert-quorum-default-0-0"
    host1 = "chi-test-ch-001-insert-quorum-default-0-1"

    create_table = """
    create table t1 on cluster default (a Int8, d Date default today())
    Engine = ReplicatedMergeTree('/clickhouse/tables/{table}', '{replica}')
    partition by d order by a 
    TTL d + interval 5 second
    SETTINGS merge_with_ttl_timeout=5""".replace('\r', '').replace('\n', '')

    create_mv_table2 = """
    create table t2 on cluster default (a Int8)
    Engine = ReplicatedMergeTree('/clickhouse/tables/{table}', '{replica}')
    partition by tuple() order by a""".replace('\r', '').replace('\n', '')

    create_mv_table3 = """
    create table t3 on cluster default (a Int8)
    Engine = ReplicatedMergeTree('/clickhouse/tables/{table}', '{replica}')
    partition by tuple() order by a""".replace('\r', '').replace('\n', '')

    create_mv2 = "create materialized view t_mv2 on cluster default to t2 as select a from t1"
    create_mv3 = "create materialized view t_mv3 on cluster default to t3 as select a from t1"

    with Given("Tables t1, t2, t3 and MVs t1->t2, t1-t3 are created"):
        clickhouse.query(chi, create_table)
        clickhouse.query(chi, create_mv_table2)
        clickhouse.query(chi, create_mv_table3)

        clickhouse.query(chi, create_mv2)
        clickhouse.query(chi, create_mv3)

        with When("Add a row to an old partition"):
            clickhouse.query(chi,
                             "insert into t1(a,d) values(6, today()-1)",
                             host=host0)

        with When("Stop fetches for t1 at replica1"):
            clickhouse.query(chi, "system stop fetches default.t1", host=host1)

            with Then("Wait 10 seconds and the data should be dropped by TTL"):
                time.sleep(10)
                out = clickhouse.query(chi,
                                       "select count() from t1 where a=6",
                                       host=host0)
                assert out == "0", error()

        with When("Resume fetches for t1 at replica1"):
            clickhouse.query(chi,
                             "system start fetches default.t1",
                             host=host1)
            time.sleep(5)

            with Then("Inserts should resume"):
                clickhouse.query(chi,
                                 "insert into t1(a) values(7)",
                                 host=host0)

        clickhouse.query(chi, "insert into t1(a) values(1)")

        with When("Stop fetches for t2 at replica1"):
            clickhouse.query(chi, "system stop fetches default.t2", host=host1)

            with Then("Insert should fail since it can not reach the quorum"):
                out = clickhouse.query_with_error(
                    chi, "insert into t1(a) values(2)", host=host0)
                assert "Timeout while waiting for quorum" in out, error()

        # kubectl(f"exec {host0}-0 -n test -- cp /var/lib//clickhouse/data/default/t2/all_1_1_0/a.mrk2 /var/lib//clickhouse/data/default/t2/all_1_1_0/a.bin")
        # with Then("Corrupt data part in t2"):
        #    kubectl(f"exec {host0}-0 -n test -- sed -i \"s/b/c/\" /var/lib/clickhouse/data/default/t2/all_1_1_0/a.bin")

        with When("Resume fetches for t2 at replica1"):
            clickhouse.query(chi,
                             "system start fetches default.t2",
                             host=host1)
            i = 0
            while "2" != clickhouse.query(
                    chi,
                    "select active_replicas from system.replicas where database='default' and table='t1'",
                    pod=host0) and i < 10:
                with Then("Not ready, wait 5 seconds"):
                    time.sleep(5)
                    i += 1

            with Then(
                    "Inserts should fail with an error regarding not satisfied quorum"
            ):
                out = clickhouse.query_with_error(
                    chi, "insert into t1(a) values(3)", host=host0)
                assert "Quorum for previous write has not been satisfied yet" in out, error(
                )

            with And("Second insert of the same block should pass"):
                clickhouse.query(chi,
                                 "insert into t1(a) values(3)",
                                 host=host0)

            with And("Insert of the new block should fail"):
                out = clickhouse.query_with_error(
                    chi, "insert into t1(a) values(4)", host=host0)
                assert "Quorum for previous write has not been satisfied yet" in out, error(
                )

            with And(
                    "Second insert of the same block with 'deduplicate_blocks_in_dependent_materialized_views' setting should fail"
            ):
                out = clickhouse.query_with_error(
                    chi,
                    "set deduplicate_blocks_in_dependent_materialized_views=1; insert into t1(a) values(5)",
                    host=host0)
                assert "Quorum for previous write has not been satisfied yet" in out, error(
                )

        out = clickhouse.query_with_error(
            chi,
            "select t1.a t1_a, t2.a t2_a from t1 left outer join t2 using (a) order by t1_a settings join_use_nulls=1"
        )
        note(out)
def test_insert_related_alerts(self, prometheus_operator_spec, clickhouse_operator_spec, chi):
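    """
    Stop merges and insert many single-row parts to trigger ClickHouseDelayedInsertThrottling,
    ClickHouseMaxPartCountForPartition, ClickHouseLowInsertedRowsPerQuery and
    ClickHouseRejectedInsert, then restart merges and check that the alerts resolve.
    """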
    clickhouse.create_table_on_cluster(chi)
    delayed_pod, delayed_svc, rejected_pod, rejected_svc = alerts.random_pod_choice_for_callbacks(chi)

    prometheus_scrape_interval = settings.prometheus_scrape_interval
    # default values in system.merge_tree_settings
    parts_to_throw_insert = 300
    parts_to_delay_insert = 150
    chi_name = chi["metadata"]["name"]

    parts_limits = parts_to_delay_insert
    selected_svc = delayed_svc

    def insert_many_parts_to_clickhouse():
        stop_merges = "SYSTEM STOP MERGES default.test;"
        min_block = "SET max_block_size=1; SET max_insert_block_size=1; SET min_insert_block_size_rows=1;"
        with When(f"Insert to MergeTree table {parts_limits} parts"):
            r = parts_limits
            sql = stop_merges + min_block + f"INSERT INTO default.test(event_time, test) SELECT now(),number FROM system.numbers LIMIT {r};"
            clickhouse.query(chi_name, sql, host=selected_svc, ns=kubectl.namespace)

            sql = min_block + "INSERT INTO default.test(event_time, test) SELECT now(), number FROM system.numbers LIMIT 1;"
            clickhouse.query_with_error(chi_name, sql, host=selected_svc, ns=kubectl.namespace)
            with Then(f"wait prometheus_scrape_interval={prometheus_scrape_interval}*2 sec"):
                time.sleep(prometheus_scrape_interval * 2)

            with Then("after 21.8 InsertedRows include system.* rows"):
                for i in range(35):
                    sql = min_block + "INSERT INTO default.test(event_time, test) SELECT now(), number FROM system.numbers LIMIT 1;"
                    clickhouse.query_with_error(chi_name, sql, host=selected_svc, ns=kubectl.namespace)

    insert_many_parts_to_clickhouse()
    with Then("check ClickHouseDelayedInsertThrottling firing"):
        fired = alerts.wait_alert_state(
            "ClickHouseDelayedInsertThrottling", "firing", True, labels={"hostname": delayed_svc}, time_range="60s"
        )
        assert fired, error("can't get ClickHouseDelayedInsertThrottling alert in firing state")
    with Then("check ClickHouseMaxPartCountForPartition firing"):
        fired = alerts.wait_alert_state(
            "ClickHouseMaxPartCountForPartition", "firing", True, labels={"hostname": delayed_svc}, time_range="90s"
        )
        assert fired, error("can't get ClickHouseMaxPartCountForPartition alert in firing state")
    with Then("check ClickHouseLowInsertedRowsPerQuery firing"):
        fired = alerts.wait_alert_state(
            "ClickHouseLowInsertedRowsPerQuery", "firing", True, labels={"hostname": delayed_svc}, time_range="120s",
        )
        assert fired, error("can't get ClickHouseLowInsertedRowsPerQuery alert in firing state")

    clickhouse.query(chi_name, "SYSTEM START MERGES default.test", host=selected_svc, ns=kubectl.namespace)

    with Then("check ClickHouseDelayedInsertThrottling gone away"):
        resolved = alerts.wait_alert_state("ClickHouseDelayedInsertThrottling", "firing", False, labels={"hostname": delayed_svc})
        assert resolved, error("can't check ClickHouseDelayedInsertThrottling alert is gone away")
    with Then("check ClickHouseMaxPartCountForPartition gone away"):
        resolved = alerts.wait_alert_state("ClickHouseMaxPartCountForPartition", "firing", False, labels={"hostname": delayed_svc})
        assert resolved, error("can't check ClickHouseMaxPartCountForPartition alert is gone away")
    with Then("check ClickHouseLowInsertedRowsPerQuery gone away"):
        resolved = alerts.wait_alert_state("ClickHouseLowInsertedRowsPerQuery", "firing", False, labels={"hostname": delayed_svc})
        assert resolved, error("can't check ClickHouseLowInsertedRowsPerQuery alert is gone away")

    parts_limits = parts_to_throw_insert
    selected_svc = rejected_svc
    insert_many_parts_to_clickhouse()
    with Then("check ClickHouseRejectedInsert firing"):
        fired = alerts.wait_alert_state("ClickHouseRejectedInsert", "firing", True, labels={"hostname": rejected_svc}, time_range="30s",
                                        sleep_time=settings.prometheus_scrape_interval)
        assert fired, error("can't get ClickHouseRejectedInsert alert in firing state")

    with Then("check ClickHouseRejectedInsert gone away"):
        resolved = alerts.wait_alert_state("ClickHouseRejectedInsert", "firing", False, labels={"hostname": rejected_svc})
        assert resolved, error("can't check ClickHouseRejectedInsert alert is gone away")

    clickhouse.query(chi_name, "SYSTEM START MERGES default.test", host=selected_svc, ns=kubectl.namespace)
    clickhouse.drop_table_on_cluster(chi)
def test_keeper_outline(
    self,
    keeper_type="zookeeper",
    pod_for_insert_data="chi-test-cluster-for-zk-default-0-1-0",
    keeper_manifest_1_node='zookeeper-1-node-1GB-for-tests-only.yaml',
    keeper_manifest_3_node='zookeeper-3-nodes-1GB-for-tests-only.yaml',
):
    """
    test scenario for Zoo/Clickhouse Keeper

    CH 1 -> 2 wait complete + Keeper 1 -> 3 nowait
    CH 2 -> 1 wait complete + Keeper 3 -> 1 nowait
    CH 1 -> 2 wait complete + Keeper 1 -> 3 nowait
    """
    def insert_replicated_data(chi, create_tables, insert_tables):
        with When(f'create if not exists replicated tables {create_tables}'):
            for table in create_tables:
                clickhouse.create_table_on_cluster(
                    chi,
                    'all-sharded',
                    f'default.{table}',
                    f'(id UInt64) ENGINE=ReplicatedMergeTree(\'/clickhouse/tables/default.{table}/{{shard}}\',\'{{replica}}\') ORDER BY (id)',
                    if_not_exists=True,
                )
        with When(f'insert tables data {insert_tables}'):
            for table in insert_tables:
                clickhouse.query(
                    chi['metadata']['name'],
                    f'INSERT INTO default.{table} SELECT rand()+number FROM numbers(1000)',
                    pod=pod_for_insert_data)

    def check_zk_root_znode(chi, pod_count, retry_count=5):
        for pod_num in range(pod_count):
            out = ""
            expected_out = ""
            for i in range(retry_count):
                if keeper_type == "zookeeper-operator":
                    expected_out = "[clickhouse, zookeeper, zookeeper-operator]"
                    keeper_cmd = './bin/zkCli.sh ls /'
                    pod_prefix = "zookeeper"
                elif keeper_type == "zookeeper":
                    expected_out = "[clickhouse, zookeeper]"
                    keeper_cmd = './bin/zkCli.sh ls /'
                    pod_prefix = "zookeeper"
                else:
                    expected_out = "clickhouse"
                    keeper_cmd = "if [[ ! $(command -v zookeepercli) ]]; then "
                    keeper_cmd += "wget -q -O /tmp/zookeepercli.deb https://github.com/outbrain/zookeepercli/releases/download/v1.0.12/zookeepercli_1.0.12_amd64.deb; "
                    keeper_cmd += "dpkg -i /tmp/zookeepercli.deb; "
                    keeper_cmd += "fi; "
                    keeper_cmd += "zookeepercli -servers 127.0.0.1:2181 -c ls /"
                    pod_prefix = "clickhouse-keeper"

                out = kubectl.launch(
                    f"exec {pod_prefix}-{pod_num} -- bash -ce '{keeper_cmd}'",
                    ns=settings.test_namespace,
                    ok_to_fail=True)
                if expected_out in out:
                    break
                else:
                    with Then(
                            f"{keeper_type} ROOT NODE not ready, wait {(i + 1) * 3} sec"
                    ):
                        time.sleep((i + 1) * 3)
            assert expected_out in out, f"Unexpected {keeper_type} `ls /` output"

        out = clickhouse.query(
            chi["metadata"]["name"],
            "SELECT count() FROM system.zookeeper WHERE path='/'")
        expected_out = {
            "zookeeper": "2",
            "zookeeper-operator": "3",
            "clickhouse-keeper": "1",
        }
        assert expected_out[keeper_type] == out.strip(
            " \t\r\n"
        ), f"Unexpected `SELECT count() FROM system.zookeeper WHERE path='/'` output {out}"

    def rescale_zk_and_clickhouse(ch_node_count,
                                  keeper_node_count,
                                  first_install=False):
        keeper_manifest = keeper_manifest_1_node if keeper_node_count == 1 else keeper_manifest_3_node
        _, chi = util.install_clickhouse_and_keeper(
            chi_file=
            f'manifests/chi/test-cluster-for-{keeper_type}-{ch_node_count}.yaml',
            chi_template_file='manifests/chit/tpl-clickhouse-latest.yaml',
            chi_name='test-cluster-for-zk',
            keeper_manifest=keeper_manifest,
            keeper_type=keeper_type,
            clean_ns=first_install,
            force_keeper_install=True,
            keeper_install_first=first_install,
            make_object_count=False,
        )
        return chi

    with When("Clean exists ClickHouse Keeper and ZooKeeper"):
        kubectl.delete_all_chi(settings.test_namespace)
        kubectl.delete_all_keeper(settings.test_namespace)

    with When("Install CH 1 node ZK 1 node"):
        chi = rescale_zk_and_clickhouse(ch_node_count=1,
                                        keeper_node_count=1,
                                        first_install=True)
        util.wait_clickhouse_cluster_ready(chi)
        wait_keeper_ready(keeper_type=keeper_type, pod_count=1)
        check_zk_root_znode(chi, pod_count=1)

        util.wait_clickhouse_cluster_ready(chi)
        wait_clickhouse_no_readonly_replicas(chi)
        insert_replicated_data(chi,
                               create_tables=['test_repl1'],
                               insert_tables=['test_repl1'])

    total_iterations = 3
    for iteration in range(total_iterations):
        with When(f"ITERATION {iteration}"):
            with Then("CH 1 -> 2 wait complete + ZK 1 -> 3 nowait"):
                chi = rescale_zk_and_clickhouse(ch_node_count=2,
                                                keeper_node_count=3)
                wait_keeper_ready(keeper_type=keeper_type, pod_count=3)
                check_zk_root_znode(chi, pod_count=3)

                util.wait_clickhouse_cluster_ready(chi)
                wait_clickhouse_no_readonly_replicas(chi)
                insert_replicated_data(
                    chi,
                    create_tables=['test_repl2'],
                    insert_tables=['test_repl1', 'test_repl2'])

            with Then("CH 2 -> 1 wait complete + ZK 3 -> 1 nowait"):
                chi = rescale_zk_and_clickhouse(ch_node_count=1,
                                                keeper_node_count=1)
                wait_keeper_ready(keeper_type=keeper_type, pod_count=1)
                check_zk_root_znode(chi, pod_count=1)

                util.wait_clickhouse_cluster_ready(chi)
                wait_clickhouse_no_readonly_replicas(chi)
                insert_replicated_data(
                    chi,
                    create_tables=['test_repl3'],
                    insert_tables=['test_repl1', 'test_repl2', 'test_repl3'])

    with When("CH 1 -> 2 wait complete + ZK 1 -> 3 nowait"):
        chi = rescale_zk_and_clickhouse(ch_node_count=2, keeper_node_count=3)
        check_zk_root_znode(chi, pod_count=3)

    with Then('check data in tables'):
        for table_name, expected_rows in {
                "test_repl1": str(1000 + 2000 * total_iterations),
                "test_repl2": str(2000 * total_iterations),
                "test_repl3": str(1000 * total_iterations)
        }.items():
            actual_rows = clickhouse.query(
                chi['metadata']['name'],
                f'SELECT count() FROM default.{table_name}',
                pod="chi-test-cluster-for-zk-default-0-1-0")
            assert actual_rows == expected_rows, f"Unexpected row count after inserts into {table_name}: expected={expected_rows}, actual={actual_rows}"

    with Then('drop all created tables'):
        for i in range(3):
            clickhouse.drop_table_on_cluster(chi, 'all-sharded',
                                             f'default.test_repl{i + 1}')