def restart_zookeeper():
     kubectl.launch(
         f"exec -n {kubectl.namespace} zookeeper-0 -- sh -c \"kill 1\"",
         ok_to_fail=True,
     )
     clickhouse.query_with_error(chi_name, "SELECT name, path FROM system.zookeeper WHERE path='/'", host=svc1)
     clickhouse.query_with_error(chi_name, "SELECT name, path FROM system.zookeeper WHERE path='/'", host=svc2)
def test_read_only_replica(self, prometheus_operator_spec, clickhouse_operator_spec, chi):
    read_only_pod, read_only_svc, other_pod, other_svc = alerts.random_pod_choice_for_callbacks(chi)
    chi_name = chi["metadata"]["name"]
    clickhouse.create_table_on_cluster(
        chi,
        'all-replicated', 'default.test_repl',
        '(event_time DateTime, test UInt64) ' +
        'ENGINE ReplicatedMergeTree(\'/clickhouse/tables/{installation}-{shard}/test_repl\', \'{replica}\') ORDER BY tuple()'
    )

    def restart_zookeeper():
        kubectl.launch(
            f"exec -n {kubectl.namespace} zookeeper-0 -- sh -c \"kill 1\"",
            ok_to_fail=True,
        )
        clickhouse.query_with_error(chi_name, "INSERT INTO default.test_repl VALUES(now(),rand())", host=read_only_svc)

    with Then("check ClickHouseReadonlyReplica firing"):
        fired = alerts.wait_alert_state("ClickHouseReadonlyReplica", "firing", True, labels={"hostname": read_only_svc},
                                        time_range='30s', sleep_time=settings.prometheus_scrape_interval, callback=restart_zookeeper)
        assert fired, error("can't get ClickHouseReadonlyReplica alert in firing state")
    with Then("check ClickHouseReadonlyReplica gone away"):
        resolved = alerts.wait_alert_state("ClickHouseReadonlyReplica", "firing", False, labels={"hostname": read_only_svc})
        assert resolved, error("can't check ClickHouseReadonlyReplica alert is gone away")

    kubectl.wait_pod_status("zookeeper-0", "Running", ns=kubectl.namespace)
    kubectl.wait_jsonpath("pod", "zookeeper-0", "{.status.containerStatuses[0].ready}", "true",
                          ns=kubectl.namespace)

    for i in range(11):
        zookeeper_status = kubectl.launch(
            f"exec -n {kubectl.namespace} zookeeper-0 -- sh -c \"echo ruok | nc 127.0.0.1 2181\"", ok_to_fail=True
        )
        if "imok" in zookeeper_status:
            break
        elif i == 10:
            fail(f"invalid zookeeper status after {i} retries")
        with Then("zookeper is not ready, wait 2 seconds"):
            time.sleep(2)

    clickhouse.query_with_error(
        chi_name, "SYSTEM RESTART REPLICAS; SYSTEM SYNC REPLICA default.test_repl",
        host=read_only_svc, timeout=240
    )
    clickhouse.query_with_error(
        chi_name, "SYSTEM RESTART REPLICAS; SYSTEM SYNC REPLICA default.test_repl",
        host=other_svc, timeout=240
    )

    clickhouse.drop_table_on_cluster(chi, 'all-replicated', 'default.test_repl')
    def reboot_clickhouse_and_distributed_exection():
        # we need 70 delayed files for catch
        insert_sql = 'INSERT INTO default.test_distr(event_time, test) SELECT now(), number FROM system.numbers LIMIT 10000'
        select_sql = 'SELECT count() FROM default.test_distr'
        with Then("reboot clickhouse-server pod"):
            kubectl.launch(
                f"exec -n {kubectl.namespace} {restarted_pod} -c clickhouse-pod -- kill 1",
                ok_to_fail=True,
            )
            with Then("Insert to distributed table"):
                clickhouse.query(chi["metadata"]["name"], insert_sql, host=delayed_pod, ns=kubectl.namespace)

            with Then("Select from distributed table"):
                clickhouse.query_with_error(chi["metadata"]["name"], select_sql, host=delayed_pod,
                                            ns=kubectl.namespace)
    def insert_many_parts_to_clickhouse():
        stop_merges = "SYSTEM STOP MERGES default.test;"
        min_block = "SET max_block_size=1; SET max_insert_block_size=1; SET min_insert_block_size_rows=1;"
        with When(f"Insert to MergeTree table {parts_limits} parts"):
            r = parts_limits
            sql = stop_merges + min_block + f"INSERT INTO default.test(event_time, test) SELECT now(),number FROM system.numbers LIMIT {r};"
            clickhouse.query(chi_name, sql, host=selected_svc, ns=kubectl.namespace)

            sql = min_block + "INSERT INTO default.test(event_time, test) SELECT now(), number FROM system.numbers LIMIT 1;"
            clickhouse.query_with_error(chi_name, sql, host=selected_svc, ns=kubectl.namespace)
            with Then(f"wait prometheus_scrape_interval={prometheus_scrape_interval}*2 sec"):
                time.sleep(prometheus_scrape_interval * 2)

            with Then("after 21.8 InsertedRows include system.* rows"):
                for i in range(35):
                    sql = min_block + "INSERT INTO default.test(event_time, test) SELECT now(), number FROM system.numbers LIMIT 1;"
                    clickhouse.query_with_error(chi_name, sql, host=selected_svc, ns=kubectl.namespace)
 def insert_distributed_sync():
     with When("Insert to distributed table SYNC"):
         # look to https://github.com/ClickHouse/ClickHouse/pull/14260#issuecomment-683616862
         # insert_sql = 'INSERT INTO default.test_distr SELECT now(), number FROM numbers(toUInt64(5e9))'
         insert_sql = "INSERT INTO FUNCTION remote('127.1', currentDatabase(), test_distr) SELECT now(), number FROM numbers(toUInt64(5e9))"
         insert_params = '--insert_distributed_timeout=1 --insert_distributed_sync=1'
         error = clickhouse.query_with_error(
             chi["metadata"]["name"], insert_sql,
             pod=sync_pod, host=sync_pod, ns=kubectl.namespace, advanced_params=insert_params
         )
         assert 'Code: 159' in error
예제 #6
0
def test_ch_001(self):
    util.require_zookeeper()
    quorum_template = "manifests/chit/tpl-clickhouse-21.8.yaml"
    chit_data = yaml_manifest.get_manifest_data(
        util.get_full_path(quorum_template))

    kubectl.launch(f"delete chit {chit_data['metadata']['name']}",
                   ns=settings.test_namespace,
                   ok_to_fail=True)
    kubectl.create_and_check("manifests/chi/test-ch-001-insert-quorum.yaml", {
        "apply_templates": {quorum_template},
        "pod_count": 2,
        "do_not_delete": 1,
    })

    chi = yaml_manifest.get_chi_name(
        util.get_full_path("manifests/chi/test-ch-001-insert-quorum.yaml"))
    chi_data = kubectl.get("chi", ns=settings.test_namespace, name=chi)
    util.wait_clickhouse_cluster_ready(chi_data)

    host0 = "chi-test-ch-001-insert-quorum-default-0-0"
    host1 = "chi-test-ch-001-insert-quorum-default-0-1"

    create_table = """
    create table t1 on cluster default (a Int8, d Date default today())
    Engine = ReplicatedMergeTree('/clickhouse/tables/{table}', '{replica}')
    partition by d order by a 
    TTL d + interval 5 second
    SETTINGS merge_with_ttl_timeout=5""".replace('\r', '').replace('\n', '')

    create_mv_table2 = """
    create table t2 on cluster default (a Int8)
    Engine = ReplicatedMergeTree('/clickhouse/tables/{table}', '{replica}')
    partition by tuple() order by a""".replace('\r', '').replace('\n', '')

    create_mv_table3 = """
    create table t3 on cluster default (a Int8)
    Engine = ReplicatedMergeTree('/clickhouse/tables/{table}', '{replica}')
    partition by tuple() order by a""".replace('\r', '').replace('\n', '')

    create_mv2 = "create materialized view t_mv2 on cluster default to t2 as select a from t1"
    create_mv3 = "create materialized view t_mv3 on cluster default to t3 as select a from t1"

    with Given("Tables t1, t2, t3 and MVs t1->t2, t1-t3 are created"):
        clickhouse.query(chi, create_table)
        clickhouse.query(chi, create_mv_table2)
        clickhouse.query(chi, create_mv_table3)

        clickhouse.query(chi, create_mv2)
        clickhouse.query(chi, create_mv3)

        with When("Add a row to an old partition"):
            clickhouse.query(chi,
                             "insert into t1(a,d) values(6, today()-1)",
                             host=host0)

        with When("Stop fetches for t1 at replica1"):
            clickhouse.query(chi, "system stop fetches default.t1", host=host1)

            with Then("Wait 10 seconds and the data should be dropped by TTL"):
                time.sleep(10)
                out = clickhouse.query(chi,
                                       "select count() from t1 where a=6",
                                       host=host0)
                assert out == "0", error()

        with When("Resume fetches for t1 at replica1"):
            clickhouse.query(chi,
                             "system start fetches default.t1",
                             host=host1)
            time.sleep(5)

            with Then("Inserts should resume"):
                clickhouse.query(chi,
                                 "insert into t1(a) values(7)",
                                 host=host0)

        clickhouse.query(chi, "insert into t1(a) values(1)")

        with When("Stop fetches for t2 at replica1"):
            clickhouse.query(chi, "system stop fetches default.t2", host=host1)

            with Then("Insert should fail since it can not reach the quorum"):
                out = clickhouse.query_with_error(
                    chi, "insert into t1(a) values(2)", host=host0)
                assert "Timeout while waiting for quorum" in out, error()

        # kubectl(f"exec {host0}-0 -n test -- cp /var/lib//clickhouse/data/default/t2/all_1_1_0/a.mrk2 /var/lib//clickhouse/data/default/t2/all_1_1_0/a.bin")
        # with Then("Corrupt data part in t2"):
        #    kubectl(f"exec {host0}-0 -n test -- sed -i \"s/b/c/\" /var/lib/clickhouse/data/default/t2/all_1_1_0/a.bin")

        with When("Resume fetches for t2 at replica1"):
            clickhouse.query(chi,
                             "system start fetches default.t2",
                             host=host1)
            i = 0
            while "2" != clickhouse.query(
                    chi,
                    "select active_replicas from system.replicas where database='default' and table='t1'",
                    pod=host0) and i < 10:
                with Then("Not ready, wait 5 seconds"):
                    time.sleep(5)
                    i += 1

            with Then(
                    "Inserts should fail with an error regarding not satisfied quorum"
            ):
                out = clickhouse.query_with_error(
                    chi, "insert into t1(a) values(3)", host=host0)
                assert "Quorum for previous write has not been satisfied yet" in out, error(
                )

            with And("Second insert of the same block should pass"):
                clickhouse.query(chi,
                                 "insert into t1(a) values(3)",
                                 host=host0)

            with And("Insert of the new block should fail"):
                out = clickhouse.query_with_error(
                    chi, "insert into t1(a) values(4)", host=host0)
                assert "Quorum for previous write has not been satisfied yet" in out, error(
                )

            with And(
                    "Second insert of the same block with 'deduplicate_blocks_in_dependent_materialized_views' setting should fail"
            ):
                out = clickhouse.query_with_error(
                    chi,
                    "set deduplicate_blocks_in_dependent_materialized_views=1; insert into t1(a) values(5)",
                    host=host0)
                assert "Quorum for previous write has not been satisfied yet" in out, error(
                )

        out = clickhouse.query_with_error(
            chi,
            "select t1.a t1_a, t2.a t2_a from t1 left outer join t2 using (a) order by t1_a settings join_use_nulls=1"
        )
        note(out)
 def restart_zookeeper():
     kubectl.launch(
         f"exec -n {kubectl.namespace} zookeeper-0 -- sh -c \"kill 1\"",
         ok_to_fail=True,
     )
     clickhouse.query_with_error(chi_name, "INSERT INTO default.test_repl VALUES(now(),rand())", host=read_only_svc)