def restart_zookeeper():
    kubectl.launch(
        f"exec -n {kubectl.namespace} zookeeper-0 -- sh -c \"kill 1\"",
        ok_to_fail=True,
    )
    clickhouse.query_with_error(chi_name, "SELECT name, path FROM system.zookeeper WHERE path='/'", host=svc1)
    clickhouse.query_with_error(chi_name, "SELECT name, path FROM system.zookeeper WHERE path='/'", host=svc2)
def test_read_only_replica(self, prometheus_operator_spec, clickhouse_operator_spec, chi):
    read_only_pod, read_only_svc, other_pod, other_svc = alerts.random_pod_choice_for_callbacks(chi)
    chi_name = chi["metadata"]["name"]
    clickhouse.create_table_on_cluster(
        chi, 'all-replicated', 'default.test_repl',
        '(event_time DateTime, test UInt64) ' +
        'ENGINE ReplicatedMergeTree(\'/clickhouse/tables/{installation}-{shard}/test_repl\', \'{replica}\') ORDER BY tuple()'
    )

    def restart_zookeeper():
        kubectl.launch(
            f"exec -n {kubectl.namespace} zookeeper-0 -- sh -c \"kill 1\"",
            ok_to_fail=True,
        )
        clickhouse.query_with_error(chi_name, "INSERT INTO default.test_repl VALUES(now(),rand())", host=read_only_svc)

    with Then("check ClickHouseReadonlyReplica firing"):
        fired = alerts.wait_alert_state(
            "ClickHouseReadonlyReplica", "firing", True,
            labels={"hostname": read_only_svc},
            time_range='30s', sleep_time=settings.prometheus_scrape_interval,
            callback=restart_zookeeper,
        )
        assert fired, error("can't get ClickHouseReadonlyReplica alert in firing state")
    with Then("check ClickHouseReadonlyReplica gone away"):
        resolved = alerts.wait_alert_state("ClickHouseReadonlyReplica", "firing", False, labels={"hostname": read_only_svc})
        assert resolved, error("can't check ClickHouseReadonlyReplica alert is gone away")

    kubectl.wait_pod_status("zookeeper-0", "Running", ns=kubectl.namespace)
    kubectl.wait_jsonpath("pod", "zookeeper-0", "{.status.containerStatuses[0].ready}", "true", ns=kubectl.namespace)

    for i in range(11):
        zookeeper_status = kubectl.launch(
            f"exec -n {kubectl.namespace} zookeeper-0 -- sh -c \"echo ruok | nc 127.0.0.1 2181\"", ok_to_fail=True
        )
        if "imok" in zookeeper_status:
            break
        elif i == 10:
            fail(f"invalid zookeeper status after {i} retries")
        with Then("zookeeper is not ready, wait 2 seconds"):
            time.sleep(2)

    clickhouse.query_with_error(
        chi_name, "SYSTEM RESTART REPLICAS; SYSTEM SYNC REPLICA default.test_repl",
        host=read_only_svc, timeout=240
    )
    clickhouse.query_with_error(
        chi_name, "SYSTEM RESTART REPLICAS; SYSTEM SYNC REPLICA default.test_repl",
        host=other_svc, timeout=240
    )

    clickhouse.drop_table_on_cluster(chi, 'all-replicated', 'default.test_repl')
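# Hedged sketch (not part of the original test): a direct cross-check that the replica really
# went read-only while ZooKeeper was down. It reuses the clickhouse.query_with_error helper
# already imported by this test file; chi_name and read_only_svc are passed in explicitly so
# the snippet stays self-contained.
def check_replica_is_readonly(chi_name, read_only_svc):
    return clickhouse.query_with_error(
        chi_name,
        "SELECT is_readonly FROM system.replicas WHERE database='default' AND table='test_repl'",
        host=read_only_svc,
    )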
def reboot_clickhouse_and_distributed_exection():
    # we need 70 delayed files to catch the alert
    insert_sql = 'INSERT INTO default.test_distr(event_time, test) SELECT now(), number FROM system.numbers LIMIT 10000'
    select_sql = 'SELECT count() FROM default.test_distr'
    with Then("reboot clickhouse-server pod"):
        kubectl.launch(
            f"exec -n {kubectl.namespace} {restarted_pod} -c clickhouse-pod -- kill 1",
            ok_to_fail=True,
        )
    with Then("Insert to distributed table"):
        clickhouse.query(chi["metadata"]["name"], insert_sql, host=delayed_pod, ns=kubectl.namespace)

    with Then("Select from distributed table"):
        clickhouse.query_with_error(chi["metadata"]["name"], select_sql, host=delayed_pod, ns=kubectl.namespace)
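# Hedged sketch (not in the original): one way to watch how many distributed files are waiting
# to be flushed to the remote shard, which is the condition the callback above tries to create.
# DistributedFilesToInsert is a standard ClickHouse metric in system.metrics; the helper call
# mirrors the ones above, with chi_name and delayed_pod passed in explicitly.
def count_distributed_files_to_insert(chi_name, delayed_pod):
    return clickhouse.query_with_error(
        chi_name,
        "SELECT value FROM system.metrics WHERE metric = 'DistributedFilesToInsert'",
        host=delayed_pod,
        ns=kubectl.namespace,
    )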
def insert_many_parts_to_clickhouse():
    stop_merges = "SYSTEM STOP MERGES default.test;"
    min_block = "SET max_block_size=1; SET max_insert_block_size=1; SET min_insert_block_size_rows=1;"
    with When(f"Insert to MergeTree table {parts_limits} parts"):
        r = parts_limits
        sql = (stop_merges + min_block +
               f"INSERT INTO default.test(event_time, test) SELECT now(), number FROM system.numbers LIMIT {r};")
        clickhouse.query(chi_name, sql, host=selected_svc, ns=kubectl.namespace)

        sql = min_block + "INSERT INTO default.test(event_time, test) SELECT now(), number FROM system.numbers LIMIT 1;"
        clickhouse.query_with_error(chi_name, sql, host=selected_svc, ns=kubectl.namespace)
        with Then(f"wait prometheus_scrape_interval={prometheus_scrape_interval}*2 sec"):
            time.sleep(prometheus_scrape_interval * 2)

        with Then("after 21.8 InsertedRows include system.* rows"):
            for i in range(35):
                sql = min_block + "INSERT INTO default.test(event_time, test) SELECT now(), number FROM system.numbers LIMIT 1;"
                clickhouse.query_with_error(chi_name, sql, host=selected_svc, ns=kubectl.namespace)
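# Hedged sketch (not in the original): with merges stopped and one-row insert blocks, each row
# above becomes its own part, so the active part count should approach parts_limits. This helper
# counts active parts directly via system.parts, reusing the clickhouse.query helper with
# chi_name and selected_svc passed in explicitly.
def count_active_parts(chi_name, selected_svc):
    return clickhouse.query(
        chi_name,
        "SELECT count() FROM system.parts WHERE database='default' AND table='test' AND active",
        host=selected_svc,
        ns=kubectl.namespace,
    )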
def insert_distributed_sync():
    with When("Insert to distributed table SYNC"):
        # see https://github.com/ClickHouse/ClickHouse/pull/14260#issuecomment-683616862
        # insert_sql = 'INSERT INTO default.test_distr SELECT now(), number FROM numbers(toUInt64(5e9))'
        insert_sql = "INSERT INTO FUNCTION remote('127.1', currentDatabase(), test_distr) SELECT now(), number FROM numbers(toUInt64(5e9))"
        insert_params = '--insert_distributed_timeout=1 --insert_distributed_sync=1'
        error = clickhouse.query_with_error(
            chi["metadata"]["name"], insert_sql,
            pod=sync_pod, host=sync_pod, ns=kubectl.namespace,
            advanced_params=insert_params
        )
        # Code: 159 is TIMEOUT_EXCEEDED: the 5e9-row insert cannot finish within insert_distributed_timeout=1
        assert 'Code: 159' in error
def test_ch_001(self):
    util.require_zookeeper()

    quorum_template = "manifests/chit/tpl-clickhouse-21.8.yaml"
    chit_data = yaml_manifest.get_manifest_data(util.get_full_path(quorum_template))

    kubectl.launch(f"delete chit {chit_data['metadata']['name']}", ns=settings.test_namespace, ok_to_fail=True)
    kubectl.create_and_check(
        "manifests/chi/test-ch-001-insert-quorum.yaml",
        {
            "apply_templates": {quorum_template},
            "pod_count": 2,
            "do_not_delete": 1,
        })

    chi = yaml_manifest.get_chi_name(util.get_full_path("manifests/chi/test-ch-001-insert-quorum.yaml"))
    chi_data = kubectl.get("chi", ns=settings.test_namespace, name=chi)
    util.wait_clickhouse_cluster_ready(chi_data)

    host0 = "chi-test-ch-001-insert-quorum-default-0-0"
    host1 = "chi-test-ch-001-insert-quorum-default-0-1"

    create_table = """
    create table t1 on cluster default (a Int8, d Date default today())
    Engine = ReplicatedMergeTree('/clickhouse/tables/{table}', '{replica}')
    partition by d order by a
    TTL d + interval 5 second
    SETTINGS merge_with_ttl_timeout=5""".replace('\r', '').replace('\n', '')

    create_mv_table2 = """
    create table t2 on cluster default (a Int8)
    Engine = ReplicatedMergeTree('/clickhouse/tables/{table}', '{replica}')
    partition by tuple() order by a""".replace('\r', '').replace('\n', '')

    create_mv_table3 = """
    create table t3 on cluster default (a Int8)
    Engine = ReplicatedMergeTree('/clickhouse/tables/{table}', '{replica}')
    partition by tuple() order by a""".replace('\r', '').replace('\n', '')

    create_mv2 = "create materialized view t_mv2 on cluster default to t2 as select a from t1"
    create_mv3 = "create materialized view t_mv3 on cluster default to t3 as select a from t1"

    with Given("Tables t1, t2, t3 and MVs t1->t2, t1->t3 are created"):
        clickhouse.query(chi, create_table)
        clickhouse.query(chi, create_mv_table2)
        clickhouse.query(chi, create_mv_table3)
        clickhouse.query(chi, create_mv2)
        clickhouse.query(chi, create_mv3)

    with When("Add a row to an old partition"):
        clickhouse.query(chi, "insert into t1(a,d) values(6, today()-1)", host=host0)

    with When("Stop fetches for t1 at replica1"):
        clickhouse.query(chi, "system stop fetches default.t1", host=host1)

    with Then("Wait 10 seconds and the data should be dropped by TTL"):
        time.sleep(10)
        out = clickhouse.query(chi, "select count() from t1 where a=6", host=host0)
        assert out == "0", error()

    with When("Resume fetches for t1 at replica1"):
        clickhouse.query(chi, "system start fetches default.t1", host=host1)
        time.sleep(5)

    with Then("Inserts should resume"):
        clickhouse.query(chi, "insert into t1(a) values(7)", host=host0)
        clickhouse.query(chi, "insert into t1(a) values(1)")

    with When("Stop fetches for t2 at replica1"):
        clickhouse.query(chi, "system stop fetches default.t2", host=host1)

    with Then("Insert should fail since it can not reach the quorum"):
        out = clickhouse.query_with_error(chi, "insert into t1(a) values(2)", host=host0)
        assert "Timeout while waiting for quorum" in out, error()

    # kubectl(f"exec {host0}-0 -n test -- cp /var/lib//clickhouse/data/default/t2/all_1_1_0/a.mrk2 /var/lib//clickhouse/data/default/t2/all_1_1_0/a.bin")
    # with Then("Corrupt data part in t2"):
    #     kubectl(f"exec {host0}-0 -n test -- sed -i \"s/b/c/\" /var/lib/clickhouse/data/default/t2/all_1_1_0/a.bin")

    with When("Resume fetches for t2 at replica1"):
        clickhouse.query(chi, "system start fetches default.t2", host=host1)

    i = 0
    while "2" != clickhouse.query(
            chi,
            "select active_replicas from system.replicas where database='default' and table='t1'",
            pod=host0) and i < 10:
        with Then("Not ready, wait 5 seconds"):
            time.sleep(5)
            i += 1

    with Then("Inserts should fail with an error regarding not satisfied quorum"):
        out = clickhouse.query_with_error(chi, "insert into t1(a) values(3)", host=host0)
        assert "Quorum for previous write has not been satisfied yet" in out, error()

    with And("Second insert of the same block should pass"):
        clickhouse.query(chi, "insert into t1(a) values(3)", host=host0)

    with And("Insert of the new block should fail"):
        out = clickhouse.query_with_error(chi, "insert into t1(a) values(4)", host=host0)
        assert "Quorum for previous write has not been satisfied yet" in out, error()

    with And("Second insert of the same block with 'deduplicate_blocks_in_dependent_materialized_views' setting should fail"):
        out = clickhouse.query_with_error(
            chi,
            "set deduplicate_blocks_in_dependent_materialized_views=1; insert into t1(a) values(5)",
            host=host0)
        assert "Quorum for previous write has not been satisfied yet" in out, error()

    out = clickhouse.query_with_error(
        chi,
        "select t1.a t1_a, t2.a t2_a from t1 left outer join t2 using (a) order by t1_a settings join_use_nulls=1")
    note(out)
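# Hedged sketch (not part of the original test): the quorum failures above rely on the profile
# applied by manifests/chi/test-ch-001-insert-quorum.yaml enforcing a write quorum of 2; that
# exact value is an assumption here, not confirmed by this file. A quick way to inspect the
# effective quorum settings on a replica, reusing the clickhouse.query helper, could be:
def show_quorum_settings(chi, host):
    return clickhouse.query(
        chi,
        "SELECT name, value FROM system.settings WHERE name LIKE 'insert_quorum%'",
        host=host,
    )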