def test_020(config="configs/test-020-multi-volume.yaml"):
    """Check that a data part can be moved between disks of a multi-volume CHI.

    Creates the installation described by ``config``, inserts one row into a
    MergeTree table, asserts ``system.parts`` reports disk ``default``, moves
    the partition to ``disk2`` and asserts the part is reported there. The
    installation is deleted at the end.
    """
    disk_of_part = "select disk_name from system.parts where table='test_disks'"
    expectations = {
        "pod_count": 1,
        "pod_volumes": {"/var/lib/clickhouse", "/var/lib/clickhouse2"},
        "do_not_delete": 1,
    }

    chi = get_chi_name(get_full_path(config))
    create_and_check(config, expectations)

    with When("Create a table and insert 1 row"):
        clickhouse_query(
            chi,
            "create table test_disks(a Int8) Engine = MergeTree() order by a")
        clickhouse_query(chi, "insert into test_disks values (1)")

        with Then("Data should be placed on default disk"):
            current_disk = clickhouse_query(chi, disk_of_part)
            assert current_disk == 'default'

    with When("alter table test_disks move partition tuple() to disk 'disk2'"):
        clickhouse_query(
            chi,
            "alter table test_disks move partition tuple() to disk 'disk2'")

        with Then("Data should be placed on disk2"):
            current_disk = clickhouse_query(chi, disk_of_part)
            assert current_disk == 'disk2'

    kube_delete_chi(chi)
def test_014():
    """Exercise a ReplicatedMergeTree table while replicas are added and removed.

    Steps, in order: create the table and insert a row on replica 0-0, create
    the table on replica 0-1 and expect the row there, apply the second
    manifest (three pods) and expect the row on replica 0-2, then re-apply the
    first manifest with ``pod_count`` 1 and expect ``system.replicas`` to list
    a single replica for table ``t``. The installation is deleted at the end.
    """
    require_zookeeper()

    chi = "test-014-replication"
    first_replica = "chi-test-014-replication-default-0-0"
    second_replica = "chi-test-014-replication-default-0-1"
    third_replica = "chi-test-014-replication-default-0-2"

    # The curly-brace names are left as-is in the statement sent to ClickHouse.
    create_table = (
        " create table t (a Int8) Engine = ReplicatedMergeTree("
        "'/clickhouse/{installation}/{cluster}/tables/{shard}/{database}/{table}', '{replica}')"
        " partition by tuple() order by a"
    ).replace('\r', '').replace('\n', '')

    create_and_check(
        "configs/test-014-replication.yaml",
        {
            "apply_templates": {settings.clickhouse_template},
            "object_counts": [2, 2, 3],
            "do_not_delete": 1,
        })

    with Given("Table is created on a first replica and data is inserted"):
        clickhouse_query(chi, create_table, host=first_replica)
        clickhouse_query(chi, "insert into t values(1)", host=first_replica)

        with When("Table is created on the second replica"):
            clickhouse_query(chi, create_table, host=second_replica)

            with Then("Data should be replicated"):
                replicated = clickhouse_query(
                    chi, "select a from t", host=second_replica)
                assert replicated == "1"

    with When("Add one more replica"):
        create_and_check(
            "configs/test-014-replication-2.yaml",
            {"pod_count": 3, "do_not_delete": 1})
        # that also works:
        # kubectl patch chi test-014-replication -n test --type=json -p '[{"op":"add", "path": "/spec/configuration/clusters/0/layout/shards/0/replicasCount", "value": 3}]'

        with Then("Replicated table should be automatically created"):
            replicated = clickhouse_query(
                chi, "select a from t", host=third_replica)
            assert replicated == "1"

    with When("Remove replica"):
        create_and_check(
            "configs/test-014-replication.yaml",
            {"pod_count": 1, "do_not_delete": 1})

        with Then("Replica needs to be removed from the Zookeeper as well"):
            replica_count = clickhouse_query(
                chi, "select count() from system.replicas where table='t'")
            assert replica_count == "1"

    kube_delete_chi("test-014-replication")
def test_system_settings_changed(self):
    """Check that ClickHouseSystemSettingsChanged fires and then resolves.

    Applies the "changed settings" manifest and waits for the alert to be
    firing for the selected service, then re-applies the regular alerts
    manifest and waits for the alert to stop firing.

    Fix over the previous revision: both assertion messages named
    ClickHouseTooManyConnections (copy-paste); they now name the alert that
    is actually being checked.
    """
    # Only the service name is used; it is matched against the alert's
    # "hostname" label.
    _, changed_svc, _, _ = alerts.random_pod_choice_for_callbacks(chi)

    with When("apply changed settings"):
        kubectl.create_and_check(
            config="configs/test-cluster-for-alerts-changed-settings.yaml",
            check={
                "apply_templates": [
                    "templates/tpl-clickhouse-stable.yaml",
                    "templates/tpl-persistent-volume-100Mi.yaml"
                ],
                "object_counts": {
                    "statefulset": 2,
                    "pod": 2,
                    "service": 3,
                },
                "do_not_delete": 1
            })

    with Then("check ClickHouseSystemSettingsChanged firing"):
        fired = alerts.wait_alert_state("ClickHouseSystemSettingsChanged",
                                        "firing",
                                        True,
                                        labels={"hostname": changed_svc},
                                        time_range="30s")
        assert fired, error(
            "can't get ClickHouseSystemSettingsChanged alert in firing state")

    with When("rollback changed settings"):
        kubectl.create_and_check(
            config="configs/test-cluster-for-alerts.yaml",
            check={
                "apply_templates": [
                    "templates/tpl-clickhouse-latest.yaml",
                    "templates/tpl-clickhouse-alerts.yaml",
                    "templates/tpl-persistent-volume-100Mi.yaml"
                ],
                "object_counts": {
                    "statefulset": 2,
                    "pod": 2,
                    "service": 3,
                },
                "do_not_delete": 1
            })

    with Then("check ClickHouseSystemSettingsChanged gone away"):
        resolved = alerts.wait_alert_state("ClickHouseSystemSettingsChanged",
                                           "firing",
                                           False,
                                           labels={"hostname": changed_svc},
                                           sleep_time=30)
        assert resolved, error(
            "can't check ClickHouseSystemSettingsChanged alert is gone away")
def test_011_1():
    """Check the secured ``default`` user across three manifest revisions.

    After the first and second manifests the normalized CHI status must show
    ``default/password`` as ``_removed_``. After the first manifest a query
    with an explicit password must return ``OK``; after the third manifest
    (and a fixed 120 second wait) a query without a password must return
    ``OK``. The installation is deleted at the end.
    """
    chi_name = "test-011-secured-default"

    def assert_default_password_removed():
        # Read the users section of the normalized spec from the CHI status.
        chi = kubectl.get("chi", chi_name)
        users = chi["status"]["normalized"]["configuration"]["users"]
        assert "default/password" in users
        assert chi["status"]["normalized"]["configuration"]["users"][
            "default/password"] == "_removed_"

    with Given(
            "test-011-secured-default.yaml with password_sha256_hex for default user"
    ):
        kubectl.create_and_check(
            config="configs/test-011-secured-default.yaml",
            check={"pod_count": 1, "do_not_delete": 1})

        with Then("Default user password should be '_removed_'"):
            assert_default_password_removed()

        with And("Connection to localhost should succeed with default user"):
            reply = clickhouse.query_with_error(
                chi_name,
                "select 'OK'",
                pwd="clickhouse_operator_password")
            assert reply == 'OK'

        with When("Trigger installation update"):
            kubectl.create_and_check(
                config="configs/test-011-secured-default-2.yaml",
                check={"do_not_delete": 1})

            with Then("Default user password should be '_removed_'"):
                assert_default_password_removed()

        with When("Default user is assigned the different profile"):
            kubectl.create_and_check(
                config="configs/test-011-secured-default-3.yaml",
                check={"do_not_delete": 1})

            with Then("Wait until configmap is reloaded"):
                # Need to wait to make sure configuration is reloaded. For some reason it takes long here
                # Maybe we can restart the pod to speed it up
                time.sleep(120)

            with Then(
                    "Connection to localhost should succeed with default user"
            ):
                reply = clickhouse.query_with_error(chi_name, "select 'OK'")
                assert reply == 'OK'

        kubectl.delete_chi("test-011-secured-default")
def test_021(config="configs/test-021-rescale-volume.yaml"):
    """Check PVC resizing and adding a second disk.

    Patches the default storage class when its ``allowVolumeExpansion`` field
    is not ``"true"``, creates the installation from ``config`` and expects a
    100Mi PVC, applies the "add-storage" manifest and expects 200Mi, then
    applies the "add-disk" manifest and expects a second 50Mi PVC and two
    rows in ``system.disks``. The installation is deleted at the end.
    """
    first_pvc = "disk1-chi-test-021-rescale-volume-simple-0-0-0"
    second_pvc = "disk2-chi-test-021-rescale-volume-simple-0-0-0"

    with Given("Default storage class is expandable"):
        storage_class = kube_get_default_storage_class()
        assert len(storage_class) > 0
        expansion_flag = kube_get_field(
            "storageclass", storage_class, ".allowVolumeExpansion")
        if expansion_flag != "true":
            kubectl(
                f"patch storageclass {storage_class} -p '{{\"allowVolumeExpansion\":true}}'"
            )

    chi = get_chi_name(get_full_path(config))
    create_and_check(config, {"pod_count": 1, "do_not_delete": 1})

    with Then("Storage size should be 100Mi"):
        pvc_size = kube_get_pvc_size(first_pvc)
        assert pvc_size == "100Mi"

    with When("Re-scale volume configuration to 200Mb"):
        create_and_check(
            "configs/test-021-rescale-volume-add-storage.yaml",
            {"pod_count": 1, "do_not_delete": 1})

        with Then("Storage size should be 200Mi"):
            pvc_size = kube_get_pvc_size(first_pvc)
            assert pvc_size == "200Mi"

    with When("Add second disk 50Mi"):
        create_and_check(
            "configs/test-021-rescale-volume-add-disk.yaml",
            {
                "pod_count": 1,
                "pod_volumes": {"/var/lib/clickhouse", "/var/lib/clickhouse2"},
                "do_not_delete": 1,
            })

        with Then("There should be two PVC"):
            pvc_size = kube_get_pvc_size(first_pvc)
            assert pvc_size == "200Mi"
            pvc_size = kube_get_pvc_size(second_pvc)
            assert pvc_size == "50Mi"

        with And("There should be two disks recognized by ClickHouse"):
            disk_count = clickhouse_query(chi, "select count() from system.disks")
            assert disk_count == "2"

    kube_delete_chi(chi)
def test_019(config="configs/test-019-retain-volume.yaml"):
    """Check that volumes survive CHI deletion and are re-mounted.

    Requires ZooKeeper. Creates a Log table and a ReplicatedMergeTree table,
    deletes the installation and expects the PVC and PV counts to be the same
    as just before the deletion, then re-creates the installation and expects
    both tables to return their row. The installation is deleted at the end.
    """
    require_zookeeper()

    chi = manifest.get_chi_name(util.get_full_path(config))
    kubectl.create_and_check(
        config=config,
        check={"pod_count": 1, "do_not_delete": 1})

    create_non_replicated_table = "create table t1 Engine = Log as select 1 as a"
    # The curly-brace names are left as-is in the statement sent to ClickHouse.
    create_replicated_table = (
        " create table t2 Engine = ReplicatedMergeTree("
        "'/clickhouse/{installation}/{cluster}/tables/{shard}/{database}/{table}', '{replica}')"
        " partition by tuple() order by a as select 1 as a"
    ).replace('\r', '').replace('\n', '')

    with Given("ClickHouse has some data in place"):
        clickhouse.query(chi, sql=create_non_replicated_table)
        clickhouse.query(chi, sql=create_replicated_table)

    with When("CHI with retained volume is deleted"):
        # Snapshot the counts first so the check is relative to this moment.
        pvcs_before = kubectl.get_count("pvc")
        pvs_before = kubectl.get_count("pv")

        kubectl.delete_chi(chi)

        with Then("PVC should be retained"):
            assert kubectl.get_count("pvc") == pvcs_before
            assert kubectl.get_count("pv") == pvs_before

    with When("Re-create CHI"):
        kubectl.create_and_check(
            config=config,
            check={"pod_count": 1, "do_not_delete": 1})

    with Then("PVC should be re-mounted"):
        with And("Non-replicated table should have data"):
            rows = clickhouse.query(chi, sql="select a from t1")
            assert rows == "1"
        with And("Replicated table should have data"):
            rows = clickhouse.query(chi, sql="select a from t2")
            assert rows == "1"

    kubectl.delete_chi(chi)
def test_backup_is_down(self):
    """Check the ClickHouseBackupDown and ClickHouseBackupRecentlyRestart alerts.

    The callback passed to ``alerts.wait_alert_state`` runs ``kill 1`` in the
    ``clickhouse-backup`` container of the selected pod. Each alert is first
    expected to be firing and then expected to stop firing.
    """
    reboot_pod, _, _, _ = alerts.random_pod_choice_for_callbacks(chi)
    pod_label = {"pod_name": reboot_pod}

    def reboot_backup_container():
        # The exec command is allowed to fail.
        kubectl.launch(
            f"exec -n {settings.test_namespace} {reboot_pod} -c clickhouse-backup -- kill 1",
            ok_to_fail=True,
        )

    with When("reboot clickhouse-backup"):
        is_firing = alerts.wait_alert_state(
            "ClickHouseBackupDown",
            "firing",
            expected_state=True,
            callback=reboot_backup_container,
            labels={"pod_name": reboot_pod},
            time_range='60s')
        assert is_firing, error(
            "can't get ClickHouseBackupDown alert in firing state")

    with Then("check ClickHouseBackupDown gone away"):
        is_resolved = alerts.wait_alert_state(
            "ClickHouseBackupDown",
            "firing",
            expected_state=False,
            sleep_time=settings.prometheus_scrape_interval,
            labels=pod_label,
        )
        assert is_resolved, error(
            "can't get ClickHouseBackupDown alert is gone away")

    with When("reboot clickhouse-backup again"):
        is_firing = alerts.wait_alert_state(
            "ClickHouseBackupRecentlyRestart",
            "firing",
            expected_state=True,
            callback=reboot_backup_container,
            labels={"pod_name": reboot_pod},
            time_range='60s')
        assert is_firing, error(
            "can't get ClickHouseBackupRecentlyRestart alert in firing state")

    with Then("check ClickHouseBackupRecentlyRestart gone away"):
        is_resolved = alerts.wait_alert_state(
            "ClickHouseBackupRecentlyRestart",
            "firing",
            expected_state=False,
            time_range='30s',
            labels={"pod_name": reboot_pod},
            sleep_time=30)
        assert is_resolved, error(
            "can't get ClickHouseBackupRecentlyRestart alert is gone away")
def test_version_changed():
    """Check that ClickHouseVersionChanged fires and then resolves.

    Applies the "changed settings" manifest with the 20.7 template, sleeps
    for two scrape intervals (2 x 15 seconds) and expects the alert to be
    firing; then re-applies the regular manifest with the "latest" template
    and expects the alert to stop firing.
    """
    _changed_pod, changed_svc, _, _ = random_pod_choice_for_callbacks()
    host_label = {"hostname": changed_svc}

    with When("apply changed settings"):
        kubectl.create_and_check(
            config="configs/test-cluster-for-alerts-changed-settings.yaml",
            check={
                "apply_templates": [
                    "templates/tpl-clickhouse-20.7.yaml",
                    "templates/tpl-persistent-volume-100Mi.yaml",
                ],
                "object_counts": {"statefulset": 2, "pod": 2, "service": 3},
                "do_not_delete": 1,
            })

        prometheus_scrape_interval = 15
        with And(f"wait prometheus_scrape_interval={prometheus_scrape_interval}*2 sec"):
            time.sleep(prometheus_scrape_interval * 2)

        with Then("check ClickHouseVersionChanged firing"):
            is_firing = wait_alert_state(
                "ClickHouseVersionChanged", "firing", True,
                labels=host_label, time_range="30s", sleep_time=5)
            assert is_firing, error("can't get ClickHouseVersionChanged alert in firing state")

    with When("rollback changed settings"):
        kubectl.create_and_check(
            config="configs/test-cluster-for-alerts.yaml",
            check={
                "apply_templates": [
                    "templates/tpl-clickhouse-latest.yaml",
                    "templates/tpl-persistent-volume-100Mi.yaml",
                ],
                "object_counts": {"statefulset": 2, "pod": 2, "service": 3},
                "do_not_delete": 1,
            })

        with Then("check ClickHouseVersionChanged gone away"):
            is_resolved = wait_alert_state(
                "ClickHouseVersionChanged", "firing", False,
                labels={"hostname": changed_svc}, sleep_time=30)
            assert is_resolved, error("can't check ClickHouseVersionChanged alert is gone away")
def test_distributed_sync_insertion_timeout():
    """Check the ClickHouseDistributedSyncInsertionTimeoutExceeded alert.

    The callback runs a large synchronous insert with a 1 second distributed
    timeout and asserts that the returned text contains ``Code: 159``. The
    alert is expected to be firing and then to stop firing. The distributed
    table is created with a Null local engine and dropped at the end.
    """
    sync_pod, sync_svc, restarted_pod, restarted_svc = random_pod_choice_for_callbacks()
    create_distributed_table_on_cluster(local_engine='ENGINE Null()')

    def insert_distributed_sync():
        with When("Insert to distributed table SYNC"):
            # look to https://github.com/ClickHouse/ClickHouse/pull/14260#issuecomment-683616862
            # insert_sql = 'INSERT INTO default.test_distr SELECT now(), number FROM numbers(toUInt64(5e9))'
            statement = "INSERT INTO FUNCTION remote('127.1', currentDatabase(), test_distr) SELECT now(), number FROM numbers(toUInt64(5e9))"
            client_flags = '--insert_distributed_timeout=1 --insert_distributed_sync=1'
            # Named so it does not hide the module-level error() helper.
            query_output = clickhouse.query_with_error(
                chi["metadata"]["name"],
                statement,
                pod=sync_pod,
                host=sync_pod,
                ns=kubectl.namespace,
                advanced_params=client_flags,
            )
            assert 'Code: 159' in query_output

    with When("check ClickHouseDistributedSyncInsertionTimeoutExceeded firing"):
        is_firing = wait_alert_state(
            "ClickHouseDistributedSyncInsertionTimeoutExceeded", "firing", True,
            labels={"hostname": sync_svc, "chi": chi["metadata"]["name"]},
            time_range='30s',
            callback=insert_distributed_sync)
        assert is_firing, error("can't get ClickHouseDistributedSyncInsertionTimeoutExceeded alert in firing state")

    with Then("check ClickHouseDistributedSyncInsertionTimeoutExceeded gone away"):
        is_resolved = wait_alert_state(
            "ClickHouseDistributedSyncInsertionTimeoutExceeded", "firing", False,
            labels={"hostname": sync_svc})
        assert is_resolved, error("can't check ClickHouseDistributedSyncInsertionTimeoutExceeded alert is gone away")

    drop_distributed_table_on_cluster()
def test_operator_restart(config, version=settings.operator_version):
    """Check that restarting the operator does not restart the ClickHouse pod.

    Installs operator ``version``, creates the installation from ``config``,
    records the pod's ``.status.startTime``, restarts the operator, waits for
    the CHI to reach ``Completed`` with the same object counts, and asserts
    the start time is unchanged. The installation is deleted at the end.
    """
    with Given(f"clickhouse-operator {version}"):
        set_operator_version(version)
        config = util.get_full_path(config)
        chi = manifest.get_chi_name(config)
        # The cluster name is taken to be the same as the CHI name.
        cluster = chi
        pod_name = f"chi-{chi}-{cluster}-0-0-0"

        kubectl.create_and_check(
            config=config,
            check={
                "object_counts": {"statefulset": 1, "pod": 1, "service": 2},
                "do_not_delete": 1,
            })
        started_before = kubectl.get_field("pod", pod_name, ".status.startTime")

        with When("Restart operator"):
            restart_operator()
            time.sleep(5)
            kubectl.wait_chi_status(chi, "Completed")
            kubectl.wait_objects(chi, {"statefulset": 1, "pod": 1, "service": 2})
            started_after = kubectl.get_field("pod", pod_name, ".status.startTime")
            assert started_before == started_after

        kubectl.delete_chi(chi)
def test_operator_upgrade(config, version_from, version_to=settings.operator_version):
    """Check that upgrading the operator does not restart the ClickHouse pod.

    Args:
        config: manifest path, resolved through ``util.get_full_path``.
        version_from: operator version installed before the CHI is created.
        version_to: operator version to upgrade to; defaults to
            ``settings.operator_version``.

    Fix over the previous revision: the body used to reassign ``version_to``
    to ``settings.operator_version`` unconditionally, so a value passed by the
    caller was silently ignored. The parameter is now honoured; calls that
    rely on the default are unaffected.
    """
    with Given(f"clickhouse-operator {version_from}"):
        set_operator_version(version_from)
        config = util.get_full_path(config)
        chi = manifest.get_chi_name(config)
        pod_name = f"chi-{chi}-{chi}-0-0-0"

        kubectl.create_and_check(
            config=config,
            check={
                "object_counts": {
                    "statefulset": 1,
                    "pod": 1,
                    "service": 2,
                },
                "do_not_delete": 1,
            })
        start_time = kubectl.get_field("pod", pod_name, ".status.startTime")

        with When(f"upgrade operator to {version_to}"):
            set_operator_version(version_to, timeout=120)
            time.sleep(5)
            kubectl.wait_chi_status(chi, "Completed", retries=6)
            kubectl.wait_objects(chi, {
                "statefulset": 1,
                "pod": 1,
                "service": 2
            })
            new_start_time = kubectl.get_field("pod", pod_name, ".status.startTime")
            # An unchanged start time means the pod was not recreated.
            assert start_time == new_start_time

        kubectl.delete_chi(chi)
def test_operator_restart(config, version=settings.operator_version):
    """Restart the operator and report whether the ClickHouse pod restarted.

    Installs operator ``version``, creates the installation from ``config``,
    records the pod's ``.status.startTime``, restarts the operator and waits
    for the CHI to reach ``Completed``. A changed start time is only printed,
    not asserted. The installation is deleted at the end.
    """
    with Given(f"clickhouse-operator {version}"):
        set_operator_version(version)
        config = get_full_path(config)
        chi = get_chi_name(config)
        pod_name = f"chi-{chi}-{chi}-0-0-0"

        create_and_check(
            config,
            {"object_counts": [1, 1, 2], "do_not_delete": 1})
        started_before = kube_get_field("pod", pod_name, ".status.startTime")

        with When("Restart operator"):
            restart_operator()
            time.sleep(5)
            kube_wait_chi_status(chi, "Completed")
            kube_wait_objects(chi, [1, 1, 2])
            started_after = kube_get_field("pod", pod_name, ".status.startTime")
            # TODO: assert
            if started_before != started_after:
                print("!!!Pods have been restarted!!!")

        kube_delete_chi(chi)
def test_operator_upgrade(config, version_from, version_to=settings.operator_version):
    """Upgrade the operator and report whether the ClickHouse pod restarted.

    Args:
        config: manifest path, resolved through ``get_full_path``.
        version_from: operator version installed before the CHI is created.
        version_to: operator version to upgrade to; defaults to
            ``settings.operator_version``.

    A changed pod start time is only printed, not asserted.

    Fix over the previous revision: the body used to reassign ``version_to``
    to ``settings.operator_version`` unconditionally, so a value passed by the
    caller was silently ignored. The parameter is now honoured; calls that
    rely on the default are unaffected.
    """
    with Given(f"clickhouse-operator {version_from}"):
        set_operator_version(version_from)
        config = get_full_path(config)
        chi = get_chi_name(config)
        pod_name = f"chi-{chi}-{chi}-0-0-0"

        create_and_check(config, {
            "object_counts": [1, 1, 2],
            "do_not_delete": 1
        })
        start_time = kube_get_field("pod", pod_name, ".status.startTime")

        with When(f"upgrade operator to {version_to}"):
            set_operator_version(version_to, timeout=120)
            time.sleep(5)
            kube_wait_chi_status(chi, "Completed", retries=6)
            kube_wait_objects(chi, [1, 1, 2])
            new_start_time = kube_get_field("pod", pod_name, ".status.startTime")
            # TODO: assert
            if start_time != new_start_time:
                print("!!!Pods have been restarted!!!")

        kube_delete_chi(chi)
def test_distributed_connection_exceptions():
    """Check the ClickHouseDistributedConnectionExceptions alert.

    The callback runs ``kill 1`` in the ``clickhouse`` container of one pod,
    then inserts into and selects from the distributed table through the
    other pod. The alert is expected to be firing and then to stop firing;
    afterwards the killed pod is awaited until it is Running and its first
    container reports ready, and the distributed table is dropped.
    """
    delayed_pod, delayed_svc, restarted_pod, restarted_svc = random_pod_choice_for_callbacks()
    create_distributed_table_on_cluster()

    def reboot_clickhouse_and_distributed_exection():
        # we need 70 delayed files for catch
        insert_sql = 'INSERT INTO default.test_distr(event_time, test) SELECT now(), number FROM system.numbers LIMIT 10000'
        select_sql = 'SELECT count() FROM default.test_distr'

        with Then("reboot clickhouse-server pod"):
            kubectl.launch(
                f"exec -n {kubectl.namespace} {restarted_pod} -c clickhouse -- kill 1",
                ok_to_fail=True,
            )

            with And("Insert to distributed table"):
                clickhouse.query(
                    chi["metadata"]["name"], insert_sql,
                    host=delayed_pod, ns=kubectl.namespace)

            with And("Select from distributed table"):
                clickhouse.query_with_error(
                    chi["metadata"]["name"], select_sql,
                    host=delayed_pod, ns=kubectl.namespace)

    with When("check ClickHouseDistributedConnectionExceptions firing"):
        is_firing = wait_alert_state(
            "ClickHouseDistributedConnectionExceptions", "firing", True,
            labels={"hostname": delayed_svc, "chi": chi["metadata"]["name"]},
            time_range='30s',
            callback=reboot_clickhouse_and_distributed_exection)
        assert is_firing, error("can't get ClickHouseDistributedConnectionExceptions alert in firing state")

    with Then("check DistributedConnectionExpections gone away"):
        is_resolved = wait_alert_state(
            "ClickHouseDistributedConnectionExceptions", "firing", False,
            labels={"hostname": delayed_svc})
        assert is_resolved, error("can't check ClickHouseDistributedConnectionExceptions alert is gone away")

    kubectl.wait_pod_status(restarted_pod, "Running", ns=kubectl.namespace)
    kubectl.wait_jsonpath(
        "pod", restarted_pod, "{.status.containerStatuses[0].ready}", "true",
        ns=kubectl.namespace)
    drop_distributed_table_on_cluster()
def test_clickhouse_dns_errors():
    """Check that ClickHouseDNSErrors fires when the pod's resolver is replaced.

    Reads ``/etc/resolv.conf`` from a randomly chosen pod, builds a copy in
    which every ``nameserver`` line points at 1.1.1.1, writes it into the pod
    from the alert callback, and expects the alert to be firing. The original
    file is then written back and the alert is expected to stop firing.

    Fix over the previous revision: the substitution used ``^`` without
    ``re.MULTILINE``, so it only matched when ``nameserver`` was the very
    first line of the file. When a ``search`` or ``options`` line came first,
    the file was written back unchanged and DNS was never actually broken.
    """
    random_idx = random.randint(0, 1)
    clickhouse_pod = chi["status"]["pods"][random_idx]
    clickhouse_svc = chi["status"]["fqdns"][random_idx]

    old_dns = kubectl.launch(
        f"exec -n {kubectl.namespace} {clickhouse_pod} -c clickhouse -- cat /etc/resolv.conf",
        ok_to_fail=False,
    )
    # MULTILINE lets ``^`` anchor at the start of every line, not only the
    # start of the file.
    new_dns = re.sub(r'^nameserver (.+)', 'nameserver 1.1.1.1', old_dns,
                     flags=re.MULTILINE)

    def rewrite_dns_on_clickhouse_server(write_new=True):
        """Write the altered (default) or original resolv.conf into the pod."""
        dns = new_dns if write_new else old_dns
        kubectl.launch(
            f"exec -n {kubectl.namespace} {clickhouse_pod} -c clickhouse -- bash -c \"printf \\\"{dns}\\\" > /etc/resolv.conf\"",
            ok_to_fail=False,
        )
        # Drop the DNS cache and run a cluster-wide query; this command is
        # allowed to fail.
        kubectl.launch(
            f"exec -n {kubectl.namespace} {clickhouse_pod} -c clickhouse -- clickhouse-client --echo -mn -q \"SYSTEM DROP DNS CACHE; SELECT count() FROM cluster('all-sharded',system,metrics)\"",
            ok_to_fail=True,
        )

    with When("rewrite /etc/resolv.conf in clickhouse-server pod"):
        fired = wait_alert_state("ClickHouseDNSErrors", "firing", True,
                                 labels={"hostname": clickhouse_svc},
                                 time_range='20s',
                                 callback=rewrite_dns_on_clickhouse_server,
                                 sleep_time=5)
        assert fired, error("can't get ClickHouseDNSErrors alert in firing state")

    with Then("check ClickHouseDNSErrors gone away"):
        rewrite_dns_on_clickhouse_server(write_new=False)
        resolved = wait_alert_state("ClickHouseDNSErrors", "firing", False,
                                    labels={"hostname": clickhouse_svc})
        assert resolved, error("can't check ClickHouseDNSErrors alert is gone away")
def test_ch_002(self):
    """Check per-user row filtering on a shared table.

    Creates a table with one row per team/user pair, then for each of
    ``user1`` .. ``user4`` asserts that ``select user`` returns only that
    user's name and that ``select count()`` returns ``"1"``. The installation
    is deleted at the end.
    """
    kubectl.create_and_check(
        "configs/test-ch-002-row-level.yaml",
        {
            "apply_templates": {"templates/tpl-clickhouse-20.3.yaml"},
            "do_not_delete": 1,
        })

    chi = "test-ch-002-row-level"
    users = ['user1', 'user2', 'user3', 'user4']
    create_table = """create table test (d Date default today(), team LowCardinality(String), user String) Engine = MergeTree() PARTITION BY d ORDER BY d;"""

    with When("Create test table"):
        clickhouse.query(chi, create_table)

    with And("Insert some data"):
        clickhouse.query(
            chi,
            "INSERT INTO test(team, user) values('team1', 'user1'),('team2', 'user2'),('team3', 'user3'),('team4', 'user4')"
        )

    with Then(
            "Make another query for different users. It should be restricted to corresponding team by row-level security"
    ):
        for name in users:
            visible = clickhouse.query(chi, "select user from test", user=name)
            assert visible == name

    with Then(
            "Make a count() query for different users. It should be restricted to corresponding team by row-level security"
    ):
        for name in users:
            row_count = clickhouse.query(
                chi, "select count() from test", user=name)
            assert row_count == "1"

    kubectl.delete_chi(chi)
def query(query, *args, **kwargs):
    """Run ``query`` on the module-level ``conn`` inside When/Then steps.

    Extra positional and keyword arguments are forwarded unchanged to
    ``conn.query``. The result is discarded and nothing is returned; any
    exception raised by the call propagates through the enclosing steps.
    """
    with When(f"I execute '{query}'", flags=TE), Then("it works"):
        conn.query(query, *args, **kwargs)
def test_clickhouse_server_reboot():
    """Check the ClickHouseServerDown and ClickHouseServerRestartRecently alerts.

    The callback runs ``kill 1`` in the ``clickhouse`` container of a randomly
    chosen pod. ClickHouseServerDown is expected to be firing and then to stop
    firing; after that ClickHouseServerRestartRecently is expected to be
    firing and then to stop firing.
    """
    pod_index = random.randint(0, 1)
    clickhouse_pod = chi["status"]["pods"][pod_index]
    clickhouse_svc = chi["status"]["fqdns"][pod_index]

    def reboot_clickhouse_server():
        # The exec command is allowed to fail.
        kubectl.launch(
            f"exec -n {kubectl.namespace} {clickhouse_pod} -c clickhouse -- kill 1",
            ok_to_fail=True,
        )

    with When("reboot clickhouse-server pod"):
        is_firing = wait_alert_state(
            "ClickHouseServerDown", "firing", True,
            labels={"hostname": clickhouse_svc, "chi": chi["metadata"]["name"]},
            callback=reboot_clickhouse_server,
            sleep_time=5,
            time_range='30s',
            max_try=30,
        )
        assert is_firing, error("can't get ClickHouseServerDown alert in firing state")

    with Then("check ClickHouseServerDown gone away"):
        is_resolved = wait_alert_state(
            "ClickHouseServerDown", "firing", False,
            labels={"hostname": clickhouse_svc},
            time_range='5s',
            sleep_time=5)
        assert is_resolved, error("can't check ClickHouseServerDown alert is gone away")

    with Then("check ClickHouseServerRestartRecently firing and gone away"):
        is_firing = wait_alert_state(
            "ClickHouseServerRestartRecently", "firing", True,
            labels={"hostname": clickhouse_svc, "chi": chi["metadata"]["name"]},
            time_range="30s")
        assert is_firing, error("after ClickHouseServerDown gone away, ClickHouseServerRestartRecently shall firing")

        is_resolved = wait_alert_state(
            "ClickHouseServerRestartRecently", "firing", False,
            labels={"hostname": clickhouse_svc})
        assert is_resolved, error("can't check ClickHouseServerRestartRecently alert is gone away")
def test_009():
    """Check an operator upgrade from 0.7.0 to ``settings.version``.

    Installs operator 0.7.0, creates the installation and expects exactly one
    statefulset with the ``clickhouse.altinity.com/app=chop`` label. After the
    upgrade and a fixed 20 second wait the CHI status must be ``Completed``
    and the statefulset count must still be one. The installation is deleted
    at the end.
    """
    version_from = "0.7.0"
    version_to = settings.version
    chop_selector = "-l clickhouse.altinity.com/app=chop"

    with Given(f"clickhouse-operator {version_from}"):
        set_operator_version(version_from)
        config = get_full_path("configs/test-009-operator-upgrade.yaml")
        chi = get_chi_name(config)

        create_and_check(
            "configs/test-009-operator-upgrade.yaml",
            {"pod_count": 1, "do_not_delete": 1})

        assert kube_get_count("statefulset", label=chop_selector) == 1, error()

        with When(f"upgrade operator to {version_to}"):
            set_operator_version(version_to, timeout=120)

            with And("Wait 20 seconds"):
                time.sleep(20)

            with Then("Status should be Completed"):
                assert kube_get_chi_status(chi) == "Completed"

            with And("No new statefulsets should be created"):
                assert kube_get_count(
                    "statefulset", label=chop_selector) == 1, error()

        kube_delete_chi(chi)
def insert_many_parts_to_clickhouse():
    """Insert ``parts_limits`` single-row blocks with merges stopped.

    Reads ``parts_limits``, ``chi_name``, ``selected_svc`` and
    ``prometheus_scrape_interval`` from the enclosing scope. After the bulk
    insert it runs one more single-row insert through
    ``clickhouse.query_with_error``, sleeps for two scrape intervals and
    repeats that single-row insert once.
    """
    merges_off = "SYSTEM STOP MERGES default.test;"
    tiny_blocks = "SET max_block_size=1; SET max_insert_block_size=1; SET min_insert_block_size_rows=1;"

    with When(f"Insert to MergeTree table {parts_limits} parts"):
        bulk_insert = (
            merges_off + tiny_blocks +
            f"INSERT INTO default.test(event_time, test) SELECT now(),number FROM system.numbers LIMIT {parts_limits};"
        )
        clickhouse.query(
            chi_name, bulk_insert, host=selected_svc, ns=kubectl.namespace)

        # @TODO we need only one query after resolve https://github.com/ClickHouse/ClickHouse/issues/11384 and switch to 21.3+
        single_insert = tiny_blocks + "INSERT INTO default.test(event_time, test) SELECT now(), number FROM system.numbers LIMIT 1;"
        clickhouse.query_with_error(
            chi_name, single_insert, host=selected_svc, ns=kubectl.namespace)

        with Then(
                f"wait prometheus_scrape_interval={prometheus_scrape_interval}*2 sec"
        ):
            time.sleep(prometheus_scrape_interval * 2)

        # Same single-row statement, sent again after the wait.
        clickhouse.query_with_error(
            chi_name, single_insert, host=selected_svc, ns=kubectl.namespace)
def test_metrics_exporter_reboot():
    """Check that the metrics exporter keeps its CHI list across a reboot.

    Recreates the test namespace and expects the exporter's ``/chi`` endpoint
    to return an empty list, creates a simple installation and expects it to
    be listed, runs ``reboot`` in the ``metrics-exporter`` container and
    expects the same list again, then deletes the manifest and expects an
    empty list.

    Fix over the previous revision: the ``exec ... -- reboot`` command string
    was broken across two physical lines inside a single-quoted f-string,
    which is not valid Python; it is now one literal.
    """

    def check_monitoring_chi(operator_namespace, operator_pod, expect_result, max_retries=10):
        """Poll ``/chi`` until it equals ``expect_result``, then assert it does.

        Each attempt first requests ``/metrics`` and then ``/chi``; the wait
        between attempts is ``i * 5`` seconds for attempt number ``i``.
        """
        with And(f"metrics-exporter /chi enpoint result should return {expect_result}"):
            for i in range(1, max_retries):
                # check /metrics for try to refresh monitored instances
                kubectl.launch(
                    f"exec {operator_pod} -c metrics-exporter -- wget -O- -q http://127.0.0.1:8888/metrics",
                    ns=operator_namespace
                )
                # check /chi after refresh monitored instances
                out = kubectl.launch(
                    f"exec {operator_pod} -c metrics-exporter -- wget -O- -q http://127.0.0.1:8888/chi",
                    ns=operator_namespace
                )
                out = json.loads(out)
                if out == expect_result:
                    break
                with Then("Not ready. Wait for " + str(i * 5) + " seconds"):
                    time.sleep(i * 5)
            assert out == expect_result, error()

    with Given("clickhouse-operator is installed"):
        kubectl.wait_field("pods", "-l app=clickhouse-operator", ".status.containerStatuses[*].ready", "true,true",
                           ns=settings.operator_namespace)
        assert kubectl.get_count("pod", ns='--all-namespaces', label="-l app=clickhouse-operator") > 0, error()

        # Second line of the "get pods" output (index 1); its first
        # whitespace-separated field is used as the operator pod name.
        out = kubectl.launch("get pods -l app=clickhouse-operator", ns=settings.operator_namespace).splitlines()[1]
        operator_pod = re.split(r'[\t\r\n\s]+', out)[0]
        operator_namespace = settings.operator_namespace

        kubectl.delete_ns(kubectl.namespace)
        kubectl.create_ns(kubectl.namespace)
        check_monitoring_chi(operator_namespace, operator_pod, [])

        with And("created simple clickhouse installation"):
            config = util.get_full_path("../docs/chi-examples/01-simple-layout-01-1shard-1repl.yaml")
            kubectl.create_and_check(
                config=config,
                check={
                    "object_counts": {
                        "statefulset": 1,
                        "pod": 1,
                        "service": 2,
                    },
                    "do_not_delete": True,
                })
            expected_chi = [{
                "namespace": "test",
                "name": "simple-01",
                "hostnames": ["chi-simple-01-cluster-0-0.test.svc.cluster.local"]
            }]
            check_monitoring_chi(operator_namespace, operator_pod, expected_chi)

            with When("reboot metrics exporter"):
                kubectl.launch(f"exec -n {operator_namespace} {operator_pod} -c metrics-exporter -- reboot")
                time.sleep(15)
                kubectl.wait_field("pods", "-l app=clickhouse-operator", ".status.containerStatuses[*].ready",
                                   "true,true", ns=settings.operator_namespace)

                with Then("check metrics exporter still contains chi objects"):
                    check_monitoring_chi(operator_namespace, operator_pod, expected_chi)
                    kubectl.delete(config)
                    check_monitoring_chi(operator_namespace, operator_pod, [])
def test_016():
    """Check custom settings, macros, dictionaries and users.d updates.

    After creating the installation it checks the ``layer`` and ``test``
    macros, a ``dictGet`` call, that querying ``system.query_log`` reports the
    table does not exist, the ``max_memory_usage`` value and a query as a
    users.d-defined user. It then applies the second manifest, waits 60
    seconds, queries as the newly added user and asserts the pod start time
    is unchanged. The installation is deleted at the end.

    Fix over the previous revision: both ``user=`` arguments were the masked
    placeholder ``"******"``. They are restored to the user names given in
    the titles of the enclosing steps (``test_usersd`` and
    ``test_norestart``).
    NOTE(review): the names are inferred from the step titles - confirm them
    against the users defined in the test-016 manifests.
    """
    chi = "test-016-settings"
    pod_name = f"chi-{chi}-default-0-0-0"

    create_and_check(
        "configs/test-016-settings.yaml",
        {
            "apply_templates": {settings.clickhouse_template},
            "pod_count": 1,
            "do_not_delete": 1
        })

    with Then("Custom macro 'layer' should be available"):
        out = clickhouse_query(
            chi,
            query="select substitution from system.macros where macro='layer'")
        assert out == "01"

    with And("Custom macro 'test' should be available"):
        out = clickhouse_query(
            chi,
            query="select substitution from system.macros where macro='test'")
        assert out == "test"

    with And("dictGet() should work"):
        out = clickhouse_query(
            chi, query="select dictGet('one', 'one', toUInt64(0))")
        assert out == "0"

    with And("query_log should be disabled"):
        clickhouse_query(chi, query="system flush logs")
        out = clickhouse_query_with_error(
            chi, query="select count() from system.query_log")
        assert "doesn't exist" in out

    with And("max_memory_usage should be 7000000000"):
        out = clickhouse_query(
            chi,
            query="select value from system.settings where name='max_memory_usage'")
        assert out == "7000000000"

    with And("test_usersd user should be available"):
        clickhouse_query(chi, query="select version()", user="test_usersd")

    with When("Update usersd settings"):
        start_time = kube_get_field("pod", pod_name, ".status.startTime")
        create_and_check("configs/test-016-settings-2.yaml",
                         {"do_not_delete": 1})

        with Then("Wait 60 seconds for configmap changes to apply"):
            time.sleep(60)

        with Then("test_norestart user should be available"):
            clickhouse_query(chi, query="select version()", user="test_norestart")

        with And("ClickHouse should not be restarted"):
            new_start_time = kube_get_field("pod", pod_name, ".status.startTime")
            assert start_time == new_start_time

    kube_delete_chi("test-016-settings")
def test_backup_not_run(self):
    """Check that ClickhouseBackupDoesntRunTooLong fires and then resolves.

    Applies the fake backup setup, waits until the pod's second container
    uses the ``nginx:latest`` image and is ready, and writes a metrics file
    whose ``clickhouse_backup_last_create_finish`` value is two days old. The
    alert is expected to be firing. It then applies the normal backup setup,
    runs a backup create and upload through the HTTP API on port 7171, and
    expects the alert to stop firing.

    Fix over the previous revision: the final assertion message was broken
    across two physical lines inside a single-quoted string, which is not
    valid Python; it is now one literal.
    """
    not_run_pod, _, _, _ = alerts.random_pod_choice_for_callbacks(chi)
    apply_fake_backup("prepare fake backup for time metric")

    with Then(f"wait {not_run_pod} ready"):
        kubectl.wait_field("pod", not_run_pod, ".spec.containers[1].image",
                           "nginx:latest")
        kubectl.wait_field("pod", not_run_pod,
                           ".status.containerStatuses[1].ready", "true")

    with Then(f"setup {not_run_pod} backup create end time"):
        # Timestamp two days in the past, written as the "last backup
        # finished" metric value.
        two_days_ago = int(
            (datetime.datetime.now() - datetime.timedelta(days=2)).timestamp())
        kubectl.launch(
            f'exec {not_run_pod} -c clickhouse-backup -- bash -xc \''
            'echo "# HELP clickhouse_backup_last_create_finish Last backup create finish timestamp" > /usr/share/nginx/html/metrics && '
            'echo "# TYPE clickhouse_backup_last_create_finish gauge" >> /usr/share/nginx/html/metrics && '
            f'echo "clickhouse_backup_last_create_finish {two_days_ago}" >> /usr/share/nginx/html/metrics '
            '\'')

        fired = alerts.wait_alert_state(
            "ClickhouseBackupDoesntRunTooLong",
            "firing",
            expected_state=True,
            sleep_time=settings.prometheus_scrape_interval,
            labels={"pod_name": not_run_pod},
            time_range='60s')
        assert fired, error(
            "can't get ClickhouseBackupDoesntRunTooLong alert in firing state")

    apply_normal_backup()

    backup_name = prepare_table_for_backup(not_run_pod)
    wait_backup_pod_ready_and_curl_installed(not_run_pod)

    with When('Backup is success'):
        exec_on_backup_container(
            not_run_pod,
            f'curl -X POST -sL "http://127.0.0.1:7171/backup/create?name={backup_name}"'
        )
        wait_backup_command_status(not_run_pod,
                                   f'create {backup_name}',
                                   expected_status='success')

        exec_on_backup_container(
            not_run_pod,
            f'curl -X POST -sL "http://127.0.0.1:7171/backup/upload/{backup_name}"'
        )
        wait_backup_command_status(not_run_pod,
                                   f'upload {backup_name}',
                                   expected_status='success')

    with Then("check ClickhouseBackupDoesntRunTooLong gone away"):
        resolved = alerts.wait_alert_state("ClickhouseBackupDoesntRunTooLong",
                                           "firing",
                                           expected_state=False,
                                           labels={"pod_name": not_run_pod})
        assert resolved, error(
            "can't get ClickhouseBackupDoesntRunTooLong alert is gone away")
def test_011_1():
    """Check the secured ``default`` user across three manifest revisions.

    After the first and second manifests the normalized CHI status must show
    ``default/password`` as ``_removed_``. After the first manifest a query
    with an explicit password must return ``OK``; after the third manifest a
    query without a password must return ``OK``. The installation is deleted
    at the end.
    """
    chi_name = "test-011-secured-default"

    def assert_default_password_removed():
        # Read the users section of the normalized spec from the CHI status.
        chi = kube_get("chi", chi_name)
        users = chi["status"]["normalized"]["configuration"]["users"]
        assert "default/password" in users
        assert chi["status"]["normalized"]["configuration"]["users"][
            "default/password"] == "_removed_"

    with Given(
            "test-011-secured-default.yaml with password_sha256_hex for default user"
    ):
        create_and_check(
            "configs/test-011-secured-default.yaml",
            {"pod_count": 1, "do_not_delete": 1})

        with Then("Default user password should be '_removed_'"):
            assert_default_password_removed()

        with And("Connection to localhost should succeed with default user"):
            reply = clickhouse_query_with_error(
                chi_name,
                "select 'OK'",
                pwd="clickhouse_operator_password")
            assert reply == 'OK'

        with When("Trigger installation update"):
            create_and_check(
                "configs/test-011-secured-default-2.yaml",
                {"do_not_delete": 1})

            with Then("Default user password should be '_removed_'"):
                assert_default_password_removed()

        with When("Default user is assigned the different profile"):
            create_and_check(
                "configs/test-011-secured-default-3.yaml",
                {"do_not_delete": 1})

            with Then(
                    "Connection to localhost should succeed with default user"
            ):
                reply = clickhouse_query_with_error(chi_name, "select 'OK'")
                assert reply == 'OK'

        kube_delete_chi("test-011-secured-default")
def sanity_check(self, connection):
    """Run ``SELECT 1`` over the given connection and verify the reply.

    :param connection: object exposing ``query(sql)``; the ``repr`` of what
        it returns for ``SELECT 1`` must equal ``"[(1, )]"``.
    """
    expected = "[(1, )]"
    with Given("PyODBC connection"):
        with When("I do 'SELECT 1'"):
            fetched = connection.query("SELECT 1")

        # Compared through repr() because the expectation is spelled as the
        # textual form of the returned rows.
        with Then(f"the result is {expected}", format_name=False):
            assert repr(fetched) == expected, error("result dit not match")
def test_019(config="configs/test-019-retain-volume.yaml"):
    """Check that a retained volume survives CHI deletion and is reused.

    Creates the installation, writes a one-row table, deletes the CHI,
    asserts exactly one PVC and one PV remain, re-creates the CHI and
    verifies the row can still be read. The CHI is deleted at the end.

    :param config: path of the installation manifest to apply.
    """
    chi = get_chi_name(get_full_path(config))

    def deploy():
        # A fresh expectations dict is built for every call.
        create_and_check(config, {"pod_count": 1, "do_not_delete": 1})

    deploy()
    clickhouse_query(
        chi, query="create table test Engine = Log as select 1 as a")

    with When("CHI with ratained volume is deleted"):
        kube_delete_chi(chi)

    with Then("PVC should be retained"):
        for kind in ("pvc", "pv"):
            assert kube_get_count(kind) == 1

    with When("Re-create CHI"):
        deploy()

    with Then("PVC should be re-mounted and data should be in place"):
        value = clickhouse_query(chi, query="select a from test")
        assert value == "1"

    kube_delete_chi(chi)
def test_detached_parts(self):
    """Check that ClickHouseDetachedParts fires and then clears.

    A part of ``default.test`` is detached on one chosen pod (passed as the
    callback while waiting for the alert to fire); afterwards every detached
    part with an empty ``reason`` is re-attached (passed as the callback
    while waiting for the alert to stop firing). The table is created on the
    cluster at the start and dropped at the end.

    ``chi`` is read from outside this function.
    """
    alert_name = "ClickHouseDetachedParts"
    clickhouse.create_table_on_cluster(chi)
    detached_pod, detached_svc, _, _ = alerts.random_pod_choice_for_callbacks(
        chi)
    chi_name = chi["metadata"]["name"]

    def create_part_and_detach():
        # Insert to produce a new part, look up the most recently modified
        # part, then detach it.
        clickhouse.query(
            chi_name,
            "INSERT INTO default.test SELECT now(), number FROM numbers(100)",
            pod=detached_pod,
        )
        newest_part = clickhouse.query(
            chi_name,
            sql="SELECT name FROM system.parts WHERE database='default' AND table='test' ORDER BY modification_time DESC LIMIT 1",
            pod=detached_pod,
        )
        clickhouse.query(
            chi_name,
            f"ALTER TABLE default.test DETACH PART '{newest_part}'",
            pod=detached_pod,
        )

    def attach_all_parts():
        # One ATTACH statement per listed part, sent as a single query text.
        listing = clickhouse.query(
            chi_name,
            "SELECT name FROM system.detached_parts WHERE database='default' AND table='test' AND reason=''",
            pod=detached_pod,
        )
        attach_sql = "".join(
            f"ALTER TABLE default.test ATTACH PART '{part}';"
            for part in listing.splitlines()
        )
        # Nothing is sent when there is nothing to attach.
        if attach_sql.strip():
            clickhouse.query(chi_name, attach_sql, pod=detached_pod)

    with When("check ClickHouseDetachedParts firing"):
        fired = alerts.wait_alert_state(
            alert_name,
            "firing",
            True,
            labels={"hostname": detached_svc, "chi": chi_name},
            time_range='30s',
            callback=create_part_and_detach,
        )
        assert fired, error(
            "can't get ClickHouseDetachedParts alert in firing state")

    with Then("check ClickHouseDetachedParts gone away"):
        resolved = alerts.wait_alert_state(
            alert_name,
            "firing",
            False,
            labels={"hostname": detached_svc},
            callback=attach_all_parts,
        )
        assert resolved, error(
            "can't check ClickHouseDetachedParts alert is gone away")

    clickhouse.drop_table_on_cluster(chi)
def insert_distributed_sync():
    """Run a synchronous distributed insert that is expected to fail.

    The insert is issued on ``sync_pod`` with
    ``--insert_distributed_timeout=1 --insert_distributed_sync=1`` and the
    returned text must contain ``Code: 159``.

    ``chi`` and ``sync_pod`` are read from outside this function.
    """
    with When("Insert to distributed table SYNC"):
        # Background:
        # https://github.com/ClickHouse/ClickHouse/pull/14260#issuecomment-683616862
        # Alternative statement kept for reference:
        #   INSERT INTO default.test_distr SELECT now(), number FROM numbers(toUInt64(5e9))
        statement = "INSERT INTO FUNCTION remote('127.1', currentDatabase(), test_distr) SELECT now(), number FROM numbers(toUInt64(5e9))"
        client_flags = '--insert_distributed_timeout=1 --insert_distributed_sync=1'
        failure_text = clickhouse.query_with_error(
            chi["metadata"]["name"],
            statement,
            pod=sync_pod,
            host=sync_pod,
            ns=kubectl.namespace,
            advanced_params=client_flags,
        )
        # NOTE(review): 159 is presumably ClickHouse's timeout error code -
        # confirm against the server's error code list.
        assert 'Code: 159' in failure_text
def restart_clickhouse_and_insert_to_replicated_table():
    """Stop fetches on one replica, then insert rows through another pod.

    Despite the name, this body performs no restart: it runs
    ``SYSTEM STOP FETCHES default.test_repl`` on ``stop_replica_pod``
    (failure tolerated) and then inserts 100000 rows into
    ``default.test_repl`` via ``insert_pod``.

    ``stop_replica_svc``, ``stop_replica_pod`` and ``insert_pod`` are read
    from outside this function.
    """
    with When(f"stop replica fetches on {stop_replica_svc}"):
        stop_fetches_sql = "SYSTEM STOP FETCHES default.test_repl"
        # Only this first command is allowed to fail.
        kubectl.launch(
            f'exec -n {kubectl.namespace} {stop_replica_pod} -c clickhouse -- clickhouse-client -q "{stop_fetches_sql}"',
            ok_to_fail=True,
        )

        insert_sql = "INSERT INTO default.test_repl SELECT now(), number FROM numbers(100000)"
        kubectl.launch(
            f'exec -n {kubectl.namespace} {insert_pod} -c clickhouse -- clickhouse-client -q "{insert_sql}"',
        )
def delete_chi(chi, ns=namespace):
    """Delete a daisyinstallation and wait for its objects to reach zero.

    :param chi: name of the daisyinstallation to delete.
    :param ns: namespace to operate in; defaults to the module-level
        ``namespace``.
    """
    with When(f"Delete daisyinstallation {chi}"):
        delete_command = f"delete daisyinstallation {chi}"
        launch(delete_command, ns=ns, timeout=900)

        # Expect no statefulsets, pods or services to remain for this name.
        expected_counts = dict.fromkeys(("statefulset", "pod", "service"), 0)
        wait_objects(chi, expected_counts, ns)