Code example #1
def test_version_changed(self):
    changed_pod, changed_svc, _, _ = alerts.random_pod_choice_for_callbacks(
        chi)

    with When("apply changed settings"):
        kubectl.create_and_check(
            config="configs/test-cluster-for-alerts-changed-settings.yaml",
            check={
                "apply_templates": [
                    "templates/tpl-clickhouse-stable.yaml",
                    "templates/tpl-persistent-volume-100Mi.yaml"
                ],
                "object_counts": {
                    "statefulset": 2,
                    "pod": 2,
                    "service": 3,
                },
                "do_not_delete":
                1
            })
        prometheus_scrape_interval = 15
        with Then(
                f"wait prometheus_scrape_interval={prometheus_scrape_interval}*2 sec"
        ):
            time.sleep(prometheus_scrape_interval * 2)

    with Then("check ClickHouseVersionChanged firing"):
        fired = alerts.wait_alert_state(
            "ClickHouseVersionChanged",
            "firing",
            True,
            labels={"hostname": changed_svc},
            time_range="30s",
            sleep_time=settings.prometheus_scrape_interval)
        assert fired, error(
            "can't get ClickHouseVersionChanged alert in firing state")

    with When("rollback changed settings"):
        kubectl.create_and_check(
            config="configs/test-cluster-for-alerts.yaml",
            check={
                "apply_templates": [
                    "templates/tpl-clickhouse-latest.yaml",
                    "templates/tpl-clickhouse-alerts.yaml",
                    "templates/tpl-persistent-volume-100Mi.yaml"
                ],
                "object_counts": {
                    "statefulset": 2,
                    "pod": 2,
                    "service": 3,
                },
                "do_not_delete":
                1
            })

    with Then("check ClickHouseVersionChanged gone away"):
        resolved = alerts.wait_alert_state("ClickHouseVersionChanged",
                                           "firing",
                                           False,
                                           labels={"hostname": changed_svc},
                                           sleep_time=30)
        assert resolved, error(
            "can't check ClickHouseVersionChanged alert is gone away")
Code example #2
def test_021(config="configs/test-021-rescale-volume-01.yaml"):
    with Given("Default storage class is expandable"):
        default_storage_class = kubectl.get_default_storage_class()
        assert default_storage_class is not None
        assert len(default_storage_class) > 0
        allow_volume_expansion = kubectl.get_field("storageclass", default_storage_class, ".allowVolumeExpansion")
        if allow_volume_expansion != "true":
            kubectl.launch(f"patch storageclass {default_storage_class} -p '{{\"allowVolumeExpansion\":true}}'")

    chi = manifest.get_chi_name(util.get_full_path(config))
    kubectl.create_and_check(
        config=config,
        check={
            "pod_count": 1,
            "do_not_delete": 1,
        })

    with Then("Storage size should be 100Mi"):
        size = kubectl.get_pvc_size("disk1-chi-test-021-rescale-volume-simple-0-0-0")
        assert size == "100Mi"

    with When("Re-scale volume configuration to 200Mb"):
        kubectl.create_and_check(
            config="configs/test-021-rescale-volume-02-enlarge-disk.yaml",
            check={
                "pod_count": 1,
                "do_not_delete": 1,
            })

        with Then("Storage size should be 200Mi"):
            size = kubectl.get_pvc_size("disk1-chi-test-021-rescale-volume-simple-0-0-0")
            assert size == "200Mi"

    with When("Add second disk 50Mi"):
        kubectl.create_and_check(
            config="configs/test-021-rescale-volume-03-add-disk.yaml",
            check={
                "pod_count": 1,
                "pod_volumes": {
                    "/var/lib/clickhouse",
                    "/var/lib/clickhouse2",
                },
                "do_not_delete": 1,
            })

        with Then("There should be two PVC"):
            size = kubectl.get_pvc_size("disk1-chi-test-021-rescale-volume-simple-0-0-0")
            assert size == "200Mi"
            size = kubectl.get_pvc_size("disk2-chi-test-021-rescale-volume-simple-0-0-0")
            assert size == "50Mi"

        with And("There should be two disks recognized by ClickHouse"):
            # ClickHouse needs some time to mount the new volume; wait to avoid a race condition.
            time.sleep(120)
            out = clickhouse.query(chi, "SELECT count() FROM system.disks")
            print("SELECT count() FROM system.disks RETURNED:")
            print(out)
            assert out == "2"

    kubectl.delete_chi(chi)
Code example #3
def test_insert_related_alerts(self):
    clickhouse.create_table_on_cluster(chi)
    delayed_pod, delayed_svc, rejected_pod, rejected_svc = alerts.random_pod_choice_for_callbacks(
        chi)

    prometheus_scrape_interval = settings.prometheus_scrape_interval
    # default values in system.merge_tree_settings
    parts_to_throw_insert = 300
    parts_to_delay_insert = 150
    chi_name = chi["metadata"]["name"]

    parts_limits = parts_to_delay_insert
    selected_svc = delayed_svc
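    # insert_many_parts_to_clickhouse() below is invoked twice: first with
    # parts_to_delay_insert against the delayed replica, then again further down
    # with parts_to_throw_insert against the rejected replica.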

    def insert_many_parts_to_clickhouse():
        stop_merges = "SYSTEM STOP MERGES default.test;"
        min_block = "SET max_block_size=1; SET max_insert_block_size=1; SET min_insert_block_size_rows=1;"
        with When(f"Insert to MergeTree table {parts_limits} parts"):
            r = parts_limits
            sql = stop_merges + min_block + f"INSERT INTO default.test(event_time, test) SELECT now(),number FROM system.numbers LIMIT {r};"
            clickhouse.query(chi_name,
                             sql,
                             host=selected_svc,
                             ns=kubectl.namespace)

            # @TODO we need only one query after resolve https://github.com/ClickHouse/ClickHouse/issues/11384 and switch to 21.3+
            sql = min_block + "INSERT INTO default.test(event_time, test) SELECT now(), number FROM system.numbers LIMIT 1;"
            clickhouse.query_with_error(chi_name,
                                        sql,
                                        host=selected_svc,
                                        ns=kubectl.namespace)
            with Then(
                    f"wait prometheus_scrape_interval={prometheus_scrape_interval}*2 sec"
            ):
                time.sleep(prometheus_scrape_interval * 2)

            sql = min_block + "INSERT INTO default.test(event_time, test) SELECT now(), number FROM system.numbers LIMIT 1;"
            clickhouse.query_with_error(chi_name,
                                        sql,
                                        host=selected_svc,
                                        ns=kubectl.namespace)

    insert_many_parts_to_clickhouse()
    with Then("check ClickHouseDelayedInsertThrottling firing"):
        fired = alerts.wait_alert_state("ClickHouseDelayedInsertThrottling",
                                        "firing",
                                        True,
                                        labels={"hostname": delayed_svc},
                                        time_range="60s")
        assert fired, error(
            "can't get ClickHouseDelayedInsertThrottling alert in firing state"
        )
    with Then("check ClickHouseMaxPartCountForPartition firing"):
        fired = alerts.wait_alert_state("ClickHouseMaxPartCountForPartition",
                                        "firing",
                                        True,
                                        labels={"hostname": delayed_svc},
                                        time_range="90s")
        assert fired, error(
            "can't get ClickHouseMaxPartCountForPartition alert in firing state"
        )
    with Then("check ClickHouseLowInsertedRowsPerQuery firing"):
        fired = alerts.wait_alert_state(
            "ClickHouseLowInsertedRowsPerQuery",
            "firing",
            True,
            labels={"hostname": delayed_svc},
            time_range="120s",
        )
        assert fired, error(
            "can't get ClickHouseLowInsertedRowsPerQuery alert in firing state"
        )

    clickhouse.query(chi_name,
                     "SYSTEM START MERGES default.test",
                     host=selected_svc,
                     ns=kubectl.namespace)

    with Then("check ClickHouseDelayedInsertThrottling gone away"):
        resolved = alerts.wait_alert_state("ClickHouseDelayedInsertThrottling",
                                           "firing",
                                           False,
                                           labels={"hostname": delayed_svc})
        assert resolved, error(
            "can't check ClickHouseDelayedInsertThrottling alert is gone away")
    with Then("check ClickHouseMaxPartCountForPartition gone away"):
        resolved = alerts.wait_alert_state(
            "ClickHouseMaxPartCountForPartition",
            "firing",
            False,
            labels={"hostname": delayed_svc})
        assert resolved, error(
            "can't check ClickHouseMaxPartCountForPartition alert is gone away"
        )
    with Then("check ClickHouseLowInsertedRowsPerQuery gone away"):
        resolved = alerts.wait_alert_state("ClickHouseLowInsertedRowsPerQuery",
                                           "firing",
                                           False,
                                           labels={"hostname": delayed_svc})
        assert resolved, error(
            "can't check ClickHouseLowInsertedRowsPerQuery alert is gone away")

    parts_limits = parts_to_throw_insert
    selected_svc = rejected_svc
    insert_many_parts_to_clickhouse()
    with Then("check ClickHouseRejectedInsert firing"):
        fired = alerts.wait_alert_state(
            "ClickHouseRejectedInsert",
            "firing",
            True,
            labels={"hostname": rejected_svc},
            time_range="30s",
            sleep_time=settings.prometheus_scrape_interval)
        assert fired, error(
            "can't get ClickHouseRejectedInsert alert in firing state")

    with Then("check ClickHouseRejectedInsert gone away"):
        resolved = alerts.wait_alert_state("ClickHouseRejectedInsert",
                                           "firing",
                                           False,
                                           labels={"hostname": rejected_svc})
        assert resolved, error(
            "can't check ClickHouseRejectedInsert alert is gone away")

    clickhouse.query(chi_name,
                     "SYSTEM START MERGES default.test",
                     host=selected_svc,
                     ns=kubectl.namespace)
    clickhouse.drop_table_on_cluster(chi)
Code example #4
def test_read_only_replica(self):
    read_only_pod, read_only_svc, other_pod, other_svc = alerts.random_pod_choice_for_callbacks(
        chi)
    chi_name = chi["metadata"]["name"]
    clickhouse.create_table_on_cluster(
        chi, 'all-replicated', 'default.test_repl',
        '(event_time DateTime, test UInt64) ' +
        'ENGINE ReplicatedMergeTree(\'/clickhouse/tables/{installation}-{shard}/test_repl\', \'{replica}\') ORDER BY tuple()'
    )

    def restart_zookeeper():
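        # wait_alert_state() callback: killing PID 1 restarts the zookeeper-0 container,
        # and the INSERT issued while ZooKeeper is down pushes the replica into read-only
        # mode, which should fire ClickHouseReadonlyReplica.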
        kubectl.launch(
            f"exec -n {kubectl.namespace} zookeeper-0 -- sh -c \"kill 1\"",
            ok_to_fail=True,
        )
        clickhouse.query_with_error(
            chi_name,
            "INSERT INTO default.test_repl VALUES(now(),rand())",
            host=read_only_svc)

    with Then("check ClickHouseReadonlyReplica firing"):
        fired = alerts.wait_alert_state(
            "ClickHouseReadonlyReplica",
            "firing",
            True,
            labels={"hostname": read_only_svc},
            time_range='30s',
            sleep_time=settings.prometheus_scrape_interval,
            callback=restart_zookeeper)
        assert fired, error(
            "can't get ClickHouseReadonlyReplica alert in firing state")
    with Then("check ClickHouseReadonlyReplica gone away"):
        resolved = alerts.wait_alert_state("ClickHouseReadonlyReplica",
                                           "firing",
                                           False,
                                           labels={"hostname": read_only_svc})
        assert resolved, error(
            "can't check ClickHouseReadonlyReplica alert is gone away")

    kubectl.wait_pod_status("zookeeper-0", "Running", ns=kubectl.namespace)
    kubectl.wait_jsonpath("pod",
                          "zookeeper-0",
                          "{.status.containerStatuses[0].ready}",
                          "true",
                          ns=kubectl.namespace)

    for i in range(11):
        zookeeper_status = kubectl.launch(
            f"exec -n {kubectl.namespace} zookeeper-0 -- sh -c \"echo ruok | nc 127.0.0.1 2181\"",
            ok_to_fail=True)
        if "imok" in zookeeper_status:
            break
        elif i == 10:
            fail(f"invalid zookeeper status after {i} retries")
        with Then("zookeper is not ready, wait 2 seconds"):
            time.sleep(2)

    clickhouse.query_with_error(
        chi_name,
        "SYSTEM RESTART REPLICAS; SYSTEM SYNC REPLICA default.test_repl",
        host=read_only_svc,
        timeout=240)
    clickhouse.query_with_error(
        chi_name,
        "SYSTEM RESTART REPLICAS; SYSTEM SYNC REPLICA default.test_repl",
        host=other_svc,
        timeout=240)

    clickhouse.drop_table_on_cluster(chi, 'all-replicated',
                                     'default.test_repl')
Code example #5
def test_metrics_exporter_reboot():
    def check_monitoring_chi(operator_namespace,
                             operator_pod,
                             expect_result,
                             max_retries=10):
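        # Poll the metrics-exporter /chi endpoint until it returns the expected
        # list of watched installations, backing off between retries.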
        with And(
                f"metrics-exporter /chi enpoint result should return {expect_result}"
        ):
            for i in range(1, max_retries):
                out = kubectl.kubectl(
                    f"exec {operator_pod} -c metrics-exporter wget -- -O- -q http://127.0.0.1:8888/chi",
                    ns=operator_namespace)
                out = json.loads(out)
                if out == expect_result:
                    break
                with Then("Not ready. Wait for " + str(i * 5) + " seconds"):
                    time.sleep(i * 5)
            assert out == expect_result, error()

    with Given("clickhouse-operator is installed"):
        kubectl.kube_wait_field("pods",
                                "-l app=clickhouse-operator",
                                ".status.containerStatuses[*].ready",
                                "true,true",
                                ns=settings.operator_namespace)
        assert kubectl.kube_get_count(
            "pod", ns='--all-namespaces',
            label="-l app=clickhouse-operator") > 0, error()

        out = kubectl.kubectl("get pods -l app=clickhouse-operator",
                              ns=settings.operator_namespace).splitlines()[1]
        operator_pod = re.split(r'[\t\r\n\s]+', out)[0]
        operator_namespace = settings.operator_namespace
        kubectl.kube_deletens(kubectl.namespace)
        kubectl.kube_createns(kubectl.namespace)
        check_monitoring_chi(operator_namespace, operator_pod, [])
        with And("created simple clickhouse installation"):
            config = kubectl.get_full_path(
                "../docs/chi-examples/01-simple-layout-01-1shard-1repl.yaml")
            kubectl.create_and_check(config, {
                "object_counts": [1, 1, 2],
                "do_not_delete": True
            })
            expected_chi = [{
                "namespace": "test",
                "name": "simple-01",
                "hostnames": ["chi-simple-01-cluster-0-0.test.svc.cluster.local"]
            }]
            check_monitoring_chi(operator_namespace, operator_pod,
                                 expected_chi)
            with When("reboot metrics exporter"):
                kubectl.kubectl(
                    f"exec -n {operator_namespace} {operator_pod} -c metrics-exporter reboot"
                )
                time.sleep(15)
                kubectl.kube_wait_field("pods",
                                        "-l app=clickhouse-operator",
                                        ".status.containerStatuses[*].ready",
                                        "true,true",
                                        ns=settings.operator_namespace)
                with Then("check metrics exporter still contains chi objects"):
                    check_monitoring_chi(operator_namespace, operator_pod,
                                         expected_chi)
                    kubectl.kube_delete(config)
                    check_monitoring_chi(operator_namespace, operator_pod, [])
Code example #6
def test_distributed_files_to_insert(self):
    delayed_pod, delayed_svc, restarted_pod, restarted_svc = alerts.random_pod_choice_for_callbacks(
        chi)
    clickhouse.create_distributed_table_on_cluster(chi)

    insert_sql = 'INSERT INTO default.test_distr(event_time, test) SELECT now(), number FROM system.numbers LIMIT 1000'
    clickhouse.query(chi["metadata"]["name"],
                     'SYSTEM STOP DISTRIBUTED SENDS default.test_distr',
                     pod=delayed_pod,
                     ns=kubectl.namespace)

    files_to_insert_from_metrics = 0
    files_to_insert_from_disk = 0
    tries = 0
    # we need more than 50 delayed files to trigger the alert
    while files_to_insert_from_disk <= 55 and files_to_insert_from_metrics <= 55 and tries < 500:
        kubectl.launch(
            f"exec -n {kubectl.namespace} {restarted_pod} -c clickhouse-pod -- kill 1",
            ok_to_fail=True,
        )
        clickhouse.query(chi["metadata"]["name"],
                         insert_sql,
                         pod=delayed_pod,
                         host=delayed_pod,
                         ns=kubectl.namespace)
        files_to_insert_from_metrics = clickhouse.query(
            chi["metadata"]["name"],
            "SELECT value FROM system.metrics WHERE metric='DistributedFilesToInsert'",
            pod=delayed_pod,
            ns=kubectl.namespace)
        files_to_insert_from_metrics = int(files_to_insert_from_metrics)

        files_to_insert_from_disk = int(
            kubectl.launch(
                f"exec -n {kubectl.namespace} {delayed_pod} -c clickhouse-pod -- bash -c 'ls -la /var/lib/clickhouse/data/default/test_distr/*/*.bin 2>/dev/null | wc -l'",
                ok_to_fail=False,
            ))
        # increment the retry counter, otherwise the `tries < 500` guard can never stop the loop
        tries += 1

    with When("reboot clickhouse-server pod"):
        fired = alerts.wait_alert_state(
            "ClickHouseDistributedFilesToInsertHigh",
            "firing",
            True,
            labels={
                "hostname": delayed_svc,
                "chi": chi["metadata"]["name"]
            })
        assert fired, error(
            "can't get ClickHouseDistributedFilesToInsertHigh alert in firing state"
        )

    kubectl.wait_pod_status(restarted_pod, "Running", ns=kubectl.namespace)

    clickhouse.query(chi["metadata"]["name"],
                     'SYSTEM START DISTRIBUTED SENDS default.test_distr',
                     pod=delayed_pod,
                     ns=kubectl.namespace)

    with Then("check ClickHouseDistributedFilesToInsertHigh gone away"):
        resolved = alerts.wait_alert_state(
            "ClickHouseDistributedFilesToInsertHigh",
            "firing",
            False,
            labels={"hostname": delayed_svc})
        assert resolved, error(
            "can't check ClickHouseDistributedFilesToInsertHigh alert is gone away"
        )

    clickhouse.drop_distributed_table_on_cluster(chi)
Code example #7
def check_datatype(connection,
                   datatype,
                   values,
                   nullable=False,
                   quote=False,
                   repr=str,
                   encoding="utf-8",
                   expected=None):
    """Check support for a data type.
    """
    if expected is None:
        expected = dict()

    if nullable:
        datatype = f"Nullable({datatype})"
        values.append(NULL)

        if expected:
            expected["all"] = expected['all'].rsplit("]", 1)[0] + ", (None, )]"
            expected[NULL] = "[(None, )]"

    with Given("PyODBC connection"):
        with Given(f"parameters",
                   description=f"""
            values {values}
            expected data {expected}
            """):

            with Given(f"table with a column of data type {datatype}"):
                connection.query("DROP TABLE IF EXISTS ps", fetch=False)
                connection.query(
                    f"CREATE TABLE ps (v {datatype}) ENGINE = Memory",
                    fetch=False)
                try:
                    connection.connection.setencoding(encoding=encoding)
                    for v in values:
                        with When(f"I insert value {repr(v)}", flags=TE):
                            # connection.query("INSERT INTO ps VALUES (?)", [v], fetch=False)
                            if quote:
                                connection.query(
                                    f"INSERT INTO ps VALUES ('{repr(v)}')",
                                    fetch=False)
                            else:
                                connection.query(
                                    f"INSERT INTO ps VALUES ({repr(v)})",
                                    fetch=False)

                    with When("I select all values", flags=TE):
                        rows = connection.query("SELECT * FROM ps ORDER BY v")
                        if expected.get("all") is not None:
                            with Then(f"the result is {expected.get('all')}",
                                      flags=TE):
                                assert repr(rows) == expected.get(
                                    "all"), error("result did not match")

                    with When(f"I have values {repr(values)}"):
                        for v in values:
                            if v is NULL:
                                # comparing to NULL is not valid in SQL
                                continue
                            with When(f"I select value {repr(v)}", flags=TE):
                                rows = connection.query(
                                    "SELECT * FROM ps WHERE v = ? ORDER BY v",
                                    [v])
                                if expected.get(v) is not None:
                                    with Then(
                                            f"the result is {repr(expected.get(v))}",
                                            flags=TE):
                                        assert repr(rows) == expected.get(
                                            v), error("result did not match")
                finally:
                    connection.connection.setencoding(
                        encoding=connection.encoding)
                    connection.query("DROP TABLE ps", fetch=False)
Code example #8
def wait_backup_pod_ready_and_curl_installed(backup_pod):
    with Then(f"wait {backup_pod} ready"):
        kubectl.wait_field("pod", backup_pod,
                           ".status.containerStatuses[1].ready", "true")
        kubectl.launch(
            f'exec {backup_pod} -c clickhouse-backup -- curl --version')
Code example #9
def kube_check_pod_image(chi_name, image, ns="test"):
    pod_image = kube_get_pod_image(chi_name, ns)
    with Then(f"Expect pod image {pod_image} to match {image}"):
        assert pod_image == image
Code example #10
def kube_check_service(service_name, service_type, ns="test"):
    with When(f"{service_name} is available"):
        service = kube_get("service", service_name, ns=ns)
        with Then(f"Service type is {service_type}"):
            assert service["spec"]["type"] == service_type
Code example #11
def kube_check_pod_ports(chi_name, ports, ns="test"):
    pod_ports = kube_get_pod_ports(chi_name, ns)
    with Then(f"Expect pod ports {pod_ports} to match {ports}"):
        # list.sort() sorts in place and returns None, so comparing its results would
        # always pass; compare sorted copies instead
        assert sorted(pod_ports) == sorted(ports)
Code example #12
def kube_delete(config, ns="test"):
    with When(f"{config} is deleted"):
        cmd = shell(f"{kubectlcmd} delete -n {ns} -f {config}")
    with Then("exitcode should be 0"):
        assert cmd.exitcode == 0, error()
Code example #13
def kube_apply(config, ns="test"):
    with When(f"{config} is applied"):
        cmd = shell(f"{kubectlcmd} apply -n {ns} -f {config}")
    with Then("exitcode should be 0"):
        assert cmd.exitcode == 0, error()
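For reference, a hypothetical composition of the kube_apply, kube_check_service, and kube_delete helpers above; the manifest path and service name are illustrative only, not taken from the test suite.

# Hypothetical usage of the helpers above; config path and service name are made up.
def check_service_roundtrip():
    config = "configs/example-service.yaml"   # hypothetical manifest defining one Service
    kube_apply(config, ns="test")             # asserts kubectl apply exit code is 0
    kube_check_service("clickhouse-example", "ClusterIP", ns="test")
    kube_delete(config, ns="test")            # asserts kubectl delete exit code is 0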
Code example #14
def test_008():
    with Then("Test simple chi for operator restart"):
        test_operator_restart("configs/test-008-operator-restart-1.yaml")
    with Then("Test advanced chi for operator restart"):
        test_operator_restart("configs/test-008-operator-restart-2.yaml")
Code example #15
def test_metrics_exporter_with_multiple_clickhouse_version():
    def check_monitoring_metrics(operator_namespace,
                                 operator_pod,
                                 expect_result,
                                 max_retries=10):
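        # Poll the metrics-exporter /metrics endpoint until every metric name in
        # expect_result is present (True) or absent (False), backing off between retries.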
        with And(
                f"metrics-exporter /metrics enpoint result should match with {expect_result}"
        ):
            for i in range(1, max_retries):
                out = kubectl.kubectl(
                    f"exec {operator_pod} -c metrics-exporter wget -- -O- -q http://127.0.0.1:8888/metrics",
                    ns=operator_namespace)
                all_strings_expected_done = True
                for string, exists in expect_result.items():
                    all_strings_expected_done = (exists == (string in out))
                    if not all_strings_expected_done:
                        break

                if all_strings_expected_done:
                    break
                with Then("Not ready. Wait for " + str(i * 5) + " seconds"):
                    time.sleep(i * 5)
            assert all_strings_expected_done, error()

    with Given("clickhouse-operator pod exists"):
        out = kubectl.kubectl("get pods -l app=clickhouse-operator",
                              ns='kube-system').splitlines()[1]
        operator_pod = re.split(r'[\t\r\n\s]+', out)[0]
        operator_namespace = "kube-system"

        with Then("check empty /metrics"):
            kubectl.kube_deletens(kubectl.namespace)
            kubectl.kube_createns(kubectl.namespace)
            check_monitoring_metrics(
                operator_namespace,
                operator_pod,
                expect_result={
                    'chi_clickhouse_metric_VersionInteger': False,
                })

        with Then("Install multiple clickhouse version"):
            config = kubectl.get_full_path(
                "configs/test-017-multi-version.yaml")
            kubectl.create_and_check(config, {
                "object_counts": [4, 4, 5],
                "do_not_delete": True
            })
            with And("Check not empty /metrics"):
                check_monitoring_metrics(
                    operator_namespace,
                    operator_pod,
                    expect_result={
                        '# HELP chi_clickhouse_metric_VersionInteger':
                        True,
                        '# TYPE chi_clickhouse_metric_VersionInteger gauge':
                        True,
                        'chi_clickhouse_metric_VersionInteger{chi="test-017-multi-version",hostname="chi-test-017-multi-version-default-0-0':
                        True,
                        'chi_clickhouse_metric_VersionInteger{chi="test-017-multi-version",hostname="chi-test-017-multi-version-default-1-0':
                        True,
                        'chi_clickhouse_metric_VersionInteger{chi="test-017-multi-version",hostname="chi-test-017-multi-version-default-2-0':
                        True,
                        'chi_clickhouse_metric_VersionInteger{chi="test-017-multi-version",hostname="chi-test-017-multi-version-default-3-0':
                        True,
                    })

        with Then("check empty /metrics after delete namespace"):
            kubectl.kube_deletens(kubectl.namespace)
            check_monitoring_metrics(
                operator_namespace,
                operator_pod,
                expect_result={
                    'chi_clickhouse_metric_VersionInteger': False,
                })
Code example #16
def test_009(version_from="0.11.0", version_to=settings.operator_version):
    with Then("Test simple chi for operator upgrade"):
        test_operator_upgrade("configs/test-009-operator-upgrade-1.yaml", version_from, version_to)
    with Then("Test advanced chi for operator upgrade"):
        test_operator_upgrade("configs/test-009-operator-upgrade-2.yaml", version_from, version_to)
Code example #17
def test_backup_duration(self):
    short_pod, _, long_pod, _ = alerts.random_pod_choice_for_callbacks(chi)
    apply_fake_backup("prepare fake backup duration metric")

    for pod in [short_pod, long_pod]:
        with Then(f"wait {pod} ready"):
            kubectl.wait_field("pod", pod, ".spec.containers[1].image",
                               "nginx:latest")
            kubectl.wait_field("pod", pod,
                               ".status.containerStatuses[1].ready", "true")

            fired = alerts.wait_alert_state(
                "ClickHouseBackupTooLong",
                "firing",
                expected_state=True,
                sleep_time=settings.prometheus_scrape_interval,
                labels={"pod_name": pod},
                time_range='60s')
            assert fired, error(
                f"can't get ClickHouseBackupTooLong alert in firing state for {pod}"
            )

    with Then(f"wait when prometheus will scrape fake data"):
        time.sleep(70)

    with Then(f"decrease {short_pod} backup duration"):
        kubectl.launch(
            f'exec {short_pod} -c clickhouse-backup -- bash -xc \''
            'echo "# HELP clickhouse_backup_last_create_duration Backup create duration in nanoseconds" > /usr/share/nginx/html/metrics && '
            'echo "# TYPE clickhouse_backup_last_create_duration gauge" >> /usr/share/nginx/html/metrics && '
            'echo "clickhouse_backup_last_create_duration 7000000000000" >> /usr/share/nginx/html/metrics && '
            'echo "# HELP clickhouse_backup_last_create_status Last backup create status: 0=failed, 1=success, 2=unknown" >> /usr/share/nginx/html/metrics && '
            'echo "# TYPE clickhouse_backup_last_create_status gauge" >> /usr/share/nginx/html/metrics && '
            'echo "clickhouse_backup_last_create_status 1" >> /usr/share/nginx/html/metrics'
            '\'')

        fired = alerts.wait_alert_state(
            "ClickHouseBackupTooShort",
            "firing",
            expected_state=True,
            sleep_time=settings.prometheus_scrape_interval,
            labels={"pod_name": short_pod},
            time_range='60s')
        assert fired, error(
            "can't get ClickHouseBackupTooShort alert in firing state")

    apply_normal_backup()

    with Then("check ClickHouseBackupTooShort gone away"):
        resolved = alerts.wait_alert_state("ClickHouseBackupTooShort",
                                           "firing",
                                           expected_state=False,
                                           labels={"pod_name": short_pod})
        assert resolved, error(
            "can't get ClickHouseBackupTooShort alert is gone away")

    with Then("check ClickHouseBackupTooLong gone away"):
        resolved = alerts.wait_alert_state("ClickHouseBackupTooLong",
                                           "firing",
                                           expected_state=False,
                                           labels={"pod_name": long_pod})
        assert resolved, error(
            "can't get ClickHouseBackupTooLong alert is gone away")