def test_backup_not_run(self, chi, minio_spec):
    """Check that ClickhouseBackupDoesntRunTooLong fires and later resolves.

    After ``apply_fake_backup`` the test waits until the second container of
    a chosen pod runs ``nginx:latest`` and is ready, then writes a metrics
    file whose ``clickhouse_backup_last_create_finish`` value is a timestamp
    two days in the past, and expects the alert to be firing for that pod.
    After ``apply_normal_backup`` a backup is created and uploaded through
    the HTTP API on port 7171, and the alert is expected to stop firing.
    """
    alert_name = "ClickhouseBackupDoesntRunTooLong"
    target_pod, _, _, _ = alerts.random_pod_choice_for_callbacks(chi)
    apply_fake_backup("prepare fake backup for time metric")

    with Then(f"wait {target_pod} ready"):
        # Container index 1 must run the nginx image and report ready.
        for field_path, wanted in (
            (".spec.containers[1].image", "nginx:latest"),
            (".status.containerStatuses[1].ready", "true"),
        ):
            kubectl.wait_field("pod", target_pod, field_path, wanted)

    with Then(f"setup {target_pod} backup create end time"):
        # Pretend the last successful backup finished two days ago.
        stale_finish_ts = int(
            (datetime.datetime.now() - datetime.timedelta(days=2)).timestamp())
        write_metrics_cmd = (
            f'exec {target_pod} -c clickhouse-backup -- bash -xc \''
            'echo "# HELP clickhouse_backup_last_create_finish Last backup create finish timestamp" > /usr/share/nginx/html/metrics && '
            'echo "# TYPE clickhouse_backup_last_create_finish gauge" >> /usr/share/nginx/html/metrics && '
            f'echo "clickhouse_backup_last_create_finish {stale_finish_ts}" >> /usr/share/nginx/html/metrics '
            '\'')
        kubectl.launch(write_metrics_cmd)

        alert_is_firing = alerts.wait_alert_state(
            alert_name,
            "firing",
            expected_state=True,
            sleep_time=settings.prometheus_scrape_interval,
            labels={"pod_name": target_pod},
            time_range='60s')
        assert alert_is_firing, error(
            "can't get ClickhouseBackupDoesntRunTooLong alert in firing state")

    apply_normal_backup()

    backup = prepare_table_for_backup(target_pod, chi)
    wait_backup_pod_ready_and_curl_installed(target_pod)

    with When('Backup is success'):
        # Each step: trigger the action over HTTP, then wait for it to succeed.
        backup_steps = (
            (f'curl -X POST -sL "http://127.0.0.1:7171/backup/create?name={backup}"',
             f'create {backup}'),
            (f'curl -X POST -sL "http://127.0.0.1:7171/backup/upload/{backup}"',
             f'upload {backup}'),
        )
        for curl_cmd, command in backup_steps:
            exec_on_backup_container(target_pod, curl_cmd)
            wait_backup_command_status(target_pod,
                                       command,
                                       expected_status='success')

    with Then("check ClickhouseBackupDoesntRunTooLong gone away"):
        alert_is_gone = alerts.wait_alert_state(
            alert_name,
            "firing",
            expected_state=False,
            labels={"pod_name": target_pod})
        assert alert_is_gone, error(
            "can't get ClickhouseBackupDoesntRunTooLong alert is gone away")
def wait_backup_pod_ready_and_curl_installed(backup_pod):
    """Wait for the pod's second container to be ready and run ``curl --version``.

    The curl invocation is executed in the ``clickhouse-backup`` container of
    ``backup_pod`` via ``kubectl.launch``.
    """
    ready_field = ".status.containerStatuses[1].ready"
    curl_probe = f'exec {backup_pod} -c clickhouse-backup -- curl --version'
    with Then(f"wait {backup_pod} ready"):
        kubectl.wait_field("pod", backup_pod, ready_field, "true")
        kubectl.launch(curl_probe)
def test_backup_duration(self, chi, minio_spec):
    """Check the ClickHouseBackupTooLong and ClickHouseBackupTooShort alerts.

    After ``apply_fake_backup`` the test waits until the second container of
    two chosen pods runs ``nginx:latest`` and is ready, and expects
    ClickHouseBackupTooLong to be firing for each of them. It then rewrites
    the metrics file on one pod with a ``clickhouse_backup_last_create_duration``
    of 7000000000000 and expects ClickHouseBackupTooShort to fire for it.
    After ``apply_normal_backup`` both alerts are expected to stop firing.
    """
    pod_short, _, pod_long, _ = alerts.random_pod_choice_for_callbacks(chi)
    apply_fake_backup("prepare fake backup duration metric")

    for current_pod in (pod_short, pod_long):
        with Then(f"wait {current_pod} ready"):
            kubectl.wait_field("pod", current_pod,
                               ".spec.containers[1].image", "nginx:latest")
            kubectl.wait_field("pod", current_pod,
                               ".status.containerStatuses[1].ready", "true")

            too_long_firing = alerts.wait_alert_state(
                "ClickHouseBackupTooLong",
                "firing",
                expected_state=True,
                sleep_time=settings.prometheus_scrape_interval,
                labels={"pod_name": current_pod},
                time_range='60s')
            assert too_long_firing, error(
                f"can't get ClickHouseBackupTooLong alert in firing state for {current_pod}"
            )

    with Then(f"wait when prometheus will scrape fake data"):
        scrape_wait_seconds = 70
        time.sleep(scrape_wait_seconds)

    with Then(f"decrease {pod_short} backup duration"):
        # Overwrite the served metrics file with a smaller duration and a
        # "success" status for the last backup.
        shorten_duration_cmd = (
            f'exec {pod_short} -c clickhouse-backup -- bash -xc \''
            'echo "# HELP clickhouse_backup_last_create_duration Backup create duration in nanoseconds" > /usr/share/nginx/html/metrics && '
            'echo "# TYPE clickhouse_backup_last_create_duration gauge" >> /usr/share/nginx/html/metrics && '
            'echo "clickhouse_backup_last_create_duration 7000000000000" >> /usr/share/nginx/html/metrics && '
            'echo "# HELP clickhouse_backup_last_create_status Last backup create status: 0=failed, 1=success, 2=unknown" >> /usr/share/nginx/html/metrics && '
            'echo "# TYPE clickhouse_backup_last_create_status gauge" >> /usr/share/nginx/html/metrics && '
            'echo "clickhouse_backup_last_create_status 1" >> /usr/share/nginx/html/metrics'
            '\'')
        kubectl.launch(shorten_duration_cmd)

        too_short_firing = alerts.wait_alert_state(
            "ClickHouseBackupTooShort",
            "firing",
            expected_state=True,
            sleep_time=settings.prometheus_scrape_interval,
            labels={"pod_name": pod_short},
            time_range='60s')
        assert too_short_firing, error(
            "can't get ClickHouseBackupTooShort alert in firing state")

    apply_normal_backup()

    # Both alerts must clear once the normal backup setup is back in place.
    resolution_checks = (
        ("check ClickHouseBackupTooShort gone away",
         "ClickHouseBackupTooShort",
         pod_short,
         "can't get ClickHouseBackupTooShort alert is gone away"),
        ("check ClickHouseBackupTooLong gone away",
         "ClickHouseBackupTooLong",
         pod_long,
         "can't get ClickHouseBackupTooLong alert is gone away"),
    )
    for step_title, alert_name, alert_pod, failure_message in resolution_checks:
        with Then(step_title):
            alert_is_gone = alerts.wait_alert_state(
                alert_name,
                "firing",
                expected_state=False,
                labels={"pod_name": alert_pod})
            assert alert_is_gone, error(failure_message)
def test_metrics_exporter_reboot(self):
    """Check that metrics-exporter still reports CHI objects after a restart.

    The test recreates the test namespace, expects the exporter's ``/chi``
    endpoint to return an empty list, creates a simple ClickHouse
    installation and expects it to be listed, kills PID 1 inside the
    ``metrics-exporter`` container, waits for the operator pod containers to
    be ready again, and expects the same installation to still be listed.
    Finally the installation is deleted and ``/chi`` is expected to be empty.
    """
    def check_monitoring_chi(operator_namespace,
                             operator_pod,
                             expect_result,
                             max_retries=10):
        """Poll the exporter ``/chi`` endpoint until it equals ``expect_result``.

        Makes up to ``max_retries`` attempts. Before each ``/chi`` request
        ``/metrics`` is requested first. Between consecutive attempts the
        function sleeps ``attempt * 5`` seconds. Asserts that the last parsed
        response equals ``expect_result``.
        """
        with Then(
                f"metrics-exporter /chi endpoint result should return {expect_result}"
        ):
            # Pre-bind so a non-positive max_retries fails the assertion
            # below instead of raising NameError.
            out = None
            # range() excludes its stop value, hence the + 1 to really make
            # max_retries attempts.
            for i in range(1, max_retries + 1):
                # check /metrics for try to refresh monitored instances
                url_cmd = util.make_http_get_request("127.0.0.1", "8888",
                                                     "/metrics")
                kubectl.launch(
                    f"exec {operator_pod} -c metrics-exporter -- {url_cmd}",
                    ns=operator_namespace)
                # check /chi after refresh monitored instances
                url_cmd = util.make_http_get_request("127.0.0.1", "8888",
                                                     "/chi")
                out = kubectl.launch(
                    f"exec {operator_pod} -c metrics-exporter -- {url_cmd}",
                    ns=operator_namespace)
                out = json.loads(out)
                if out == expect_result:
                    break
                # Sleep only when another attempt follows; waiting after the
                # final attempt would just delay the failure without a
                # re-check.
                if i < max_retries:
                    with Then("Not ready. Wait for " + str(i * 5) + " seconds"):
                        time.sleep(i * 5)
            assert out == expect_result, error()

    with Given("clickhouse-operator is installed"):
        kubectl.wait_field("pods",
                           util.operator_label,
                           ".status.containerStatuses[*].ready",
                           "true,true",
                           ns=settings.operator_namespace)
        assert kubectl.get_count("pod",
                                 ns='--all-namespaces',
                                 label=util.operator_label) > 0, error()

        # Second output line is taken as the first pod row (the first line is
        # presumably the kubectl table header); its first column is the name.
        out = kubectl.launch("get pods -l app=clickhouse-operator",
                             ns=settings.operator_namespace).splitlines()[1]
        operator_pod = re.split(r'[\t\r\n\s]+', out)[0]
        operator_namespace = settings.operator_namespace
        # Start from a fresh test namespace so no installation is monitored.
        kubectl.delete_ns(kubectl.namespace, ok_to_fail=True)
        kubectl.create_ns(kubectl.namespace)
        check_monitoring_chi(operator_namespace, operator_pod, [])
        with And("created simple clickhouse installation"):
            manifest = "../../docs/chi-examples/01-simple-layout-01-1shard-1repl.yaml"
            kubectl.create_and_check(manifest=manifest,
                                     check={
                                         "object_counts": {
                                             "statefulset": 1,
                                             "pod": 1,
                                             "service": 2,
                                         },
                                         "do_not_delete": True,
                                     })
            expected_chi = [{
                "namespace":
                "test",
                "name":
                "simple-01",
                "hostnames":
                ["chi-simple-01-simple-0-0.test.svc.cluster.local"]
            }]
            check_monitoring_chi(operator_namespace, operator_pod,
                                 expected_chi)
            with When("reboot metrics exporter"):
                # Killing PID 1 in the container is how the exporter is
                # restarted here.
                kubectl.launch(
                    f"exec -n {operator_namespace} {operator_pod} -c metrics-exporter -- bash -c 'kill 1'"
                )
                time.sleep(15)
                kubectl.wait_field("pods",
                                   util.operator_label,
                                   ".status.containerStatuses[*].ready",
                                   "true,true",
                                   ns=settings.operator_namespace)
                with Then("check metrics exporter still contains chi objects"):
                    check_monitoring_chi(operator_namespace, operator_pod,
                                         expected_chi)
                    kubectl.delete(util.get_full_path(manifest,
                                                      lookup_in_host=False),
                                   timeout=600)
                    check_monitoring_chi(operator_namespace, operator_pod, [])