Code example #1
File: node.py  Project: nimrod-becker/ocs-ci
def wait_for_nodes_status(node_names=None, status=constants.NODE_READY, timeout=180):
    """
    Wait until all nodes are in the given status

    Args:
        node_names (list): The node names to wait for to reach the desired status.
            If None, will wait for all cluster nodes
        status (str): The node status to wait for
            (e.g. 'Ready', 'NotReady', 'SchedulingDisabled')
        timeout (int): The number of seconds to wait for the nodes to reach
            the status

    Raises:
        ResourceWrongStatusException: In case one or more nodes haven't
            reached the desired state

    """
    if not node_names:
        node_names = [node.name for node in get_node_objs()]
    else:
        # Work on a copy so that removing names below does not mutate the caller's list
        node_names = list(node_names)

    log.info(f"Waiting for nodes {node_names} to reach status {status}")
    try:
        for sample in TimeoutSampler(timeout, 3, get_node_objs, node_names):
            for node in sample:
                if node.ocp.get_resource_status(node.name) == status:
                    node_names.remove(node.name)
            if not node_names:
                break

    except TimeoutExpiredError:
        log.error(f"The following nodes haven't reached status {status}: {node_names}")
        raise exceptions.ResourceWrongStatusException(
            node_names, [n.describe() for n in get_node_objs(node_names)]
        )
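In a test, a helper like this is typically called right after a disruptive action on the cluster. Below is a minimal usage sketch, not code from the project: restart_nodes_by_name() and the worker node names are hypothetical; only wait_for_nodes_status and constants.NODE_READY come from the example above.

# Hypothetical usage sketch: wait for two worker nodes to report Ready again
# after they have been rebooted.
worker_names = ["worker-0", "worker-1"]   # assumed node names
restart_nodes_by_name(worker_names)       # hypothetical reboot helper
wait_for_nodes_status(
    node_names=worker_names,
    status=constants.NODE_READY,
    timeout=600,                          # allow up to 10 minutes for the reboot
)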
Code example #2
File: perftests.py  Project: ramkiperiy/ocs-ci
    def wait_for_wl_to_finish(self, timeout=18000, sleep=300):
        """
        Wait until the workload is finished and retrieve the test log

        Args:
            timeout (int): time in seconds to wait for the benchmark to finish
            sleep (int): sleep interval in seconds between status checks

        Raises:
            Exception : too many restarts of the test pod
            ResourceWrongStatusException : the test Failed / Errored
            TimeoutExpiredError : the test did not complete on time

        """
        log.info(f"Waiting for {self.client_pod_name} to complete")

        Finished = 0
        restarts = 0
        total_time = timeout
        while not Finished and total_time > 0:
            results = run_oc_command(
                "get pod --no-headers -o custom-columns=:metadata.name,:status.phase",
                namespace=benchmark_operator.BMO_NAME,
            )
            (fname, status) = ["", ""]
            for name in results:
                # Look for the pod that runs the benchmark (not the IO).
                # That pod contains `client` in its name, and there is only one
                # such pod; the other pods have `server` in their names.
                (fname, status) = name.split()
                if re.search("client", fname):
                    break
                else:
                    (fname, status) = ["", ""]

            if fname == "":  # there is no `client` pod !
                err_msg = f"{self.client_pod} Failed to run !!!"
                log.error(err_msg)
                raise Exception(err_msg)

            if not fname == self.client_pod:
                # The client pod name differs from the previous check - it was restarted
                log.info(
                    f"The pod {self.client_pod} was restarted. The new client pod is {fname}"
                )
                self.client_pod = fname
                restarts += 1
                # in case of restarting the benchmark, reset the timeout as well
                total_time = timeout

            if restarts > 3:  # we tolerate only 3 restarts
                err_msg = f"Too many restarts of the benchmark ({restarts})"
                log.error(err_msg)
                raise Exception(err_msg)

            if status == "Succeeded":
                # Getting the end time of the benchmark - for reporting.
                self.end_time = self.get_time()
                self.test_logs = self.pod_obj.exec_oc_cmd(
                    f"logs {self.client_pod}", out_yaml_format=False
                )
                log.info(f"{self.client_pod} completed successfully")
                Finished = 1
            elif (
                status != constants.STATUS_RUNNING
                and status != constants.STATUS_PENDING
            ):
                # The benchmark pod is neither Running nor Pending (Succeeded was
                # handled above), so there is no need to keep waiting for the timeout.
                # Note: the pod can be in Pending state in case of a restart.
                err_msg = f"{self.client_pod} Failed to run - ({status})"
                log.error(err_msg)
                raise exceptions.ResourceWrongStatusException(
                    self.client_pod,
                    describe_out=err_msg,
                    column="Status",
                    expected="Succeeded",
                    got=status,
                )
            else:
                log.info(
                    f"{self.client_pod} is in {status} State, and wait to Succeeded State."
                    f" wait another {sleep} sec. for benchmark to complete"
                )
                time.sleep(sleep)
                total_time -= sleep

        if not Finished:
            err_msg = (
                f"{self.client_pod} did not completed on time, "
                f"maybe timeout ({timeout}) need to be increase"
            )
            log.error(err_msg)
            raise exceptions.TimeoutExpiredError(
                self.client_pod, custom_message=err_msg
            )

        # Saving the benchmark internal log into a file at the logs directory
        log_file_name = f"{self.full_log_path}/test-pod.log"
        try:
            with open(log_file_name, "w") as f:
                f.write(self.test_logs)
            log.info(f"The Test log can be found at : {log_file_name}")
        except Exception:
            log.warning(f"Cannot write the log to the file {log_file_name}")
        log.info(f"The {self.benchmark_name} benchmark complete")