コード例 #1
0
    def exec(self, command: List[str]) -> str:
        """
        exec a command in a running pod

        When a mapped dir is set, the command is wrapped into a script prepared
        by `_prepare_mdir_exec`, run as `bash <script>`, and the local dir is
        copied into the pod before the exec. `_copy_mdir_from_pod` is invoked
        once the exec reports either the "Success" or the "Failure" status.

        :param command: command to run
        :returns: output of the command as read from the websocket (`read_all`)
        :raises SandcastleTimeoutReached: the pod is not in the "Running" phase
        :raises SandcastleCommandFailed: the exec reported the "Failure" status;
                `rc` is 999 when the exit code can't be obtained from the metadata
        """
        # we need to check first if the pod is running; otherwise we'd get a nasty 500
        pod = self.get_pod()
        if pod.status.phase != "Running":
            raise SandcastleTimeoutReached(
                "You have reached a timeout: the pod is no longer running.")
        logger.info("command = %s", command)
        unique_dir = None
        if self.mapped_dir:
            unique_dir, target_script_path = self._prepare_mdir_exec(
                command, target_dir=Path(self.mapped_dir.path))
            # from now on we exec the generated script instead of the raw command
            command = ["bash", str(target_script_path)]
            self._copy_path_to_pod(self.mapped_dir.local_dir, Path(unique_dir))
        # https://github.com/kubernetes-client/python/blob/master/examples/exec.py
        # https://github.com/kubernetes-client/python/issues/812#issuecomment-499423823
        # FIXME: refactor this junk into a dedicated function, ideally to _do_exec
        ws_client: WSClient = self._do_exec(command, preload_content=False)
        try:
            # https://github.com/packit-service/sandcastle/issues/23
            # even with a >0 number or ==0, select tends to block
            ws_client.run_forever(timeout=WEBSOCKET_CALL_TIMEOUT)
            errors = ws_client.read_channel(ERROR_CHANNEL)
            logger.debug("%s", errors)
            # read_all would consume ERR_CHANNEL, so read_all needs to be last
            response = ws_client.read_all()
            if errors:
                # errors = '{"metadata":{},"status":"Success"}'
                j = json.loads(errors)
                status = j.get("status", None)
                if status == "Success":
                    logger.info("exec command succeeded, yay!")
                    self._copy_mdir_from_pod(unique_dir)
                elif status == "Failure":
                    logger.info("exec command failed")
                    self._copy_mdir_from_pod(unique_dir)

                    # ('{"metadata":{},"status":"Failure","message":"command terminated with '
                    #  'non-zero exit code: Error executing in Docker Container: '
                    #  '1","reason":"NonZeroExitCode","details":{"causes":[{"reason":"ExitCode","message":"1"}]}}')
                    causes = j.get("details", {}).get("causes", [])
                    # 999 is the fallback when no usable exit code is present
                    rc = 999
                    for c in causes:
                        if c.get("reason", None) == "ExitCode":
                            try:
                                rc = int(c.get("message", None))
                            except (TypeError, ValueError):
                                # TypeError: the cause has no message (int(None));
                                # ValueError: the message is not a number
                                rc = 999
                    raise SandcastleCommandFailed(output=response,
                                                  reason=errors,
                                                  rc=rc)
                else:
                    logger.warning(
                        "exec didn't yield the metadata we expect, mighty suspicious, %s",
                        errors,
                    )
        finally:
            # always release the websocket, even when we raise above
            ws_client.close()

        logger.debug("exec response = %r", response)
        return response
コード例 #2
0
    def deploy_pod(self, command: Optional[List] = None):
        """
        Create the pod and watch it until it leaves the "Pending" phase.

        A pod which is already deployed is deleted first. When a command is
        set, this method also blocks until the pod reaches the "Succeeded" or
        the "Failed" phase.

        :param command: command for the pod to run; can't be combined with a mapped dir
        :raises SandcastleException: both a mapped dir and a command are set
        :raises RuntimeError: the pod is still pending after the polling limit
        :raises SandcastleCommandFailed: the pod ended up in the "Failed" phase
        """
        if self.mapped_dir and command:
            race_msg = (
                "Since you set your own command, we cannot sync the local dir"
                " inside because there is a race condition between the pod start"
                " and the copy process. Please use exec instead."
            )
            raise SandcastleException(race_msg)

        def failure_of(failed_pod):
            # collect logs, status and return code of a failed pod into one exception
            return SandcastleCommandFailed(
                output=self.get_logs(),
                reason=str(failed_pod.status),
                rc=self.get_rc_from_v1pod(failed_pod),
            )

        logger.info("Deploying pod %s", self.pod_name)
        if self.is_pod_already_deployed():
            self.delete_pod()

        manifest = self.create_pod_manifest(command=command)
        self.create_pod(manifest)

        # wait for the pod to start: poll once a second, give up after 601 polls
        logger.debug("pod = %r" % self.pod_name)
        for _ in range(601):
            resp = self.get_pod()
            # https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase
            phase = resp.status.phase
            if phase != "Pending":
                logger.info("pod is no longed pending - status: %s", phase)
                break
            time.sleep(1)
        else:
            # the loop ran out without the pod ever leaving Pending
            logger.error(
                "The pod did not start on time, status = %r" % resp.status)
            raise RuntimeError(
                "The pod did not start in 600 seconds: something's wrong.")

        if resp.status.phase == "Failed":
            # > resp.status.container_statuses[0].state
            # {'running': None,
            #  'terminated': {'container_id': 'docker://f3828...
            #                 'exit_code': 2,
            #                 'finished_at': datetime.datetime(2019, 6, 7,...
            #                 'message': None,
            #                 'reason': 'Error',
            #                 'signal': None,
            #                 'started_at': datetime.datetime(2019, 6, 7,...
            #  'waiting': None}
            raise failure_of(resp)

        if not command:
            # nothing to wait for when the pod runs its default command
            return

        # the command is set, so wait for the pod to finish
        # TODO: can we use watch instead?
        while True:
            resp = self.get_pod()
            # https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase
            phase = resp.status.phase
            if phase == "Failed":
                logger.info(
                    "The pod has failed execution: you should "
                    "inspect logs or check `oc describe`"
                )
                raise failure_of(resp)
            if phase == "Succeeded":
                logger.info(
                    "All Containers in the pod have finished successfully.")
                return
            time.sleep(1)
コード例 #3
0
    def exec(
        self,
        command: List[str],
        env: Optional[Dict] = None,
        cwd: Optional[Union[str, Path]] = None,
    ) -> str:
        """
        exec a command in a running pod

        The command is always wrapped into a script prepared by `_prepare_exec`
        and run as `bash <script>`. When a mapped dir is set, the local dir is
        copied into the pod before the exec.

        :param command: command to run
        :param env: a Dict with env vars to set for the exec'd command
        :param cwd: run the command in this subdirectory of a mapped dir,
               defaults to a mapped dir or a temporary directory if mapped_dir is not set
        :returns: output of the command accumulated from the websocket (`read_all`)
        :raises SandcastleException: cwd is set while there is no mapped dir
        :raises SandcastleTimeoutReached: the pod is not running
        :raises SandcastleCommandFailed: the exec reported the "Failure" status;
                `rc` is 999 when the exit code can't be obtained from the metadata
        """
        if not self.mapped_dir and cwd:
            raise SandcastleException(
                "The cwd argument only works with a mapped dir - "
                "please set a mapped dir or change directory in the command you provide."
            )
        # we need to check first if the pod is running; otherwise we'd get a nasty 500
        if not self.is_pod_running():
            raise SandcastleTimeoutReached(
                "You have reached a timeout: the pod is no longer running.")
        logger.info("command = %s", command)

        target_dir = None if not self.mapped_dir else Path(
            self.mapped_dir.path)
        unique_dir, target_script_path = self._prepare_exec(
            command, target_dir=target_dir, env=env, cwd=cwd)
        # from now on we exec the generated script instead of the raw command
        command = ["bash", str(target_script_path)]
        if self.mapped_dir:
            self._copy_path_to_pod(self.mapped_dir.local_dir, unique_dir)
        # https://github.com/kubernetes-client/python/blob/master/examples/exec.py
        # https://github.com/kubernetes-client/python/issues/812#issuecomment-499423823
        # FIXME: refactor this junk into a dedicated function, ideally to _do_exec
        ws_client: WSClient = self._do_exec(command, preload_content=False)
        try:
            # https://github.com/packit-service/sandcastle/issues/23
            # even with a >0 number or ==0, select tends to block
            response = ""
            errors = ""
            # keep draining both channels until the websocket gets closed
            while ws_client.is_open():
                ws_client.run_forever(timeout=WEBSOCKET_CALL_TIMEOUT)
                errors += ws_client.read_channel(ERROR_CHANNEL)
                logger.debug("%s", errors)
                # read_all would consume ERR_CHANNEL, so read_all needs to be last
                response += ws_client.read_all()
            if errors:
                # errors = '{"metadata":{},"status":"Success"}'
                j = json.loads(errors)
                status = j.get("status", None)
                if status == "Success":
                    logger.info("exec command succeeded, yay!")
                    self._copy_mdir_from_pod(unique_dir)
                elif status == "Failure":
                    logger.info("exec command failed")
                    logger.debug("%s", j)
                    logger.info("output:\n%s", response)
                    # the timeout could have been reached here which means
                    # the pod is not running, so we are not able `oc rsync` things from inside:
                    # we won't be needing the data any more since p-s halts execution
                    # after a failure in action, we only do this b/c it's the right thing to do
                    # for use cases outside p-s
                    try:
                        self._copy_mdir_from_pod(unique_dir)
                    except SandcastleException:
                        # yes, we eat the exception because the one raised below
                        # is much more important since it contains metadata about what happened;
                        # logs will contain info about what happened while trying to copy things
                        pass

                    # ('{"metadata":{},"status":"Failure","message":"command terminated with '
                    #  'non-zero exit code: Error executing in Docker Container: '
                    #  '1","reason":"NonZeroExitCode","details":{"causes":[{"reason":"ExitCode","message":"1"}]}}')
                    causes = j.get("details", {}).get("causes", [])
                    # 999 is the fallback when no usable exit code is present
                    rc = 999
                    for c in causes:
                        if c.get("reason", None) == "ExitCode":
                            try:
                                rc = int(c.get("message", None))
                            except (TypeError, ValueError):
                                # TypeError: the cause has no message (int(None));
                                # ValueError: the message is not a number
                                rc = 999
                    raise SandcastleCommandFailed(output=response,
                                                  reason=errors,
                                                  rc=rc)
                else:
                    logger.warning(
                        "exec didn't yield the metadata we expect, mighty suspicious, %s",
                        errors,
                    )
        finally:
            # always release the websocket, even when we raise above
            ws_client.close()

        logger.debug("exec response = %r", response)
        return response