def exec(self, command: List[str]) -> str:
    """
    exec a command in a running pod

    When a mapped dir is set, the command is wrapped into a script by
    ``_prepare_mdir_exec`` and run through ``bash``; the local dir is handed
    to ``_copy_path_to_pod`` before the run and ``_copy_mdir_from_pod`` is
    called once the command has finished.

    :param command: command to run
    :returns: logs (everything read from the websocket after the run)
    :raises SandcastleTimeoutReached: the pod is not in the "Running" phase
    :raises SandcastleCommandFailed: the exec metadata reports "Failure";
        carries the output, the raw error metadata and the exit code
        (999 when the exit code cannot be determined)
    """
    # we need to check first if the pod is running; otherwise we'd get a nasty 500
    pod = self.get_pod()
    if pod.status.phase != "Running":
        raise SandcastleTimeoutReached(
            "You have reached a timeout: the pod is no longer running."
        )
    logger.info("command = %s", command)
    unique_dir = None
    if self.mapped_dir:
        unique_dir, target_script_path = self._prepare_mdir_exec(
            command, target_dir=Path(self.mapped_dir.path)
        )
        command = ["bash", str(target_script_path)]
        self._copy_path_to_pod(self.mapped_dir.local_dir, Path(unique_dir))
    # https://github.com/kubernetes-client/python/blob/master/examples/exec.py
    # https://github.com/kubernetes-client/python/issues/812#issuecomment-499423823
    # FIXME: refactor this junk into a dedicated function, ideally to _do_exec
    ws_client: WSClient = self._do_exec(command, preload_content=False)
    try:
        # https://github.com/packit-service/sandcastle/issues/23
        # even with a >0 number or ==0, select tends to block
        ws_client.run_forever(timeout=WEBSOCKET_CALL_TIMEOUT)
        errors = ws_client.read_channel(ERROR_CHANNEL)
        logger.debug("%s", errors)
        # read_all would consume ERR_CHANNEL, so read_all needs to be last
        response = ws_client.read_all()
        if errors:
            # errors = '{"metadata":{},"status":"Success"}'
            j = json.loads(errors)
            status = j.get("status", None)
            if status == "Success":
                logger.info("exec command succeeded, yay!")
                self._copy_mdir_from_pod(unique_dir)
            elif status == "Failure":
                logger.info("exec command failed")
                # copying data back is best-effort here: the exception raised
                # below carries the metadata about the failed command and must
                # not be masked by a problem with the copy
                try:
                    self._copy_mdir_from_pod(unique_dir)
                except SandcastleException as ex:
                    logger.warning(
                        "unable to copy data from the pod after a failed exec: %s",
                        ex,
                    )

                # ('{"metadata":{},"status":"Failure","message":"command terminated with '
                #  'non-zero exit code: Error executing in Docker Container: '
                #  '1","reason":"NonZeroExitCode",'
                #  '"details":{"causes":[{"reason":"ExitCode","message":"1"}]}}')
                # tolerate "details"/"causes" being absent or null
                causes = (j.get("details") or {}).get("causes") or []
                rc = 999
                for c in causes:
                    if c.get("reason", None) == "ExitCode":
                        try:
                            rc = int(c.get("message", None))
                        except (TypeError, ValueError):
                            # TypeError: "message" is missing (int(None));
                            # ValueError: "message" is not a number
                            rc = 999
                raise SandcastleCommandFailed(output=response, reason=errors, rc=rc)
            else:
                logger.warning(
                    "exec didn't yield the metadata we expect, mighty suspicious, %s",
                    errors,
                )
    finally:
        ws_client.close()

    logger.debug("exec response = %r", response)
    return response
def deploy_pod(self, command: Optional[List] = None):
    """
    Deploy a pod and babysit it. If it exists already, remove it.

    The method polls the pod once a second until it leaves the "Pending"
    phase. When a command is set, it then keeps polling until the pod
    reaches either the "Succeeded" or the "Failed" phase.

    :param command: command to set in the pod manifest; cannot be combined
        with a mapped dir
    :raises SandcastleException: both a mapped dir and a command are set
    :raises RuntimeError: the pod is still "Pending" after the start timeout
    :raises SandcastleCommandFailed: the pod ended up in the "Failed" phase
    """
    if self.mapped_dir and command:
        raise SandcastleException(
            "Since you set your own command, we cannot sync the local dir"
            " inside because there is a race condition between the pod start"
            " and the copy process. Please use exec instead."
        )

    logger.info("Deploying pod %s", self.pod_name)
    if self.is_pod_already_deployed():
        self.delete_pod()

    pod_manifest = self.create_pod_manifest(command=command)
    self.create_pod(pod_manifest)

    # wait for the pod to start
    start_timeout = 600  # number of one-second polls before we give up
    count = 0
    logger.debug("pod = %r", self.pod_name)
    while True:
        resp = self.get_pod()
        # https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase
        if resp.status.phase != "Pending":
            logger.info("pod is no longer pending - status: %s", resp.status.phase)
            break
        time.sleep(1)
        count += 1
        if count > start_timeout:
            logger.error(
                "The pod did not start on time, status = %r", resp.status
            )
            raise RuntimeError(
                f"The pod did not start in {start_timeout} seconds: "
                "something's wrong."
            )

    if resp.status.phase == "Failed":
        # > resp.status.container_statuses[0].state
        # {'running': None,
        #  'terminated': {'container_id': 'docker://f3828...
        #                 'exit_code': 2,
        #                 'finished_at': datetime.datetime(2019, 6, 7,...
        #                 'message': None,
        #                 'reason': 'Error',
        #                 'signal': None,
        #                 'started_at': datetime.datetime(2019, 6, 7,...
        #  'waiting': None}
        raise SandcastleCommandFailed(
            output=self.get_logs(),
            reason=str(resp.status),
            rc=self.get_rc_from_v1pod(resp),
        )

    if command:
        # wait for the pod to finish since the command is set
        # NOTE: there is no upper bound on this loop, it polls until the pod
        # reaches a terminal phase
        while True:
            resp = self.get_pod()
            # https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase
            if resp.status.phase == "Failed":
                logger.info(
                    "The pod has failed execution: you should "
                    "inspect logs or check `oc describe`"
                )
                raise SandcastleCommandFailed(
                    output=self.get_logs(),
                    reason=str(resp.status),
                    rc=self.get_rc_from_v1pod(resp),
                )
            if resp.status.phase == "Succeeded":
                logger.info("All Containers in the pod have finished successfully.")
                break
            # TODO: can we use watch instead?
            time.sleep(1)
def exec(
    self,
    command: List[str],
    env: Optional[Dict] = None,
    cwd: Optional[Union[str, Path]] = None,
) -> str:
    """
    exec a command in a running pod

    The command is always wrapped into a script by ``_prepare_exec`` and run
    through ``bash``. When a mapped dir is set, the local dir is handed to
    ``_copy_path_to_pod`` before the run.

    :param command: command to run
    :param env: a Dict with env vars to set for the exec'd command
    :param cwd: run the command in this subdirectory of a mapped dir,
        defaults to a mapped dir or a temporary directory if mapped_dir is not set
    :returns: logs (everything read from the websocket while it was open)
    :raises SandcastleException: ``cwd`` is set but no mapped dir is configured
    :raises SandcastleTimeoutReached: the pod is not running
    :raises SandcastleCommandFailed: the exec metadata reports "Failure";
        carries the output, the raw error metadata and the exit code
        (999 when the exit code cannot be determined)
    """
    if not self.mapped_dir and cwd:
        raise SandcastleException(
            "The cwd argument only works with a mapped dir - "
            "please set a mapped dir or change directory in the command you provide."
        )
    # we need to check first if the pod is running; otherwise we'd get a nasty 500
    if not self.is_pod_running():
        raise SandcastleTimeoutReached(
            "You have reached a timeout: the pod is no longer running."
        )
    logger.info("command = %s", command)
    target_dir = None if not self.mapped_dir else Path(self.mapped_dir.path)
    unique_dir, target_script_path = self._prepare_exec(
        command, target_dir=target_dir, env=env, cwd=cwd
    )
    command = ["bash", str(target_script_path)]
    if self.mapped_dir:
        self._copy_path_to_pod(self.mapped_dir.local_dir, unique_dir)
    # https://github.com/kubernetes-client/python/blob/master/examples/exec.py
    # https://github.com/kubernetes-client/python/issues/812#issuecomment-499423823
    # FIXME: refactor this junk into a dedicated function, ideally to _do_exec
    ws_client: WSClient = self._do_exec(command, preload_content=False)
    try:
        # https://github.com/packit-service/sandcastle/issues/23
        # even with a >0 number or ==0, select tends to block
        response = ""
        errors = ""
        # keep draining both channels for as long as the websocket is open
        while ws_client.is_open():
            ws_client.run_forever(timeout=WEBSOCKET_CALL_TIMEOUT)
            errors += ws_client.read_channel(ERROR_CHANNEL)
            logger.debug("%s", errors)
            # read_all would consume ERR_CHANNEL, so read_all needs to be last
            response += ws_client.read_all()
        if errors:
            # errors = '{"metadata":{},"status":"Success"}'
            j = json.loads(errors)
            status = j.get("status", None)
            if status == "Success":
                logger.info("exec command succeeded, yay!")
                self._copy_mdir_from_pod(unique_dir)
            elif status == "Failure":
                logger.info("exec command failed")
                logger.debug(j)
                logger.info("output:\n%s", response)

                # the timeout could have been reached here which means
                # the pod is not running, so we are not able `oc rsync` things from inside:
                # we won't be needing the data any more since p-s halts execution
                # after a failure in action, we only do this b/c it's the right thing to do
                # for use cases outside p-s
                try:
                    self._copy_mdir_from_pod(unique_dir)
                except SandcastleException as ex:
                    # we do not re-raise because the exception raised below
                    # is much more important since it contains metadata about
                    # what happened; we only leave a trace of the copy problem
                    logger.warning(
                        "unable to copy data from the pod after a failed exec: %s",
                        ex,
                    )

                # ('{"metadata":{},"status":"Failure","message":"command terminated with '
                #  'non-zero exit code: Error executing in Docker Container: '
                #  '1","reason":"NonZeroExitCode",'
                #  '"details":{"causes":[{"reason":"ExitCode","message":"1"}]}}')
                # tolerate "details"/"causes" being absent or null
                causes = (j.get("details") or {}).get("causes") or []
                rc = 999
                for c in causes:
                    if c.get("reason", None) == "ExitCode":
                        try:
                            rc = int(c.get("message", None))
                        except (TypeError, ValueError):
                            # TypeError: "message" is missing (int(None));
                            # ValueError: "message" is not a number
                            rc = 999
                raise SandcastleCommandFailed(output=response, reason=errors, rc=rc)
            else:
                logger.warning(
                    "exec didn't yield the metadata we expect, mighty suspicious, %s",
                    errors,
                )
    finally:
        ws_client.close()

    logger.debug("exec response = %r", response)
    return response