def _init_pod_delete(self):
    """Remove the init Pod once the context has been copied into the volume.

    Issues a namespaced Pod deletion for ``self._init_pod_name`` inside
    ``self._namespace`` using default ``V1DeleteOptions``.
    """
    pod_name = self._init_pod_name
    log.debug(f"deleting init pod {pod_name}")
    self._kclient.delete_namespaced_pod(
        pod_name,
        namespace=self._namespace,
        body=V1DeleteOptions(),
    )
def _vol_claim_delete(self):
    """Remove the PersistentVolumeClaim named ``self._vol_claim_name``.

    The claim is looked up in ``self._namespace`` and deleted with default
    ``V1DeleteOptions``.
    """
    claim_name = self._vol_claim_name
    log.debug(f"deleting volume claim {claim_name}")
    delete = self._kclient.delete_namespaced_persistent_volume_claim
    delete(claim_name, namespace=self._namespace, body=V1DeleteOptions())
def _create_container(self, cid, step):
    """Build or pull the image of a step and create (without starting) its
    container.

    Args:
      cid(str): Name assigned to the container.
      step: The step being executed; its ``id`` and ``skip_pull`` attributes
        are read here and it is handed to ``_get_build_info`` and
        ``_get_container_kwargs``.

    Returns:
      The value returned by ``self._d.containers.create()``, or None when
      running in dry-run mode.
    """
    build, _, img, tag, build_ctx_path = self._get_build_info(step)
    image = f"{img}:{tag}"
    pretag = {"pretag": f"[{step.id}]"}

    if build:
        log.info(f"docker build {image} {build_ctx_path}", extra=pretag)
        if not self._config.dry_run:
            streamer = self._d.api.build(
                decode=True, path=build_ctx_path, tag=image, rm=True,
            )
            for chunk in streamer:
                # The low-level build API reports a failed build as an
                # "error" entry in the stream instead of raising. Check it
                # before the quiet-mode shortcut so that a broken build is
                # never silently ignored.
                if "error" in chunk:
                    log.fail(f"docker build {image} failed: {chunk['error']}")
                if self._config.quiet:
                    continue
                if "stream" in chunk:
                    for line in chunk["stream"].splitlines():
                        if line:
                            log.step_info(line.strip())
    elif not self._config.skip_pull and not step.skip_pull:
        log.info(f"docker pull {image}", extra=pretag)
        if not self._config.dry_run:
            self._d.images.pull(repository=image)

    if self._config.dry_run:
        return

    container_args = self._get_container_kwargs(step, image, cid)

    # Expose the docker socket inside the container. Work on a copy of the
    # volume list and skip the entry when it is already present, since a
    # repeated bind for the same path would be redundant.
    docker_sock = "/var/run/docker.sock:/var/run/docker.sock"
    volumes = list(container_args.get("volumes") or [])
    if docker_sock not in volumes:
        volumes.append(docker_sock)
    container_args["volumes"] = volumes

    log.debug(f"Container args: {container_args}")

    msg = f"docker create name={cid}"
    msg += f' image={container_args["image"]}'
    if container_args.get("entrypoint"):
        msg += f' entrypoint={container_args["entrypoint"]}'
    if container_args.get("command"):
        msg += f' command={container_args["command"]}'
    log.info(msg, extra=pretag)

    return self._d.containers.create(**container_args)
def get_sha():
    """Return the abbreviated sha of the commit ``HEAD`` points to.

    The module-level ``repo`` object is used after ``init_repo_object()`` has
    been called. When resolving the revision raises ``ValueError`` the error
    is logged at debug level and ``log.fail`` is invoked.
    """
    init_repo_object()
    try:
        short_sha = repo.git.rev_parse(repo.head.object.hexsha, short=True)
    except ValueError as err:
        log.debug(err)
        log.fail(
            'Could not obtain revision of repository located at {}'.format(
                get_git_root_folder()))
    else:
        return short_sha
def _pod_exit_code(self):
    """Translate the phase of the step Pod into a process-style exit code.

    Returns:
      int: 0 when the Pod phase is ``Succeeded``, 1 for any other phase.
    """
    # short pause before querying the Pod status
    time.sleep(2)
    pod = self._kclient.read_namespaced_pod(
        name=self._pod_name,
        namespace=self._namespace,
    )
    phase = pod.status.phase
    log.debug(f"got status {phase}")
    return 0 if phase == "Succeeded" else 1
def run(self, reuse=False):
    """Resolve the image of this action, prepare its container and start it.

    The ``uses`` entry of the action decides where the image comes from:
    a ``docker://`` reference is pulled, a ``./`` path or anything else is
    built from a Dockerfile location.

    Args:
      reuse(bool): When True an already existing container is kept as it
        is; otherwise an existing container is removed and a new one is
        created.
    """
    uses = self.action['uses']
    build = 'docker://' not in uses

    if not build:
        # image referenced directly; default to the "latest" tag
        tag = uses.replace('docker://', '')
        if ':' not in tag:
            tag += ":latest"
        dockerfile_path = 'n/a'
    elif './' in uses:
        # action stored in the local workspace
        action_dir = os.path.basename(uses.replace('./', ''))
        repo_id = self.env['GITHUB_REPOSITORY']
        if repo_id == 'unknown':
            repo_id = ''
        elif action_dir:
            repo_id += '/'
        tag = ''.join([repo_id, action_dir, ':', self.env['GITHUB_SHA']])
        dockerfile_path = os.path.join(os.getcwd(), uses)
    else:
        # action stored in a repository described by the 'uses' value
        _, _, user, repo, _, version = scm.parse(uses)
        tag = '{}/{}:{}'.format(user, repo, version)
        dockerfile_path = os.path.join(self.action['repo_dir'],
                                       self.action['action_dir'])

    log.debug('docker tag: {}'.format(tag))
    log.debug('dockerfile path: {}'.format(dockerfile_path))

    container_exists = self.docker_exists()
    if container_exists and not reuse:
        self.docker_rm()
        container_exists = False

    if not container_exists:
        if build:
            self.docker_build(tag, dockerfile_path)
        else:
            self.docker_pull(tag)
        self.docker_create(tag)

    if self.container is not None:
        popper.cli.docker_list.append(self.container)

    ecode = self.docker_start()
    if ecode != 0:
        log.fail("Action '{}' failed!".format(self.action['name']))
def _pod_read_log(self):
    """Follow the log of the step Pod and forward it to the step logger.

    The log is requested in follow mode, starting from the last 10 lines,
    without preloading the content; every line received is decoded,
    stripped of trailing whitespace and passed to ``log.step_info``.
    """
    log.debug(f"reading logs from {self._pod_name}")
    request = {
        "name": self._pod_name,
        "namespace": self._namespace,
        "follow": True,
        "tail_lines": 10,
        "_preload_content": False,
    }
    stream = self._kclient.read_namespaced_pod_log(**request)
    for raw_line in stream:
        text = raw_line.decode().rstrip()
        log.step_info(text)
def _vol_claim_create(self):
    """Create the PersistentVolumeClaim and wait until it leaves ``Pending``.

    The claim is bound to the volume named in the ``persistent_volume_name``
    resource manager option. When that option is not set, a default
    ``pv-hostpath-popper-<wid>`` volume is used and created first if it
    does not exist yet.

    Raises:
      Exception: If the claim is still ``Pending`` after 60 checks made
        one second apart.
    """
    opts = self._config.resman_opts
    if opts.get("persistent_volume_name", None):
        volume_name = opts.persistent_volume_name
    else:
        volume_name = f"pv-hostpath-popper-{self._config.wid}"
        if not self._vol_exists(volume_name):
            self._vol_create(volume_name)

    claim_spec = {
        "storageClassName": "manual",
        "accessModes": ["ReadWriteMany"],
        "resources": {"requests": {"storage": self._vol_size}},
        "volumeName": volume_name,
    }
    vol_claim_conf = {
        "apiVersion": "v1",
        "kind": "PersistentVolumeClaim",
        "metadata": {"name": self._vol_claim_name},
        "spec": claim_spec,
    }
    self._kclient.create_namespaced_persistent_volume_claim(
        namespace=self._namespace, body=vol_claim_conf)

    # poll until the claim is no longer reported as `Pending`
    for attempt in range(1, 61):
        claim = self._kclient.read_namespaced_persistent_volume_claim(
            self._vol_claim_name, namespace=self._namespace)
        if claim.status.phase != "Pending":
            break

        log.debug(f"volume claim {self._vol_claim_name} not created yet")
        if attempt == 60:
            raise Exception(
                "Timed out waiting for PersistentVolumeClaim creation")
        time.sleep(1)
def __init__(self, init_podman_client=True, **kw):
    """Initialize the runner and, optionally, verify that podman is usable.

    Args:
      init_podman_client(bool): When False, the ``podman info`` and
        ``podman version`` probes are skipped.
      **kw: Passed through to the parent class initializer.
    """
    super(PodmanRunner, self).__init__(**kw)

    self._spawned_containers = set()

    if not init_podman_client:
        return

    # Assume failure until both probes report a zero exit code. The command
    # helper returns a (pid, ecode, output) tuple, so a broken or missing
    # podman has to be detected through the exit code and not only through
    # exceptions.
    ecode = 1
    try:
        _, ecode, self._p_info = HostRunner._exec_cmd(
            ["podman", "info"], logging=False)
        # NOTE(review): the whole result tuple is stored here, unlike
        # `_p_info` above; kept as-is since users of `_p_version` are not
        # visible from this block.
        self._p_version = HostRunner._exec_cmd(
            ["podman", "version"], logging=False)
        if ecode == 0:
            ecode = self._p_version[1]
    except Exception as e:
        log.debug(f"Podman error: {e}")
        ecode = 1

    if ecode != 0:
        log.debug(f"Podman probe exited with code {ecode}")
        log.fail("Unable to connect to podman, is it installed?")

    log.debug(f"Podman info: {pu.prettystr(self._p_info)}")
def _vol_create(self, volume_name):
    """Create a hostPath PersistentVolume and wait until it leaves ``Pending``.

    The host path and capacity come from the ``hostpathvol_path`` and
    ``hostpathvol_size`` resource manager options, falling back to ``/tmp``
    and ``1Gi``.

    Args:
      volume_name(str): Name given to the PersistentVolume.

    Raises:
      Exception: If the volume is still ``Pending`` after 60 checks made
        one second apart.
    """
    opts = self._config.resman_opts
    hostpathvol_path = (
        opts.hostpathvol_path if opts.get("hostpathvol_path", None) else "/tmp"
    )
    hostpathvol_size = (
        opts.hostpathvol_size if opts.get("hostpathvol_size", None) else "1Gi"
    )

    volume_spec = {
        "persistentVolumeReclaimPolicy": "Recycle",
        "storageClassName": "manual",
        "capacity": {"storage": hostpathvol_size},
        "accessModes": ["ReadWriteMany"],
        "hostPath": {"path": hostpathvol_path},
    }
    vol_conf = {
        "kind": "PersistentVolume",
        "apiVersion": "v1",
        "metadata": {"name": volume_name, "labels": {"type": "host"}},
        "spec": volume_spec,
    }
    self._kclient.create_persistent_volume(body=vol_conf)

    # poll until the volume is no longer reported as `Pending`
    for attempt in range(1, 61):
        volume = self._kclient.read_persistent_volume(volume_name)
        if volume.status.phase != "Pending":
            break

        log.debug(f"volume {volume_name} not created yet")
        if attempt == 60:
            raise Exception(
                "Timed out waiting for PersistentVolume creation")
        time.sleep(1)
def __init__(self, init_docker_client=True, **kw):
    """Initialize the runner and, optionally, connect to the docker daemon.

    Args:
      init_docker_client(bool): When False, no docker client is created
        and ``self._d`` stays None.
      **kw: Passed through to the parent class initializer.
    """
    super(DockerRunner, self).__init__(**kw)

    self._spawned_containers = set()
    self._d = None

    if init_docker_client:
        try:
            self._d = docker.from_env()
            # querying the version checks that the daemon answers
            self._d.version()
        except Exception as err:
            log.debug(f'Docker error: {err}')
            log.fail('Unable to connect to the docker daemon.')

        daemon_info = pu.prettystr(self._d.info())
        log.debug(f'Docker info: {daemon_info}')
def __init__(self, wfile, workspace, dry_run, reuse, parallel,
             skip_secrets_prompt=False):
    """Store the execution options and load the workflow.

    Args:
      wfile: Workflow file; resolved through ``pu.find_default_wfile``.
      workspace: Stored as ``self.workspace``.
      dry_run: Stored as ``self.dry_run``.
      reuse: Stored as ``self.reuse``.
      parallel: Stored as ``self.parallel``.
      skip_secrets_prompt(bool): Stored as ``self.skip_secrets_prompt``.
    """
    wfile = pu.find_default_wfile(wfile)

    (self.workspace, self.dry_run, self.reuse, self.parallel) = (
        workspace, dry_run, reuse, parallel)
    self.skip_secrets_prompt = skip_secrets_prompt
    self.actions_cache_path = os.path.join('/', 'tmp', 'actions')

    # Instantiating the Workflow validates it as part of its initialization.
    self.wf = Workflow(wfile)
    self.check_secrets()

    dumped = yaml.dump(self.wf, default_flow_style=False, default_style='')
    log.debug('workflow:\n{}'.format(dumped))
def docker_create(self, img):
    """Create (but do not start) the container for this action.

    The container gets the action's environment extended with its secrets
    (read from ``os.environ``), the runner variables in ``self.env`` and
    ``HOME``. The home directory, the workspace, the event file and the
    docker socket are bind-mounted.

    Args:
      img(str): Name of the image the container is created from.
    """
    log.info('{}[{}] docker create {} {}'.format(
        self.msg_prefix,
        self.action['name'], img, ' '.join(self.action.get('args', ''))))
    if self.dry_run:
        return

    # Work on a copy: updating the dict stored in the action would write
    # secrets and runner variables back into the action definition.
    env_vars = dict(self.action.get('env', {}))
    for s in self.action.get('secrets', []):
        env_vars[s] = os.environ.get(s)
    env_vars.update(self.env)
    env_vars['HOME'] = os.environ['HOME']

    # Bind the local volumes to volumes inside container
    volumes = {}
    volumes[env_vars['HOME']] = {'bind': env_vars['HOME']}
    volumes[env_vars['GITHUB_EVENT_PATH']] = {
        'bind': '/github/workflow/event.json'
    }
    volumes[env_vars['GITHUB_WORKSPACE']] = {
        'bind': env_vars['GITHUB_WORKSPACE']
    }
    volumes['/var/run/docker.sock'] = {'bind': '/var/run/docker.sock'}

    # Inside the container the event file lives at the bind location.
    env_vars['GITHUB_EVENT_PATH'] = '/github/workflow/event.json'

    log.debug('Invoking docker_create() method\n' +
              '  img: {}\n'.format(img) +
              '  cmd: {}\n'.format(self.action.get('args', None)) +
              '  vol: {}\n'.format(volumes) +
              '  args: {}'.format(self.action.get('args', None)))

    self.container = self.docker_client.containers.create(
        image=img,
        command=self.action.get('args', None),
        name=self.cid,
        volumes=volumes,
        working_dir=env_vars['GITHUB_WORKSPACE'],
        environment=env_vars,
        entrypoint=self.action.get('runs', None),
        detach=True)
def _get_container_kwargs(self, step, img, name):
    """Assemble the keyword arguments used to create the container of a step.

    Defaults are computed first, then adjusted by
    ``_update_with_engine_config`` and finally overridden by the entries in
    ``step.options``.

    Args:
      step: Step providing ``args``, ``dir``, ``runs`` and ``options``.
      img(str): Image reference for the container.
      name(str): Name for the container.

    Returns:
      dict: The container creation arguments.
    """
    interactive = self._config.pty
    workspace_bind = f"{self._config.workspace_dir}:/workspace:Z"

    args = dict(
        image=img,
        command=list(step.args),
        name=name,
        volumes=[workspace_bind],
        working_dir=step.dir or "/workspace",
        environment=self._prepare_environment(step),
        entrypoint=step.runs or None,
        # attach a terminal instead of detaching when a pty is requested
        detach=not interactive,
        tty=interactive,
        stdin_open=interactive,
    )

    self._update_with_engine_config(args)
    args.update(step.options)

    log.debug(f"container args: {pu.prettystr(args)}\n")
    return args
def _exec_mpi(self, cmd, step, **kwargs):
    """Wrap a command in an ``mpirun`` batch script and submit it via sbatch.

    A ``<job_name>.sh`` script and an empty ``<job_name>.out`` file are
    written using relative paths, then ``sbatch --wait`` is executed while
    the output file is streamed.

    Args:
      cmd(list): Command tokens, joined with spaces after ``mpirun``.
      step: Step being executed.
      **kwargs: Forwarded to ``HostRunner._exec_cmd``.

    Returns:
      int: 0 in dry-run mode, otherwise the exit code reported for sbatch.
    """
    self._set_config_vars(step)
    job_name = pu.sanitized_name(step.id, self._config.wid)

    job_script = f"{job_name}.sh"
    out_file = f"{job_name}.out"

    # make sure the output file exists (and is empty) before streaming it
    with open(out_file, "w"):
        pass

    with open(job_script, "w") as script:
        directives = [
            f"--job-name={job_name}",
            f"--output={out_file}",
            f"--nodes={self._nodes}",
            f"--ntasks={self._ntasks}",
            f"--ntasks-per-node={self._ntasks_per_node}",
        ]
        if self._nodelist:
            directives.append(f"--nodelist={self._nodelist}")

        script.write("#!/bin/bash\n")
        script.write("".join(f"#SBATCH {item}\n" for item in directives))
        script.write("mpirun " + " ".join(cmd))

    sbatch_cmd = ["sbatch", "--wait", *self._get_resman_kwargs(step), job_script]
    log.debug(f"Command: {sbatch_cmd}")

    if self._config.dry_run:
        return 0

    self._spawned_jobs.add(job_name)
    self._start_out_stream(out_file)

    _, ecode, _ = HostRunner._exec_cmd(sbatch_cmd, **kwargs)

    self._stop_out_stream()
    self._spawned_jobs.remove(job_name)

    return ecode
def run(self, step):
    """Execute a step in a kubernetes cluster.

    On first use the volume claim is created (if missing) and an init Pod
    is scheduled to copy the context; afterwards a Pod is created for the
    step, its log is read and its final phase decides the exit code.

    Args:
      step: The step to execute.

    Returns:
      int: 0 in dry-run mode; otherwise the value from
        ``_pod_exit_code()``, or 1 if that point was never reached.
    """
    self._pod_name = self._base_pod_name + f"-{step.id}"

    needs_build, _, img, tag, _ = self._get_build_info(step)
    image = f"{img}:{tag}"

    if needs_build:
        # this method never builds an image, so a step requiring a build
        # cannot be executed; say which step and image are affected
        log.fail(
            f"Cannot build image {image} for step '{step.id}'; "
            "building images is not supported when running in kubernetes."
        )

    m = f"[{step.id}] kubernetes run {self._namespace}.{self._pod_name}"
    log.info(m)

    if self._config.dry_run:
        return 0

    ecode = 1
    try:
        if not self._vol_claim_created:
            if not self._vol_claim_exists():
                self._vol_claim_create()
            self._vol_claim_created = True

        if not self._init_pod_created:
            e, self._pod_host_node = self._init_pod_schedule()
            if e:
                raise Exception("None of the nodes are schedulable.")
            self._copy_ctx()
            self._init_pod_delete()
            self._init_pod_created = True

        self._pod_create(step, image, self._pod_host_node)
        self._pod_read_log()
        ecode = self._pod_exit_code()
    except Exception as e:
        log.fail(e)
    finally:
        # the step Pod is removed regardless of the outcome
        self._pod_delete()

    log.debug(f"returning with {ecode}")
    return ecode
def _get_container_kwargs(self, step, img, name):
    """Build the keyword arguments used to create the container of a step.

    The workspace directory and the docker socket are bind-mounted, the
    working directory is ``/workspace`` and the container is detached.
    The result is adjusted by ``_update_with_engine_config``.

    Args:
      step: Step providing ``args`` and ``runs``.
      img(str): Image reference for the container.
      name(str): Name for the container.

    Returns:
      dict: The container creation arguments.
    """
    args = {"image": img, "command": list(step.args), "name": name}

    workspace_bind = f"{self._config.workspace_dir}:/workspace"
    docker_sock_bind = "/var/run/docker.sock:/var/run/docker.sock"
    args["volumes"] = [workspace_bind, docker_sock_bind]

    args["working_dir"] = "/workspace"
    args["environment"] = self._prepare_environment(step)
    args["entrypoint"] = step.runs or None
    args["detach"] = True

    self._update_with_engine_config(args)

    pretty_args = pu.prettystr(args)
    log.debug(f"container args: {pretty_args}\n")
    return args
def get_sha():
    """Return the abbreviated sha of the commit ``HEAD`` points to.

    When ``init_repo_object()`` yields no repository the string 'unknown'
    is returned. When a repository exists but resolving its head raises
    ``ValueError``, the error is logged and ``log.fail`` is invoked.

    Returns:
      str: The short sha of the head commit or 'unknown'.
    """
    repo = init_repo_object()
    if not repo:
        return 'unknown'

    try:
        head_sha = repo.head.object.hexsha
        return repo.git.rev_parse(head_sha, short=True)
    except ValueError as err:
        log.debug(err)
        root_folder = get_git_root_folder()
        log.fail(
            'Could not obtain revision of repository located at {}'.format(
                root_folder))
def _get_container_options(self):
    """Translate the container settings into a flat list of CLI tokens.

    Default settings are adjusted by ``_update_with_engine_config``; each
    setting (or each item of a list-valued setting) is converted with
    ``pu.key_value_to_flag`` and the result is split on single spaces.

    Returns:
      list: The option tokens.
    """
    container_args = {
        "userns": True,
        "pwd": "/workspace",
        "bind": [f"{self._config.workspace_dir}:/workspace"],
    }
    self._update_with_engine_config(container_args)

    flags = []
    for key, value in container_args.items():
        # treat scalars as one-element lists so both cases share a path
        values = value if isinstance(value, list) else [value]
        flags.extend(pu.key_value_to_flag(key, item) for item in values)

    options = " ".join(flags).split(" ")
    log.debug(f"container options: {options}\n")
    return options
def _get_container_options(self):
    """Produce the command-line option tokens for the container.

    Starts from the default settings, lets ``_update_with_engine_config``
    modify them, turns every setting into a flag through
    ``pu.key_value_to_flag`` (once per item for list values) and splits the
    joined flags on single spaces.

    Returns:
      list: The option tokens.
    """
    workspace_bind = f'{self._config.workspace_dir}:/workspace'
    container_args = dict(userns=True, pwd='/workspace', bind=[workspace_bind])
    self._update_with_engine_config(container_args)

    flags = [
        pu.key_value_to_flag(name, item)
        for name, setting in container_args.items()
        for item in (setting if isinstance(setting, list) else [setting])
    ]

    options = ' '.join(flags).split(' ')
    log.debug(f'container options: {options}\n')
    return options
def run(self, step):
    """Execute a step directly on the host.

    Args:
      step: Step whose ``runs`` and ``args`` form the command.

    Returns:
      int: 0 in dry-run mode, otherwise the exit code of the command.

    Raises:
      AttributeError: If the step has no ``runs`` value.
    """
    step_env = self._prepare_environment(step, env=dict(os.environ))

    if not step.runs:
        raise AttributeError("Expecting 'runs' attribute in step.")

    cmd = step.runs + tuple(step.args)
    log.info(f"[{step.id}] {cmd}")

    if self._config.dry_run:
        return 0

    log.debug(f"Environment:\n{pu.prettystr(step_env)}")

    exec_opts = {
        "env": step_env,
        "cwd": self._config.workspace_dir,
        "pids": self._spawned_pids,
    }
    pid, ecode, _ = HostRunner._exec_cmd(cmd, **exec_opts)

    # a pid of 0 means no process was registered, so nothing to forget
    if pid != 0:
        self._spawned_pids.remove(pid)

    return ecode
def _exec_cmd(cmd, env=None, cwd=None, pids=None, logging=True):
    """Run a command on the host and wait for it to finish.

    Args:
      cmd(list): Command and arguments handed to ``Popen``.
      env(dict): Environment for the child process; None inherits the
        current environment.
      cwd(str): Working directory for the child process. Defaults to the
        working directory at the time of the call.
      pids(set): Set to which the PID of the child process is added. A
        new set is used when omitted.
      logging(bool): When True every output line is sent to
        ``log.step_info``; otherwise lines are collected and returned.

    Returns:
      tuple: ``(pid, ecode, output)``. ``pid`` is 0 if the process could
        not be started; ``ecode`` is 1 when an exception was raised;
        ``output`` holds the collected lines joined by newlines (empty when
        ``logging`` is True or on error).
    """
    # Resolve the defaults per call: a default of `os.getcwd()` is frozen
    # when the function is defined and a default `set()` is shared by
    # every call that omits the argument.
    if cwd is None:
        cwd = os.getcwd()
    if pids is None:
        pids = set()

    pid = 0
    ecode = None
    output = []
    try:
        with Popen(
            cmd,
            stdout=PIPE,
            stderr=STDOUT,
            universal_newlines=True,
            preexec_fn=os.setsid,
            env=env,
            cwd=cwd,
        ) as p:
            pid = p.pid
            pids.add(p.pid)
            log.debug("Reading process output")

            for line in iter(p.stdout.readline, ""):
                if logging:
                    log.step_info(line.rstrip())
                else:
                    output.append(line.rstrip())

            ecode = p.wait()
            log.debug(f"Code returned by process: {ecode}")

    except SubprocessError as ex:
        # partial output is discarded on failure
        output = []
        if not ecode:
            ecode = 1
        log.step_info(f"Command '{cmd[0]}' failed with: {ex}")
    except Exception as ex:
        output = []
        ecode = 1
        log.step_info(f"Command raised non-SubprocessError error: {ex}")

    return pid, ecode, "\n".join(output)
def __apply_substitution(wf_element, k, v, used_registry):
    """Replace every occurrence of ``k`` with ``v`` in a workflow element.

    Args:
      wf_element(str|list|dict): Element to process. Lists are assumed to
        hold strings and dictionaries to map strings to strings.
      k(str): Text to look for.
      v(str): Text that replaces ``k``.
      used_registry(dict): ``used_registry[k]`` is set to 1 whenever a
        substitution is applied.

    Returns:
      The processed element: a new string for string input, or the same
      list/dict object updated in place. Other types are returned
      untouched.
    """
    if isinstance(wf_element, str):
        if k in wf_element:
            log.debug(f"Applying substitution to string {k}")
            wf_element = wf_element.replace(k, v)
            used_registry[k] = 1

    elif isinstance(wf_element, list):
        # we assume list of strings
        for i, e in enumerate(wf_element):
            if k in e:
                log.debug(f"Applying substitution to item {i}: {e}")
                wf_element[i] = e.replace(k, v)
                used_registry[k] = 1

    elif isinstance(wf_element, dict):
        # we assume map of strings; only values are replaced, so the set of
        # keys does not change while iterating
        for ek, ev in wf_element.items():
            if k in ek:
                log.fail("Substitutions not allowed on dictionary keys")
            if k in ev:
                # report the dictionary key whose value is being changed
                log.debug(
                    f"Applying substitution to value associated to key {ek}")
                wf_element[ek] = ev.replace(k, v)
                used_registry[k] = 1

    return wf_element
def run(self, step):
    """Execute a step directly on the host.

    Args:
      step(dict): Step description; ``runs`` and ``args`` form the command
        and ``name`` is used for logging.

    Returns:
      int: 0 in dry-run mode, otherwise the exit code of the command.

    Raises:
      AttributeError: If the step has no (or an empty) ``runs`` entry.
    """
    step_env = StepRunner.prepare_environment(step, os.environ)

    runs = step.get('runs', [])
    if not runs:
        raise AttributeError("Expecting 'runs' attribute in step.")

    # Build a new list: extending the step's own 'runs' list would append
    # the arguments again every time the step is executed.
    cmd = list(runs) + list(step.get('args', []))

    log.info(f'[{step["name"]}] {cmd}')

    if self._config.dry_run:
        return 0

    # show the environment the command actually receives
    log.debug(f'Environment:\n{pu.prettystr(step_env)}')

    pid, ecode, _ = HostRunner._exec_cmd(cmd, step_env,
                                         self._config.workspace_dir,
                                         self._spawned_pids)
    if pid != 0:
        self._spawned_pids.remove(pid)

    return ecode
def _init_pod_schedule(self):
    """Create the init Pod, choosing the node it is placed on.

    * With the ``pod_host_node`` option set, the Pod is created on that
      node.
    * With ``persistent_volume_name`` set (and no ``pod_host_node``), the
      Pod is created without selecting a node.
    * Otherwise the cluster nodes are tried one by one until the Pod can
      be created. Nodes whose first taint is the master or unreachable
      role are tried last.

    Returns:
      tuple: ``(e, pod_host_node)`` where ``e`` is the value returned by the
        last ``_init_pod_create`` call (0 if none was made) and
        ``pod_host_node`` is the selected node name or None.
    """
    opts = self._config.resman_opts

    if opts.get("pod_host_node", None):
        status = self._init_pod_create(opts.pod_host_node)
        return status, opts.pod_host_node

    if opts.get("persistent_volume_name", None):
        return self._init_pod_create(), None

    deprioritized_roles = (
        "node-role.kubernetes.io/master",
        "node-role.kubernetes.io/unreachable",
    )
    preferred, fallback = [], []
    for node in self._kclient.list_node().items:
        taints = node.spec.taints
        # only the first taint is looked at to decide the role
        role = taints[0].key if taints else ""
        hostname = node.metadata.labels["kubernetes.io/hostname"]
        if role in deprioritized_roles:
            fallback.append(hostname)
        else:
            preferred.append(hostname)

    # preferred nodes are tried newest-listed first, fallback ones in order
    candidates = preferred[::-1] + fallback

    status, chosen = 0, None
    for hostname in candidates:
        log.debug(f"trying to schedule init pod on {hostname}")
        status = self._init_pod_create(hostname)
        if status:
            # creation failed on this node; clean up before the next try
            self._init_pod_delete()
            continue
        chosen = hostname
        break

    return status, chosen
def cli(ctx, file, step, entrypoint):
    """Opens an interactive shell using all the attributes defined in the
    workflow file for the given STEP, but ignoring ``runs`` and ``args``. By
    default, it invokes /bin/bash. If you need to invoke another one, you can
    specify it in the --entrypoint flag.

    NOTE: this command only works for (local) host runner in Docker.
    """
    # parse a mutable workflow so the selected step can be modified below
    workflow = WorkflowParser.parse(file=file, step=step, immutable=False)

    # drop the step's own command and run the requested entrypoint instead
    shell_step = workflow.steps[0]
    shell_step.args = []
    shell_step.runs = entrypoint

    # docker engine in pty mode: attached containers with a terminal
    shell_config = ConfigLoader.load(engine_name="docker", pty=True)

    with WorkflowRunner(shell_config) as runner:
        try:
            runner.run(workflow)
        except Exception as err:
            log.debug(traceback.format_exc())
            log.fail(err)
def run(self, reuse=False):
    """Execute the action directly on the host.

    The command is the action's ``runs`` entry (``entrypoint.sh`` by
    default) followed by its ``args``, resolved relative to the action's
    ``repo_dir`` or, failing that, to its ``uses`` path under ``self.cwd``.
    The action's ``env`` is added to ``os.environ`` while the command runs.

    Args:
      reuse(bool): Not used by this method.
    """
    # Copy the list: modifying the action's own 'runs' entry would rewrite
    # the path and append the arguments again on every execution.
    cmd = list(self.action.get('runs', ['entrypoint.sh']))
    cmd[0] = os.path.join('./', cmd[0])
    cmd.extend(self.action.get('args', []))

    cwd = self.cwd
    if not self.dry_run:
        if 'repo_dir' in self.action:
            workdir = self.action['repo_dir']
        else:
            workdir = os.path.join(cwd, self.action['uses'])
        os.chdir(workdir)
        cmd[0] = os.path.join(workdir, cmd[0])

    log.info('{}[{}] {}'.format(self.msg_prefix, self.action['name'],
                                ' '.join(cmd)))

    if self.dry_run:
        return

    # Only touch the process environment when the command really runs, so
    # that a dry run leaves no variables behind.
    action_env = self.action.get('env', {})
    os.environ.update(action_env)

    ecode = 0

    try:
        log.debug('Executing: {}'.format(' '.join(cmd)))
        p = Popen(cmd, stdout=PIPE, stderr=STDOUT,
                  universal_newlines=True, preexec_fn=os.setsid)

        popper.cli.process_list.append(p.pid)

        log.debug('Reading process output')

        for line in iter(p.stdout.readline, ''):
            line_decoded = pu.decode(line)
            log.info(line_decoded[:-1])

        # The output reaching EOF does not guarantee that the process has
        # been reaped; poll() alone may still return None at this point.
        ecode = p.wait()
        log.debug('Code returned by process: {}'.format(ecode))

    except CalledProcessError as ex:
        msg = "Command '{}' failed: {}".format(cmd, ex)
        ecode = ex.returncode
        log.info(msg)
    finally:
        log.info()

        # remove variables that we added to the environment
        for i in action_env:
            os.environ.pop(i, None)

        os.chdir(cwd)

    if ecode != 0:
        log.fail("Action '{}' failed.".format(self.action['name']))
def host_start(self, cmd):
    """Run a command through the shell on the host machine.

    The tokens in ``cmd`` are joined with single spaces and executed with
    ``shell=True``; every output line is forwarded to ``log.action_info``.
    The working directory is reset to ``self.cwd`` afterwards.

    Args:
      cmd(list): Tokens of the command to execute.

    Returns:
      int: The return code of the process (0 in dry-run mode).
    """
    command_line = ' '.join(cmd)
    log.info('{}[{}] {}'.format(self.msg_prefix, self.action['name'],
                                command_line))

    if self.dry_run:
        return 0

    ecode = 0

    try:
        log.debug('Executing: {}'.format(command_line))
        proc = Popen(command_line, stdout=PIPE, stderr=STDOUT, shell=True,
                     universal_newlines=True, preexec_fn=os.setsid)

        popper.cli.process_list.append(proc.pid)

        log.debug('Reading process output')

        for raw_line in iter(proc.stdout.readline, ''):
            # drop the last character (the line terminator) before logging
            log.action_info(pu.decode(raw_line)[:-1])

        ecode = proc.wait()
        log.debug('Code returned by process: {}'.format(ecode))

    except CalledProcessError as ex:
        ecode = ex.returncode
        log.action_info("Command '{}' failed: {}".format(cmd, ex))
    finally:
        log.action_info()
        os.chdir(self.cwd)

    return ecode
def stop_running_tasks(self):
    """Delete the step Pod upon receiving SIGINT.

    Only the Pod is removed here; the PersistentVolumeClaim is left
    untouched by this method.
    """
    # NOTE(review): earlier documentation also mentioned deleting the volume
    # claim; presumably that happens during runner teardown - confirm.
    log.debug("received SIGINT. deleting pod")
    self._pod_delete()
def _pod_create(self, step, image, pod_host_node=None):
    """Create the Pod that runs a step and wait until it leaves ``Pending``.

    The Pod runs a single container with the volume claim mounted on
    ``/workspace``, which is also its working directory.

    Args:
      step: Step providing ``id``, ``runs`` and ``args``.
      image(str): Image used by the container.
      pod_host_node(str): When given, the Pod is pinned to this node
        through a hostname node selector.

    Raises:
      Exception: If the Pod is still ``Pending`` once the number of checks
        reaches the ``pod_retry_limit`` option (60 by default).
    """
    log.debug(f"trying to start step pod on {pod_host_node}")
    env = self._prepare_environment(step)
    log.debug(env)

    ws_vol_mount = f"{self._pod_name}-ws"

    container = {
        "image": image,
        "name": f"{step.id}",
        "workingDir": "/workspace",
        "volumeMounts": [{"name": ws_vol_mount, "mountPath": "/workspace"}],
    }
    if env.keys():
        container["env"] = [
            {"name": name, "value": value} for name, value in env.items()
        ]
    if step.runs and list(step.runs):
        container["command"] = list(step.runs)
    if step.args and list(step.args):
        container["args"] = list(step.args)

    spec = {
        "restartPolicy": "Never",
        "containers": [container],
        "volumes": [{
            "name": ws_vol_mount,
            "persistentVolumeClaim": {"claimName": self._vol_claim_name},
        }],
    }
    if pod_host_node:
        spec["nodeSelector"] = {"kubernetes.io/hostname": pod_host_node}

    pod_conf = {
        "apiVersion": "v1",
        "kind": "Pod",
        "metadata": {"name": self._pod_name},
        "spec": spec,
    }

    self._kclient.create_namespaced_pod(body=pod_conf,
                                        namespace=self._namespace)

    # poll once per second until the Pod is no longer `Pending`
    checks = 0
    while True:
        checks += 1
        pod = self._kclient.read_namespaced_pod(
            self._pod_name, namespace=self._namespace)
        if pod.status.phase != "Pending":
            break

        log.debug(f"pod {self._pod_name} not started yet")

        if checks == self._config.resman_opts.get("pod_retry_limit", 60):
            raise Exception("Timed out waiting for Pod to start")

        time.sleep(1)