def job(self, joborder, basedir, output_callback, move_outputs=True, **kwargs):
    """Generator driving execution of this workflow's steps.

    Seeds per-input workflow state from ``joborder`` (falling back to each
    input's ``default``), then repeatedly asks each incomplete step for new
    jobs via ``try_make_job``, yielding them to the caller.  Yields ``None``
    when no progress was made but steps remain (caller should wait/retry).
    When all steps complete, gathers workflow outputs, optionally relocates
    output files from intermediate step directories into ``self.outdir``,
    and finally invokes ``output_callback(wo, self.processStatus)``.

    :param joborder: dict of input values keyed by the input's short id.
    :param basedir: base directory passed through to ``try_make_job``.
    :param output_callback: called once at the end with (outputs, status).
    :param move_outputs: if True, move output files out of intermediate
        step output directories into ``self.outdir`` and prune the
        now-empty intermediate directories.
    :raises WorkflowException: if a required input is missing and has no
        default value.
    """
    self.state = {}
    self.processStatus = "success"
    # "outdir" must not propagate to step jobs; each gets its own outdir.
    if "outdir" in kwargs:
        del kwargs["outdir"]
    for i in self.tool["inputs"]:
        # iid is the fragment part of the input's full id URI.
        (_, iid) = urlparse.urldefrag(i["id"])
        if iid in joborder:
            self.state[i["id"]] = WorkflowStateItem(i, copy.deepcopy(joborder[iid]))
        elif "default" in i:
            self.state[i["id"]] = WorkflowStateItem(i, copy.deepcopy(i["default"]))
        else:
            raise WorkflowException("Input '%s' not in input object and does not have a default value." % (i["id"]))
    # Pre-register every step output as pending (None) so object_from_state
    # can tell "not yet produced" apart from a real value.
    for s in self.steps:
        for out in s.tool["outputs"]:
            self.state[out["id"]] = None
    output_dirs = set()
    completed = 0
    # Scheduling loop: keep polling steps until all are complete or a step
    # flips processStatus away from "success".
    while completed < len(self.steps) and self.processStatus == "success":
        made_progress = False
        completed = 0
        for step in self.steps:
            if step.completed:
                completed += 1
            else:
                for newjob in self.try_make_job(step, basedir, **kwargs):
                    if newjob:
                        made_progress = True
                        if newjob.outdir:
                            # Track step output dirs for the move/cleanup pass.
                            output_dirs.add(newjob.outdir)
                    # NOTE: newjob may be None here; it is still yielded.
                    yield newjob
        if not made_progress and completed < len(self.steps):
            # Nothing runnable right now; signal the caller to wait.
            yield None
    wo = object_from_state(self.state, self.tool["outputs"], True)
    if move_outputs:
        targets = set()
        conflicts = set()
        outfiles = findfiles(wo)
        # First pass: compute destination paths and detect name collisions.
        for f in outfiles:
            for a in output_dirs:
                if f["path"].startswith(a):
                    src = f["path"]
                    dst = os.path.join(self.outdir, src[len(a) + 1:])
                    if dst in targets:
                        conflicts.add(dst)
                    else:
                        targets.add(dst)
        # Second pass: move files, disambiguating conflicting names with a
        # random suffix before the extension.
        for f in outfiles:
            for a in output_dirs:
                if f["path"].startswith(a):
                    src = f["path"]
                    dst = os.path.join(self.outdir, src[len(a) + 1:])
                    if dst in conflicts:
                        sp = os.path.splitext(dst)
                        dst = "%s-%s%s" % (sp[0], str(random.randint(1, 1000000000)), sp[1])
                    dirname = os.path.dirname(dst)
                    if not os.path.exists(dirname):
                        os.makedirs(dirname)
                    _logger.debug("[workflow %s] Moving '%s' to '%s'", id(self), src, dst)
                    shutil.move(src, dst)
                    f["path"] = dst
        # Prune intermediate output directories that are now empty.
        for a in output_dirs:
            if os.path.exists(a) and empty_subtree(a):
                _logger.debug("[workflow %s] Removing intermediate output directory %s", id(self), a)
                shutil.rmtree(a, True)
    _logger.info("[workflow %s] outdir is %s", id(self), self.outdir)
    output_callback(wo, self.processStatus)
def run(self, dry_run=False, pull_image=True, rm_container=True, rm_tmpdir=True, move_outputs=True, **kwargs):
    """Execute this command-line job, optionally wrapped in Docker.

    Builds the runtime command prefix (a ``docker run`` invocation when a
    DockerRequirement is in effect), stages generated files into the output
    directory, runs the tool with ``subprocess.Popen``, maps the exit code
    to a process status, collects outputs, and finally reports via
    ``self.output_callback(outputs, processStatus)``.

    :param dry_run: if True, log the command line and return
        ``(self.outdir, {})`` without executing.
    :param pull_image: allow pulling the Docker image if absent.
    :param rm_container: pass ``--rm`` to ``docker run``.
    :param rm_tmpdir: remove ``self.tmpdir`` after the job finishes.
    :param move_outputs: if True, remove ``self.outdir`` when it is empty
        after the run.
    :raises WorkflowException: for missing input files, or when Docker is
        required but no image could be obtained.
    """
    #_logger.info("[job %s] starting with outdir %s", id(self), self.outdir)
    if not os.path.exists(self.outdir):
        os.makedirs(self.outdir)
    #with open(os.path.join(outdir, "cwl.input.json"), "w") as fp:
    #    json.dump(self.joborder, fp)
    runtime = []
    env = {"TMPDIR": self.tmpdir}
    (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
    # Verify every mapped input file exists before doing any work.
    for f in self.pathmapper.files():
        if not os.path.exists(self.pathmapper.mapper(f)[0]):
            raise WorkflowException("Required input file %s not found" % self.pathmapper.mapper(f)[0])
    img_id = None
    if docker_req and kwargs.get("use_container") is not False:
        # NOTE(review): passes the runner's entire environment to the
        # container launcher — confirm this is intended.
        env = os.environ
        img_id = docker.get_from_requirements(docker_req, docker_is_req, pull_image)
    if docker_is_req and img_id is None:
        raise WorkflowException("Docker is required for running this tool.")
    if img_id:
        # Containerized run: mount inputs read-only, outdir/tmpdir
        # read-write at fixed in-container paths.
        runtime = ["docker", "run", "-i"]
        for src in self.pathmapper.files():
            vol = self.pathmapper.mapper(src)
            runtime.append("--volume=%s:%s:ro" % vol)
        runtime.append("--volume=%s:%s:rw" % (os.path.abspath(self.outdir), "/tmp/job_output"))
        runtime.append("--volume=%s:%s:rw" % (os.path.abspath(self.tmpdir), "/tmp/job_tmp"))
        runtime.append("--workdir=%s" % ("/tmp/job_output"))
        runtime.append("--user=%s" % (os.geteuid()))
        if rm_container:
            runtime.append("--rm")
        runtime.append("--env=TMPDIR=/tmp/job_tmp")
        for t, v in self.environment.items():
            runtime.append("--env=%s=%s" % (t, v))
        runtime.append(img_id)
    else:
        # Local run: use the tool's declared environment plus TMPDIR.
        env = self.environment
        if not os.path.exists(self.tmpdir):
            os.makedirs(self.tmpdir)
        env["TMPDIR"] = self.tmpdir
    stdin = None
    stdout = None
    _logger.info("[job %s] exec %s%s%s",
                 id(self),
                 " ".join([shellescape.quote(arg) if needs_shell_quoting(arg) else arg for arg in (runtime + self.command_line)]),
                 ' < %s' % (self.stdin) if self.stdin else '',
                 ' > %s' % os.path.join(self.outdir, self.stdout) if self.stdout else '')
    if dry_run:
        return (self.outdir, {})
    outputs = {}
    try:
        # Stage generated files: dict entries are symlinked from their
        # source path; plain strings are written as literal file contents.
        for t in self.generatefiles:
            if isinstance(self.generatefiles[t], dict):
                os.symlink(self.generatefiles[t]["path"], os.path.join(self.outdir, t))
            else:
                with open(os.path.join(self.outdir, t), "w") as f:
                    f.write(self.generatefiles[t])
        if self.stdin:
            stdin = open(self.pathmapper.mapper(self.stdin)[0], "rb")
        else:
            # PIPE + immediate close below gives the child an empty stdin.
            stdin = subprocess.PIPE
        if self.stdout:
            absout = os.path.join(self.outdir, self.stdout)
            dn = os.path.dirname(absout)
            if dn and not os.path.exists(dn):
                os.makedirs(dn)
            stdout = open(absout, "wb")
        else:
            # Without a stdout redirect, tool output goes to our stderr.
            stdout = sys.stderr
        sp = subprocess.Popen(runtime + self.command_line,
                              shell=False,
                              close_fds=True,
                              stdin=stdin,
                              stdout=stdout,
                              env=env,
                              cwd=self.outdir)
        if stdin == subprocess.PIPE:
            sp.stdin.close()
        rcode = sp.wait()
        if stdin != subprocess.PIPE:
            stdin.close()
        if stdout is not sys.stderr:
            stdout.close()
        # Map exit code to status; explicit code lists take precedence.
        if self.successCodes and rcode in self.successCodes:
            processStatus = "success"
        elif self.temporaryFailCodes and rcode in self.temporaryFailCodes:
            processStatus = "temporaryFail"
        elif self.permanentFailCodes and rcode in self.permanentFailCodes:
            processStatus = "permanentFail"
        elif rcode == 0:
            processStatus = "success"
        else:
            processStatus = "permanentFail"
        # Re-point staged symlinks back at their original (reverse-mapped)
        # locations so collected outputs reference stable paths.
        for t in self.generatefiles:
            if isinstance(self.generatefiles[t], dict):
                os.remove(os.path.join(self.outdir, t))
                os.symlink(self.pathmapper.reversemap(self.generatefiles[t]["path"])[1], os.path.join(self.outdir, t))
        outputs = self.collect_outputs(self.outdir)
    except Exception:
        _logger.exception("Exception while running job")
        processStatus = "permanentFail"
    if processStatus != "success":
        _logger.warn("[job %s] completed %s", id(self), processStatus)
    else:
        _logger.debug("[job %s] completed %s", id(self), processStatus)
    _logger.debug("[job %s] %s", id(self), json.dumps(outputs, indent=4))
    self.output_callback(outputs, processStatus)
    if rm_tmpdir:
        _logger.debug("[job %s] Removing temporary directory %s", id(self), self.tmpdir)
        shutil.rmtree(self.tmpdir, True)
    if move_outputs and empty_subtree(self.outdir):
        # FIX: this message previously logged self.tmpdir even though it is
        # self.outdir that gets removed below.
        _logger.debug("[job %s] Removing empty output directory %s", id(self), self.outdir)
        shutil.rmtree(self.outdir, True)
def run(self, dry_run=False, pull_image=True, rm_container=True, rm_tmpdir=True, move_outputs=True, **kwargs):
    """Execute this command-line job, optionally inside a Docker container.

    Builds the runtime command prefix (a locked-down ``docker run`` with
    ``--read-only`` and ``--net=none`` when a DockerRequirement applies),
    stages generated files, runs the tool via ``subprocess.Popen``, maps
    the exit code to a process status, collects outputs, and reports the
    result through ``self.output_callback(outputs, processStatus)``.

    :param dry_run: if True, log the command line and return
        ``(self.outdir, {})`` without executing.
    :param pull_image: allow pulling the Docker image if not present.
    :param rm_container: pass ``--rm`` to ``docker run``.
    :param rm_tmpdir: remove ``self.tmpdir`` after the job finishes.
    :param move_outputs: if True, remove ``self.outdir`` when it is empty
        after the run.
    :raises WorkflowException: for missing input files, or when Docker is
        required but no image could be obtained.
    """
    if not os.path.exists(self.outdir):
        os.makedirs(self.outdir)
    #with open(os.path.join(outdir, "cwl.input.json"), "w") as fp:
    #    json.dump(self.joborder, fp)
    runtime = []
    env = {"TMPDIR": self.tmpdir}
    (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
    # Validate every mapped input is an existing regular file up front.
    for f in self.pathmapper.files():
        if not os.path.isfile(self.pathmapper.mapper(f)[0]):
            raise WorkflowException("Required input file %s not found or is not a regular file." % self.pathmapper.mapper(f)[0])
    img_id = None
    if docker_req and kwargs.get("use_container") is not False:
        # NOTE(review): hands the runner's full environment to the
        # container launcher — confirm this is intended.
        env = os.environ
        img_id = docker.get_from_requirements(docker_req, docker_is_req, pull_image)
    if docker_is_req and img_id is None:
        raise WorkflowException("Docker is required for running this tool.")
    if img_id:
        # Containerized run: inputs mounted read-only, outdir at
        # /var/spool/cwl and tmpdir at /tmp; container is read-only with
        # networking disabled, running as the (possibly VM-mapped) uid.
        runtime = ["docker", "run", "-i"]
        for src in self.pathmapper.files():
            vol = self.pathmapper.mapper(src)
            runtime.append("--volume=%s:%s:ro" % vol)
        runtime.append("--volume=%s:%s:rw" % (os.path.abspath(self.outdir), "/var/spool/cwl"))
        runtime.append("--volume=%s:%s:rw" % (os.path.abspath(self.tmpdir), "/tmp"))
        runtime.append("--workdir=%s" % ("/var/spool/cwl"))
        runtime.append("--read-only=true")
        runtime.append("--net=none")
        euid = docker_vm_uid() or os.geteuid()
        runtime.append("--user=%s" % (euid))
        if rm_container:
            runtime.append("--rm")
        runtime.append("--env=TMPDIR=/tmp")
        for t,v in self.environment.items():
            runtime.append("--env=%s=%s" % (t, v))
        runtime.append(img_id)
    else:
        # Local run: tool-declared environment plus TMPDIR, optionally
        # augmented with selected variables from the caller's environment.
        env = self.environment
        if not os.path.exists(self.tmpdir):
            os.makedirs(self.tmpdir)
        env["TMPDIR"] = self.tmpdir
        vars_to_preserve = kwargs.get("preserve_environment")
        if vars_to_preserve is not None:
            for key, value in os.environ.items():
                if key in vars_to_preserve and key not in env:
                    env[key] = value
    stdin = None
    stdout = None
    scr, _ = get_feature(self, "ShellCommandRequirement")
    if scr:
        # ShellCommandRequirement: arguments are pre-formed shell text,
        # so never quote.
        shouldquote = lambda x: False
    else:
        shouldquote = needs_shell_quoting_re.search
    _logger.info("[job %s] %s$ %s%s%s",
                 self.name,
                 self.outdir,
                 " ".join([shellescape.quote(str(arg)) if shouldquote(str(arg)) else str(arg) for arg in (runtime + self.command_line)]),
                 ' < %s' % (self.stdin) if self.stdin else '',
                 ' > %s' % os.path.join(self.outdir, self.stdout) if self.stdout else '')
    if dry_run:
        return (self.outdir, {})
    outputs = {}
    try:
        # Stage generated files: dict entries are symlinked (unless the
        # source already reverse-maps into outdir); strings are written
        # as literal file contents.
        for t in self.generatefiles:
            if isinstance(self.generatefiles[t], dict):
                src = self.generatefiles[t]["path"]
                dst = os.path.join(self.outdir, t)
                if os.path.dirname(self.pathmapper.reversemap(src)[1]) != self.outdir:
                    _logger.debug("symlinking %s to %s", dst, src)
                    os.symlink(src, dst)
            else:
                with open(os.path.join(self.outdir, t), "w") as f:
                    f.write(self.generatefiles[t])
        if self.stdin:
            stdin = open(self.pathmapper.mapper(self.stdin)[0], "rb")
        else:
            # PIPE + immediate close below gives the child an empty stdin.
            stdin = subprocess.PIPE
        if self.stdout:
            absout = os.path.join(self.outdir, self.stdout)
            dn = os.path.dirname(absout)
            if dn and not os.path.exists(dn):
                os.makedirs(dn)
            stdout = open(absout, "wb")
        else:
            # Without a stdout redirect, tool output goes to our stderr.
            stdout = sys.stderr
        # str() every arg: command_line entries may be non-string values.
        sp = subprocess.Popen([str(x) for x in runtime + self.command_line],
                              shell=False,
                              close_fds=True,
                              stdin=stdin,
                              stdout=stdout,
                              env=env,
                              cwd=self.outdir)
        if stdin == subprocess.PIPE:
            sp.stdin.close()
        rcode = sp.wait()
        if stdin != subprocess.PIPE:
            stdin.close()
        if stdout is not sys.stderr:
            stdout.close()
        # Map exit code to status; explicit code lists take precedence.
        if self.successCodes and rcode in self.successCodes:
            processStatus = "success"
        elif self.temporaryFailCodes and rcode in self.temporaryFailCodes:
            processStatus = "temporaryFail"
        elif self.permanentFailCodes and rcode in self.permanentFailCodes:
            processStatus = "permanentFail"
        elif rcode == 0:
            processStatus = "success"
        else:
            processStatus = "permanentFail"
        # Re-point staged symlinks at their reverse-mapped originals so
        # collected outputs reference stable paths.
        for t in self.generatefiles:
            if isinstance(self.generatefiles[t], dict):
                src = self.generatefiles[t]["path"]
                dst = os.path.join(self.outdir, t)
                if os.path.dirname(self.pathmapper.reversemap(src)[1]) != self.outdir:
                    os.remove(dst)
                    os.symlink(self.pathmapper.reversemap(src)[1], dst)
        outputs = self.collect_outputs(self.outdir)
    except OSError as e:
        # errno 2 (ENOENT): the executable itself was not found.
        if e.errno == 2:
            if runtime:
                _logger.error("'%s' not found", runtime[0])
            else:
                _logger.error("'%s' not found", self.command_line[0])
        else:
            _logger.exception("Exception while running job")
        processStatus = "permanentFail"
    except WorkflowException as e:
        _logger.error("Error while running job: %s" % e)
        processStatus = "permanentFail"
    except Exception as e:
        _logger.exception("Exception while running job")
        processStatus = "permanentFail"
    if processStatus != "success":
        _logger.warn("[job %s] completed %s", self.name, processStatus)
    else:
        _logger.debug("[job %s] completed %s", self.name, processStatus)
    _logger.debug("[job %s] %s", self.name, json.dumps(outputs, indent=4))
    self.output_callback(outputs, processStatus)
    if rm_tmpdir:
        _logger.debug("[job %s] Removing temporary directory %s", self.name, self.tmpdir)
        shutil.rmtree(self.tmpdir, True)
    if move_outputs and empty_subtree(self.outdir):
        _logger.debug("[job %s] Removing empty output directory %s", self.name, self.outdir)
        shutil.rmtree(self.outdir, True)
def run(self, dry_run=False, pull_image=True, rm_container=True, rm_tmpdir=True, move_outputs=True, **kwargs):
    """Execute this command-line job, optionally inside a Docker container.

    Builds the runtime command prefix (a ``docker run`` invocation when a
    DockerRequirement applies), stages generated files, runs the tool via
    ``subprocess.Popen``, maps the exit code to a process status, collects
    outputs, and reports via ``self.output_callback(outputs, processStatus)``.

    :param dry_run: if True, log the command line and return
        ``(self.outdir, {})`` without executing.
    :param pull_image: allow pulling the Docker image if not present.
    :param rm_container: pass ``--rm`` to ``docker run``.
    :param rm_tmpdir: remove ``self.tmpdir`` after the job finishes.
    :param move_outputs: if True, remove ``self.outdir`` when it is empty
        after the run.
    :raises WorkflowException: for missing input files, or when Docker is
        required but no image could be obtained.
    """
    if not os.path.exists(self.outdir):
        os.makedirs(self.outdir)
    #with open(os.path.join(outdir, "cwl.input.json"), "w") as fp:
    #    json.dump(self.joborder, fp)
    runtime = []
    env = {"TMPDIR": self.tmpdir}
    (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
    # Verify every mapped input file exists before doing any work.
    for f in self.pathmapper.files():
        if not os.path.exists(self.pathmapper.mapper(f)[0]):
            raise WorkflowException("Required input file %s not found" % self.pathmapper.mapper(f)[0])
    img_id = None
    if docker_req and kwargs.get("use_container") is not False:
        # NOTE(review): hands the runner's full environment to the
        # container launcher — confirm this is intended.
        env = os.environ
        img_id = docker.get_from_requirements(docker_req, docker_is_req, pull_image)
    if docker_is_req and img_id is None:
        raise WorkflowException("Docker is required for running this tool.")
    if img_id:
        # Containerized run: inputs mounted read-only, outdir/tmpdir
        # read-write at fixed in-container paths, running as the
        # (possibly VM-mapped) effective uid.
        runtime = ["docker", "run", "-i"]
        for src in self.pathmapper.files():
            vol = self.pathmapper.mapper(src)
            runtime.append("--volume=%s:%s:ro" % vol)
        runtime.append("--volume=%s:%s:rw" % (os.path.abspath(self.outdir), "/tmp/job_output"))
        runtime.append("--volume=%s:%s:rw" % (os.path.abspath(self.tmpdir), "/tmp/job_tmp"))
        runtime.append("--workdir=%s" % ("/tmp/job_output"))
        euid = docker_vm_uid() or os.geteuid()
        runtime.append("--user=%s" % (euid))
        if rm_container:
            runtime.append("--rm")
        runtime.append("--env=TMPDIR=/tmp/job_tmp")
        for t, v in self.environment.items():
            runtime.append("--env=%s=%s" % (t, v))
        runtime.append(img_id)
    else:
        # Local run: use the tool's declared environment plus TMPDIR.
        env = self.environment
        if not os.path.exists(self.tmpdir):
            os.makedirs(self.tmpdir)
        env["TMPDIR"] = self.tmpdir
    stdin = None
    stdout = None
    _logger.info(
        "[job %s] %s$ %s%s%s",
        id(self),
        self.outdir,
        " ".join([
            shellescape.quote(arg) if needs_shell_quoting(arg) else arg
            for arg in (runtime + self.command_line)
        ]),
        ' < %s' % (self.stdin) if self.stdin else '',
        ' > %s' % os.path.join(self.outdir, self.stdout) if self.stdout else '')
    if dry_run:
        return (self.outdir, {})
    outputs = {}
    try:
        # Stage generated files: dict entries are symlinked from their
        # source path; strings are written as literal file contents.
        for t in self.generatefiles:
            if isinstance(self.generatefiles[t], dict):
                os.symlink(self.generatefiles[t]["path"], os.path.join(self.outdir, t))
            else:
                with open(os.path.join(self.outdir, t), "w") as f:
                    f.write(self.generatefiles[t])
        if self.stdin:
            stdin = open(self.pathmapper.mapper(self.stdin)[0], "rb")
        else:
            # PIPE + immediate close below gives the child an empty stdin.
            stdin = subprocess.PIPE
        if self.stdout:
            absout = os.path.join(self.outdir, self.stdout)
            dn = os.path.dirname(absout)
            if dn and not os.path.exists(dn):
                os.makedirs(dn)
            stdout = open(absout, "wb")
        else:
            # Without a stdout redirect, tool output goes to our stderr.
            stdout = sys.stderr
        sp = subprocess.Popen(runtime + self.command_line,
                              shell=False,
                              close_fds=True,
                              stdin=stdin,
                              stdout=stdout,
                              env=env,
                              cwd=self.outdir)
        if stdin == subprocess.PIPE:
            sp.stdin.close()
        rcode = sp.wait()
        if stdin != subprocess.PIPE:
            stdin.close()
        if stdout is not sys.stderr:
            stdout.close()
        # Map exit code to status; explicit code lists take precedence.
        if self.successCodes and rcode in self.successCodes:
            processStatus = "success"
        elif self.temporaryFailCodes and rcode in self.temporaryFailCodes:
            processStatus = "temporaryFail"
        elif self.permanentFailCodes and rcode in self.permanentFailCodes:
            processStatus = "permanentFail"
        elif rcode == 0:
            processStatus = "success"
        else:
            processStatus = "permanentFail"
        # Re-point staged symlinks at their reverse-mapped originals so
        # collected outputs reference stable paths.
        for t in self.generatefiles:
            if isinstance(self.generatefiles[t], dict):
                os.remove(os.path.join(self.outdir, t))
                os.symlink(
                    self.pathmapper.reversemap(self.generatefiles[t]["path"])[1],
                    os.path.join(self.outdir, t))
        outputs = self.collect_outputs(self.outdir)
    except OSError as e:
        # errno 2 (ENOENT): the executable itself was not found.
        if e.errno == 2:
            if runtime:
                _logger.error("'%s' not found", runtime[0])
            else:
                _logger.error("'%s' not found", self.command_line[0])
        else:
            _logger.exception("Exception while running job")
        processStatus = "permanentFail"
    except WorkflowException as e:
        _logger.error("Error while running job: %s" % e)
        processStatus = "permanentFail"
    except Exception:
        _logger.exception("Exception while running job")
        processStatus = "permanentFail"
    if processStatus != "success":
        _logger.warn("[job %s] completed %s", id(self), processStatus)
    else:
        _logger.debug("[job %s] completed %s", id(self), processStatus)
    _logger.debug("[job %s] %s", id(self), json.dumps(outputs, indent=4))
    self.output_callback(outputs, processStatus)
    if rm_tmpdir:
        _logger.debug("[job %s] Removing temporary directory %s", id(self), self.tmpdir)
        shutil.rmtree(self.tmpdir, True)
    if move_outputs and empty_subtree(self.outdir):
        # FIX: this message previously logged self.tmpdir even though it is
        # self.outdir that gets removed below.
        _logger.debug("[job %s] Removing empty output directory %s", id(self), self.outdir)
        shutil.rmtree(self.outdir, True)
def job(self, joborder, basedir, output_callback, move_outputs=True, **kwargs):
    """Generator driving execution of this workflow's steps.

    Seeds per-input workflow state from ``joborder`` (falling back to each
    input's ``default``), then repeatedly polls each step's job iterable
    (created lazily via ``try_make_job`` once the step is not yet
    submitted), yielding runnable jobs to the caller.  Yields ``None``
    when no progress was made but steps remain.  On completion, gathers
    workflow outputs, optionally relocates output files into
    ``self.outdir``, and calls ``output_callback(wo, self.processStatus)``.

    :param joborder: dict of input values keyed by the input's short name.
    :param basedir: base directory passed through to ``try_make_job``.
    :param output_callback: called once at the end with (outputs, status).
    :param move_outputs: if True, move output files out of intermediate
        step output directories into ``self.outdir``.
    :raises WorkflowException: if a required input is missing with no
        default, or if workflow outputs are unavailable at the end.
    """
    self.state = {}
    self.processStatus = "success"
    # "outdir" must not propagate to step jobs; each gets its own outdir.
    if "outdir" in kwargs:
        del kwargs["outdir"]
    for i in self.tool["inputs"]:
        iid = shortname(i["id"])
        if iid in joborder:
            self.state[i["id"]] = WorkflowStateItem(i, copy.deepcopy(joborder[iid]))
        elif "default" in i:
            self.state[i["id"]] = WorkflowStateItem(i, copy.deepcopy(i["default"]))
        else:
            raise WorkflowException("Input '%s' not in input object and does not have a default value." % (i["id"]))
    # Pre-register every step output as pending (None) so object_from_state
    # can tell "not yet produced" apart from a real value.
    for s in self.steps:
        for out in s.tool["outputs"]:
            self.state[out["id"]] = None
    output_dirs = set()
    completed = 0
    iterables = []  # NOTE(review): appears unused in this version.
    # Scheduling loop: keep polling step iterables until all steps are
    # complete or processStatus flips away from "success".
    while completed < len(self.steps) and self.processStatus == "success":
        made_progress = False
        for step in self.steps:
            if not step.submitted:
                # Create (or refresh) the step's job iterable lazily.
                step.iterable = self.try_make_job(step, basedir, **kwargs)
            if step.iterable:
                for newjob in step.iterable:
                    if newjob:
                        made_progress = True
                        if newjob.outdir:
                            # Track step output dirs for the move/cleanup pass.
                            output_dirs.add(newjob.outdir)
                        yield newjob
                    else:
                        # Step has nothing runnable right now; move on,
                        # keeping the iterable for the next pass.
                        break
        completed = sum(1 for s in self.steps if s.completed)
        if not made_progress and completed < len(self.steps):
            # Nothing runnable anywhere; signal the caller to wait.
            yield None
    supportsMultipleInput = bool(self.workflow.get_requirement("MultipleInputFeatureRequirement")[0])
    wo = object_from_state(self.state, self.tool["outputs"], True, supportsMultipleInput)
    if wo is None:
        raise WorkflowException("Output for workflow not available")
    if move_outputs:
        targets = set()
        conflicts = set()
        outfiles = findfiles(wo)
        # First pass: compute destination paths and detect name collisions.
        for f in outfiles:
            for a in output_dirs:
                if f["path"].startswith(a):
                    src = f["path"]
                    dst = os.path.join(self.outdir, src[len(a)+1:])
                    if dst in targets:
                        conflicts.add(dst)
                    else:
                        targets.add(dst)
        # Second pass: move files, disambiguating conflicting names with a
        # random suffix before the extension.
        for f in outfiles:
            for a in output_dirs:
                if f["path"].startswith(a):
                    src = f["path"]
                    dst = os.path.join(self.outdir, src[len(a)+1:])
                    if dst in conflicts:
                        sp = os.path.splitext(dst)
                        dst = "%s-%s%s" % (sp[0], str(random.randint(1, 1000000000)), sp[1])
                    dirname = os.path.dirname(dst)
                    if not os.path.exists(dirname):
                        os.makedirs(dirname)
                    _logger.debug("[workflow %s] Moving '%s' to '%s'", self.name, src, dst)
                    shutil.move(src, dst)
                    f["path"] = dst
        # Prune empty intermediate directories, honoring rm_tmpdir.
        for a in output_dirs:
            if os.path.exists(a) and empty_subtree(a):
                if kwargs.get("rm_tmpdir", True):
                    _logger.debug("[workflow %s] Removing intermediate output directory %s", self.name, a)
                    shutil.rmtree(a, True)
    _logger.info("[workflow %s] outdir is %s", self.name, self.outdir)
    output_callback(wo, self.processStatus)
def job(self, joborder, basedir, output_callback, move_outputs=True, **kwargs):
    """Generator driving execution of this workflow's steps.

    Seeds per-input workflow state from ``joborder`` (falling back to each
    input's ``default``), then repeatedly asks each incomplete step for new
    jobs via ``try_make_job``, yielding them to the caller.  Yields ``None``
    when no progress was made but steps remain (caller should wait/retry).
    When all steps complete, gathers workflow outputs, optionally relocates
    output files from intermediate step directories into ``self.outdir``,
    and finally invokes ``output_callback(wo, self.processStatus)``.

    :param joborder: dict of input values keyed by the input's short id.
    :param basedir: base directory passed through to ``try_make_job``.
    :param output_callback: called once at the end with (outputs, status).
    :param move_outputs: if True, move output files out of intermediate
        step output directories into ``self.outdir`` and prune the
        now-empty intermediate directories.
    :raises WorkflowException: if a required input is missing and has no
        default value.
    """
    self.state = {}
    self.processStatus = "success"
    # "outdir" must not propagate to step jobs; each gets its own outdir.
    if "outdir" in kwargs:
        del kwargs["outdir"]
    for i in self.tool["inputs"]:
        # iid is the fragment part of the input's full id URI.
        (_, iid) = urlparse.urldefrag(i["id"])
        if iid in joborder:
            self.state[i["id"]] = WorkflowStateItem(i, copy.deepcopy(joborder[iid]))
        elif "default" in i:
            self.state[i["id"]] = WorkflowStateItem(i, copy.deepcopy(i["default"]))
        else:
            raise WorkflowException("Input '%s' not in input object and does not have a default value." % (i["id"]))
    # Pre-register every step output as pending (None) so object_from_state
    # can tell "not yet produced" apart from a real value.
    for s in self.steps:
        for out in s.tool["outputs"]:
            self.state[out["id"]] = None
    output_dirs = set()
    completed = 0
    # Scheduling loop: keep polling steps until all are complete or a step
    # flips processStatus away from "success".
    while completed < len(self.steps) and self.processStatus == "success":
        made_progress = False
        completed = 0
        for step in self.steps:
            if step.completed:
                completed += 1
            else:
                for newjob in self.try_make_job(step, basedir, **kwargs):
                    if newjob:
                        made_progress = True
                        if newjob.outdir:
                            # Track step output dirs for the move/cleanup pass.
                            output_dirs.add(newjob.outdir)
                    # NOTE: newjob may be None here; it is still yielded.
                    yield newjob
        if not made_progress and completed < len(self.steps):
            # Nothing runnable right now; signal the caller to wait.
            yield None
    wo = object_from_state(self.state, self.tool["outputs"], True)
    if move_outputs:
        targets = set()
        conflicts = set()
        outfiles = findfiles(wo)
        # First pass: compute destination paths and detect name collisions.
        for f in outfiles:
            for a in output_dirs:
                if f["path"].startswith(a):
                    src = f["path"]
                    dst = os.path.join(self.outdir, src[len(a)+1:])
                    if dst in targets:
                        conflicts.add(dst)
                    else:
                        targets.add(dst)
        # Second pass: move files, disambiguating conflicting names with a
        # random suffix before the extension.
        for f in outfiles:
            for a in output_dirs:
                if f["path"].startswith(a):
                    src = f["path"]
                    dst = os.path.join(self.outdir, src[len(a)+1:])
                    if dst in conflicts:
                        sp = os.path.splitext(dst)
                        dst = "%s-%s%s" % (sp[0], str(random.randint(1, 1000000000)), sp[1])
                    dirname = os.path.dirname(dst)
                    if not os.path.exists(dirname):
                        os.makedirs(dirname)
                    _logger.debug("[workflow %s] Moving '%s' to '%s'", id(self), src, dst)
                    shutil.move(src, dst)
                    f["path"] = dst
        # Prune intermediate output directories that are now empty.
        for a in output_dirs:
            if os.path.exists(a) and empty_subtree(a):
                _logger.debug("[workflow %s] Removing intermediate output directory %s", id(self), a)
                shutil.rmtree(a, True)
    _logger.info("[workflow %s] outdir is %s", id(self), self.outdir)
    output_callback(wo, self.processStatus)