def exeval(ex, jobinput, requirements, outdir, tmpdir, context, pull_image):
    if ex["engine"] == "https://w3id.org/cwl/cwl#JsonPointer":
        try:
            obj = {"job": jobinput, "context": context, "outdir": outdir, "tmpdir": tmpdir}
            return schema_salad.ref_resolver.resolve_json_pointer(obj, ex["script"])
        except ValueError as v:
            raise WorkflowException("%s in %s" % (v, obj))

    if ex["engine"] == "https://w3id.org/cwl/cwl#JavascriptEngine":
        engineConfig = []
        for r in reversed(requirements):
            if r["class"] == "ExpressionEngineRequirement" and r["id"] == "https://w3id.org/cwl/cwl#JavascriptEngine":
                engineConfig = r.get("engineConfig", [])
                break
        return sandboxjs.execjs(ex["script"], jshead(engineConfig, jobinput, context, tmpdir, outdir))

    for r in reversed(requirements):
        if r["class"] == "ExpressionEngineRequirement" and r["id"] == ex["engine"]:
            runtime = []

            class DR(object):
                pass
            dr = DR()
            dr.requirements = r.get("requirements", [])
            dr.hints = r.get("hints", [])

            (docker_req, docker_is_req) = process.get_feature(dr, "DockerRequirement")
            img_id = None
            if docker_req:
                img_id = docker.get_from_requirements(docker_req, docker_is_req, pull_image)
            if img_id:
                runtime = ["docker", "run", "-i", "--rm", img_id]

            inp = {
                "script": ex["script"],
                "engineConfig": r.get("engineConfig", []),
                "job": jobinput,
                "context": context,
                "outdir": outdir,
                "tmpdir": tmpdir,
            }

            _logger.debug("Invoking expression engine %s with %s",
                          runtime + aslist(r["engineCommand"]),
                          json.dumps(inp, indent=4))

            sp = subprocess.Popen(runtime + aslist(r["engineCommand"]),
                                  shell=False,
                                  close_fds=True,
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE)

            (stdoutdata, stderrdata) = sp.communicate(json.dumps(inp) + "\n\n")
            if sp.returncode != 0:
                raise WorkflowException("Expression engine returned non-zero exit code on evaluation of\n%s" % json.dumps(inp, indent=4))

            return json.loads(stdoutdata)

    raise WorkflowException("Unknown expression engine '%s'" % ex["engine"])
def exeval(ex, jobinput, requirements, outdir, tmpdir, context, pull_image):
    if ex["engine"] == "https://w3id.org/cwl/cwl#JsonPointer":
        try:
            obj = {"job": jobinput, "context": context, "outdir": outdir, "tmpdir": tmpdir}
            return schema_salad.ref_resolver.resolve_json_pointer(obj, ex["script"])
        except ValueError as v:
            raise WorkflowException("%s in %s" % (v, obj))

    for r in reversed(requirements):
        if r["class"] == "ExpressionEngineRequirement" and r["id"] == ex["engine"]:
            runtime = []

            class DR(object):
                pass
            dr = DR()
            dr.requirements = r.get("requirements", [])
            dr.hints = r.get("hints", [])

            (docker_req, docker_is_req) = process.get_feature(dr, "DockerRequirement")
            img_id = None
            if docker_req:
                img_id = docker.get_from_requirements(docker_req, docker_is_req, pull_image)
            if img_id:
                runtime = ["docker", "run", "-i", "--rm", img_id]

            exdefs = []
            for exdef in r.get("engineConfig", []):
                if isinstance(exdef, dict) and "ref" in exdef:
                    with open(exdef["ref"][7:]) as f:
                        exdefs.append(f.read())
                elif isinstance(exdef, basestring):
                    exdefs.append(exdef)

            inp = {
                "script": ex["script"],
                "engineConfig": exdefs,
                "job": jobinput,
                "context": context,
                "outdir": outdir,
                "tmpdir": tmpdir,
            }

            _logger.debug("Invoking expression engine %s with %s",
                          runtime + aslist(r["engineCommand"]),
                          json.dumps(inp, indent=4))

            sp = subprocess.Popen(runtime + aslist(r["engineCommand"]),
                                  shell=False,
                                  close_fds=True,
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE)

            (stdoutdata, stderrdata) = sp.communicate(json.dumps(inp) + "\n\n")
            if sp.returncode != 0:
                raise WorkflowException("Expression engine returned non-zero exit code on evaluation of\n%s" % json.dumps(inp, indent=4))

            return json.loads(stdoutdata)

    raise WorkflowException("Unknown expression engine '%s'" % ex["engine"])
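# A minimal sketch of the external-engine protocol implied above (all values are
# hypothetical, not taken from any particular engine): the engineCommand process
# receives one JSON document on stdin and is expected to print the evaluated
# result as JSON on stdout, which exeval() then parses with json.loads().
#
#   stdin handed to the engine:
#     {
#         "script": "$job.threads",
#         "engineConfig": [],
#         "job": {"threads": 4},
#         "context": null,
#         "outdir": "/tmp/out",
#         "tmpdir": "/tmp/tmp"
#     }
#
#   stdout expected from an engine whose (hypothetical) script syntax is "$job.<name>":
#     4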
def get_from_requirements(requirements, hints, pull_image, dry_run=False):
    (r, req) = process.get_feature("DockerRequirement", requirements=requirements, hints=hints)
    if r:
        if get_image(r, pull_image, dry_run):
            return r["dockerImageId"]
        else:
            if req:
                raise Exception("Docker image %s not found" % r["dockerImageId"])
    return None
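# Hypothetical usage sketch for the helper above, assuming a DockerRequirement
# naming an image that is already present locally or can be pulled:
#
#   req = {"class": "DockerRequirement",
#          "dockerPull": "debian:8",
#          "dockerImageId": "debian:8"}
#   img = get_from_requirements([req], [], pull_image=True)
#   # img == "debian:8" when get_image() succeeds; otherwise None, or an
#   # exception when the feature came from `requirements` rather than `hints`.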
def run(self, dry_run=False, pull_image=True, rm_container=True, rm_tmpdir=True, move_outputs=True, **kwargs):
    #_logger.info("[job %s] starting with outdir %s", id(self), self.outdir)
    if not os.path.exists(self.outdir):
        os.makedirs(self.outdir)

    #with open(os.path.join(outdir, "cwl.input.json"), "w") as fp:
    #    json.dump(self.joborder, fp)

    runtime = []
    env = {"TMPDIR": self.tmpdir}

    (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")

    for f in self.pathmapper.files():
        if not os.path.exists(self.pathmapper.mapper(f)[0]):
            raise WorkflowException("Required input file %s not found" % self.pathmapper.mapper(f)[0])

    img_id = None
    if docker_req and kwargs.get("use_container") is not False:
        env = os.environ
        img_id = docker.get_from_requirements(docker_req, docker_is_req, pull_image)

    if docker_is_req and img_id is None:
        raise WorkflowException("Docker is required for running this tool.")

    if img_id:
        # Build the docker invocation: mount each input read-only, plus the
        # output and temporary directories read-write.
        runtime = ["docker", "run", "-i"]
        for src in self.pathmapper.files():
            vol = self.pathmapper.mapper(src)
            runtime.append("--volume=%s:%s:ro" % vol)
        runtime.append("--volume=%s:%s:rw" % (os.path.abspath(self.outdir), "/tmp/job_output"))
        runtime.append("--volume=%s:%s:rw" % (os.path.abspath(self.tmpdir), "/tmp/job_tmp"))
        runtime.append("--workdir=%s" % ("/tmp/job_output"))
        runtime.append("--user=%s" % (os.geteuid()))
        if rm_container:
            runtime.append("--rm")
        runtime.append("--env=TMPDIR=/tmp/job_tmp")
        for t, v in self.environment.items():
            runtime.append("--env=%s=%s" % (t, v))
        runtime.append(img_id)
    else:
        env = self.environment
        if not os.path.exists(self.tmpdir):
            os.makedirs(self.tmpdir)
        env["TMPDIR"] = self.tmpdir

    stdin = None
    stdout = None

    _logger.info("[job %s] exec %s%s%s",
                 id(self),
                 " ".join([shellescape.quote(arg) if needs_shell_quoting(arg) else arg for arg in (runtime + self.command_line)]),
                 ' < %s' % (self.stdin) if self.stdin else '',
                 ' > %s' % os.path.join(self.outdir, self.stdout) if self.stdout else '')

    if dry_run:
        return (self.outdir, {})

    outputs = {}

    try:
        # Stage generated files into the output directory before running.
        for t in self.generatefiles:
            if isinstance(self.generatefiles[t], dict):
                os.symlink(self.generatefiles[t]["path"], os.path.join(self.outdir, t))
            else:
                with open(os.path.join(self.outdir, t), "w") as f:
                    f.write(self.generatefiles[t])

        if self.stdin:
            stdin = open(self.pathmapper.mapper(self.stdin)[0], "rb")
        else:
            stdin = subprocess.PIPE

        if self.stdout:
            absout = os.path.join(self.outdir, self.stdout)
            dn = os.path.dirname(absout)
            if dn and not os.path.exists(dn):
                os.makedirs(dn)
            stdout = open(absout, "wb")
        else:
            stdout = sys.stderr

        sp = subprocess.Popen(runtime + self.command_line,
                              shell=False,
                              close_fds=True,
                              stdin=stdin,
                              stdout=stdout,
                              env=env,
                              cwd=self.outdir)

        if stdin == subprocess.PIPE:
            sp.stdin.close()

        rcode = sp.wait()

        if stdin != subprocess.PIPE:
            stdin.close()

        if stdout is not sys.stderr:
            stdout.close()

        # Map the exit code onto a process status, honoring any declared
        # success/temporaryFail/permanentFail code lists.
        if self.successCodes and rcode in self.successCodes:
            processStatus = "success"
        elif self.temporaryFailCodes and rcode in self.temporaryFailCodes:
            processStatus = "temporaryFail"
        elif self.permanentFailCodes and rcode in self.permanentFailCodes:
            processStatus = "permanentFail"
        elif rcode == 0:
            processStatus = "success"
        else:
            processStatus = "permanentFail"

        for t in self.generatefiles:
            if isinstance(self.generatefiles[t], dict):
                os.remove(os.path.join(self.outdir, t))
                os.symlink(self.pathmapper.reversemap(self.generatefiles[t]["path"])[1], os.path.join(self.outdir, t))

        outputs = self.collect_outputs(self.outdir)

    except Exception as e:
        _logger.exception("Exception while running job")
        processStatus = "permanentFail"

    if processStatus != "success":
        _logger.warn("[job %s] completed %s", id(self), processStatus)
    else:
        _logger.debug("[job %s] completed %s", id(self), processStatus)
    _logger.debug("[job %s] %s", id(self), json.dumps(outputs, indent=4))

    self.output_callback(outputs, processStatus)

    if rm_tmpdir:
        _logger.debug("[job %s] Removing temporary directory %s", id(self), self.tmpdir)
        shutil.rmtree(self.tmpdir, True)

    if move_outputs and empty_subtree(self.outdir):
        _logger.debug("[job %s] Removing empty output directory %s", id(self), self.outdir)
        shutil.rmtree(self.outdir, True)
def run(self, dry_run=False, pull_image=True, rm_container=True, rm_tmpdir=True, move_outputs=True, **kwargs):
    if not os.path.exists(self.outdir):
        os.makedirs(self.outdir)

    #with open(os.path.join(outdir, "cwl.input.json"), "w") as fp:
    #    json.dump(self.joborder, fp)

    runtime = []
    env = {"TMPDIR": self.tmpdir}

    (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")

    for f in self.pathmapper.files():
        if not os.path.isfile(self.pathmapper.mapper(f)[0]):
            raise WorkflowException("Required input file %s not found or is not a regular file." % self.pathmapper.mapper(f)[0])

    img_id = None
    if docker_req and kwargs.get("use_container") is not False:
        env = os.environ
        img_id = docker.get_from_requirements(docker_req, docker_is_req, pull_image)

    if docker_is_req and img_id is None:
        raise WorkflowException("Docker is required for running this tool.")

    if img_id:
        # Build the docker invocation: inputs read-only, output and temporary
        # directories read-write, with a read-only root filesystem and no network.
        runtime = ["docker", "run", "-i"]
        for src in self.pathmapper.files():
            vol = self.pathmapper.mapper(src)
            runtime.append("--volume=%s:%s:ro" % vol)
        runtime.append("--volume=%s:%s:rw" % (os.path.abspath(self.outdir), "/var/spool/cwl"))
        runtime.append("--volume=%s:%s:rw" % (os.path.abspath(self.tmpdir), "/tmp"))
        runtime.append("--workdir=%s" % ("/var/spool/cwl"))
        runtime.append("--read-only=true")
        runtime.append("--net=none")
        euid = docker_vm_uid() or os.geteuid()
        runtime.append("--user=%s" % (euid))
        if rm_container:
            runtime.append("--rm")
        runtime.append("--env=TMPDIR=/tmp")
        for t, v in self.environment.items():
            runtime.append("--env=%s=%s" % (t, v))
        runtime.append(img_id)
    else:
        env = self.environment
        if not os.path.exists(self.tmpdir):
            os.makedirs(self.tmpdir)
        env["TMPDIR"] = self.tmpdir
        # Carry over selected variables from the caller's environment.
        vars_to_preserve = kwargs.get("preserve_environment")
        if vars_to_preserve is not None:
            for key, value in os.environ.items():
                if key in vars_to_preserve and key not in env:
                    env[key] = value

    stdin = None
    stdout = None

    # With ShellCommandRequirement the arguments are already shell code, so
    # skip quoting; otherwise quote anything that needs it.
    scr, _ = get_feature(self, "ShellCommandRequirement")
    if scr:
        shouldquote = lambda x: False
    else:
        shouldquote = needs_shell_quoting_re.search

    _logger.info("[job %s] %s$ %s%s%s",
                 self.name,
                 self.outdir,
                 " ".join([shellescape.quote(str(arg)) if shouldquote(str(arg)) else str(arg) for arg in (runtime + self.command_line)]),
                 ' < %s' % (self.stdin) if self.stdin else '',
                 ' > %s' % os.path.join(self.outdir, self.stdout) if self.stdout else '')

    if dry_run:
        return (self.outdir, {})

    outputs = {}

    try:
        # Stage generated files into the output directory before running.
        for t in self.generatefiles:
            if isinstance(self.generatefiles[t], dict):
                src = self.generatefiles[t]["path"]
                dst = os.path.join(self.outdir, t)
                if os.path.dirname(self.pathmapper.reversemap(src)[1]) != self.outdir:
                    _logger.debug("symlinking %s to %s", dst, src)
                    os.symlink(src, dst)
            else:
                with open(os.path.join(self.outdir, t), "w") as f:
                    f.write(self.generatefiles[t])

        if self.stdin:
            stdin = open(self.pathmapper.mapper(self.stdin)[0], "rb")
        else:
            stdin = subprocess.PIPE

        if self.stdout:
            absout = os.path.join(self.outdir, self.stdout)
            dn = os.path.dirname(absout)
            if dn and not os.path.exists(dn):
                os.makedirs(dn)
            stdout = open(absout, "wb")
        else:
            stdout = sys.stderr

        sp = subprocess.Popen([str(x) for x in runtime + self.command_line],
                              shell=False,
                              close_fds=True,
                              stdin=stdin,
                              stdout=stdout,
                              env=env,
                              cwd=self.outdir)

        if stdin == subprocess.PIPE:
            sp.stdin.close()

        rcode = sp.wait()

        if stdin != subprocess.PIPE:
            stdin.close()

        if stdout is not sys.stderr:
            stdout.close()

        # Map the exit code onto a process status, honoring any declared
        # success/temporaryFail/permanentFail code lists.
        if self.successCodes and rcode in self.successCodes:
            processStatus = "success"
        elif self.temporaryFailCodes and rcode in self.temporaryFailCodes:
            processStatus = "temporaryFail"
        elif self.permanentFailCodes and rcode in self.permanentFailCodes:
            processStatus = "permanentFail"
        elif rcode == 0:
            processStatus = "success"
        else:
            processStatus = "permanentFail"

        for t in self.generatefiles:
            if isinstance(self.generatefiles[t], dict):
                src = self.generatefiles[t]["path"]
                dst = os.path.join(self.outdir, t)
                if os.path.dirname(self.pathmapper.reversemap(src)[1]) != self.outdir:
                    os.remove(dst)
                    os.symlink(self.pathmapper.reversemap(src)[1], dst)

        outputs = self.collect_outputs(self.outdir)

    except OSError as e:
        if e.errno == 2:
            if runtime:
                _logger.error("'%s' not found", runtime[0])
            else:
                _logger.error("'%s' not found", self.command_line[0])
        else:
            _logger.exception("Exception while running job")
        processStatus = "permanentFail"
    except WorkflowException as e:
        _logger.error("Error while running job: %s" % e)
        processStatus = "permanentFail"
    except Exception as e:
        _logger.exception("Exception while running job")
        processStatus = "permanentFail"

    if processStatus != "success":
        _logger.warn("[job %s] completed %s", self.name, processStatus)
    else:
        _logger.debug("[job %s] completed %s", self.name, processStatus)
    _logger.debug("[job %s] %s", self.name, json.dumps(outputs, indent=4))

    self.output_callback(outputs, processStatus)

    if rm_tmpdir:
        _logger.debug("[job %s] Removing temporary directory %s", self.name, self.tmpdir)
        shutil.rmtree(self.tmpdir, True)

    if move_outputs and empty_subtree(self.outdir):
        _logger.debug("[job %s] Removing empty output directory %s", self.name, self.outdir)
        shutil.rmtree(self.outdir, True)
def try_make_job(self, step, basedir, **kwargs):
    inputobj = {}
    _logger.debug("Try to make job %s", step.id)

    (scatterSpec, _) = get_feature("Scatter", requirements=step.tool.get("requirements"), hints=step.tool.get("hints"))
    if scatterSpec:
        inputparms = copy.deepcopy(step.tool["inputs"])
        outputparms = copy.deepcopy(step.tool["outputs"])
        scatter = aslist(scatterSpec["scatter"])

        inp_map = {i["id"]: i for i in inputparms}
        for s in scatter:
            if s not in inp_map:
                raise WorkflowException("Invalid Scatter parameter '%s'" % s)
            inp_map[s]["type"] = {"type": "array", "items": inp_map[s]["type"]}

        if scatterSpec.get("scatterMethod") == "nested_crossproduct":
            nesting = len(scatter)
        else:
            nesting = 1
        for r in xrange(0, nesting):
            for i in outputparms:
                i["type"] = {"type": "array", "items": i["type"]}
    else:
        inputparms = step.tool["inputs"]
        outputparms = step.tool["outputs"]

    for inp in inputparms:
        _logger.debug("Trying input %s", inp)
        iid = inp["id"]
        if "connect" in inp:
            connections = inp["connect"]
            for connection in aslist(connections):
                src = connection["source"]
                if src in self.state and self.state[src] is not None:
                    if not self.match_types(inp["type"], self.state[src], inp["id"], inputobj):
                        raise WorkflowException("Type mismatch between source '%s' (%s) and sink '%s' (%s)" % (src, self.state[src].parameter["type"], inp["id"], inp["type"]))
                elif src not in self.state:
                    raise WorkflowException("Connect source '%s' on parameter '%s' does not exist" % (src, inp["id"]))
                else:
                    return
        elif "default" in inp:
            inputobj[iid] = inp["default"]
        else:
            raise WorkflowException("Value for %s not specified" % (inp["id"]))

    _logger.info("Creating job with input: %s", pprint.pformat(inputobj))

    callback = functools.partial(self.receive_output, step, outputparms)

    if scatterSpec:
        method = scatterSpec.get("scatterMethod")
        if method is None and len(scatter) != 1:
            raise WorkflowException("Must specify scatterMethod when scattering over multiple inputs")
        if method == "dotproduct" or method is None:
            jobs = dotproduct_scatter(step, inputobj, basedir, scatter, callback, **kwargs)
        elif method == "nested_crossproduct":
            jobs = nested_crossproduct_scatter(step, inputobj, basedir, scatter, callback, **kwargs)
        elif method == "flat_crossproduct":
            jobs = flat_crossproduct_scatter(step, inputobj, basedir, scatter, callback, 0, **kwargs)
    else:
        jobs = step.job(inputobj, basedir, callback, **kwargs)

    for j in jobs:
        yield j
def run(self, dry_run=False, pull_image=True, rm_container=True, rm_tmpdir=True, move_outputs=True, **kwargs):
    if not os.path.exists(self.outdir):
        os.makedirs(self.outdir)

    #with open(os.path.join(outdir, "cwl.input.json"), "w") as fp:
    #    json.dump(self.joborder, fp)

    runtime = []
    env = {"TMPDIR": self.tmpdir}

    (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")

    for f in self.pathmapper.files():
        if not os.path.exists(self.pathmapper.mapper(f)[0]):
            raise WorkflowException("Required input file %s not found" % self.pathmapper.mapper(f)[0])

    img_id = None
    if docker_req and kwargs.get("use_container") is not False:
        env = os.environ
        img_id = docker.get_from_requirements(docker_req, docker_is_req, pull_image)

    if docker_is_req and img_id is None:
        raise WorkflowException("Docker is required for running this tool.")

    if img_id:
        runtime = ["docker", "run", "-i"]
        for src in self.pathmapper.files():
            vol = self.pathmapper.mapper(src)
            runtime.append("--volume=%s:%s:ro" % vol)
        runtime.append("--volume=%s:%s:rw" % (os.path.abspath(self.outdir), "/tmp/job_output"))
        runtime.append("--volume=%s:%s:rw" % (os.path.abspath(self.tmpdir), "/tmp/job_tmp"))
        runtime.append("--workdir=%s" % ("/tmp/job_output"))
        euid = docker_vm_uid() or os.geteuid()
        runtime.append("--user=%s" % (euid))
        if rm_container:
            runtime.append("--rm")
        runtime.append("--env=TMPDIR=/tmp/job_tmp")
        for t, v in self.environment.items():
            runtime.append("--env=%s=%s" % (t, v))
        runtime.append(img_id)
    else:
        env = self.environment
        if not os.path.exists(self.tmpdir):
            os.makedirs(self.tmpdir)
        env["TMPDIR"] = self.tmpdir

    stdin = None
    stdout = None

    _logger.info("[job %s] %s$ %s%s%s",
                 id(self),
                 self.outdir,
                 " ".join([shellescape.quote(arg) if needs_shell_quoting(arg) else arg for arg in (runtime + self.command_line)]),
                 ' < %s' % (self.stdin) if self.stdin else '',
                 ' > %s' % os.path.join(self.outdir, self.stdout) if self.stdout else '')

    if dry_run:
        return (self.outdir, {})

    outputs = {}

    try:
        for t in self.generatefiles:
            if isinstance(self.generatefiles[t], dict):
                os.symlink(self.generatefiles[t]["path"], os.path.join(self.outdir, t))
            else:
                with open(os.path.join(self.outdir, t), "w") as f:
                    f.write(self.generatefiles[t])

        if self.stdin:
            stdin = open(self.pathmapper.mapper(self.stdin)[0], "rb")
        else:
            stdin = subprocess.PIPE

        if self.stdout:
            absout = os.path.join(self.outdir, self.stdout)
            dn = os.path.dirname(absout)
            if dn and not os.path.exists(dn):
                os.makedirs(dn)
            stdout = open(absout, "wb")
        else:
            stdout = sys.stderr

        sp = subprocess.Popen(runtime + self.command_line,
                              shell=False,
                              close_fds=True,
                              stdin=stdin,
                              stdout=stdout,
                              env=env,
                              cwd=self.outdir)

        if stdin == subprocess.PIPE:
            sp.stdin.close()

        rcode = sp.wait()

        if stdin != subprocess.PIPE:
            stdin.close()

        if stdout is not sys.stderr:
            stdout.close()

        if self.successCodes and rcode in self.successCodes:
            processStatus = "success"
        elif self.temporaryFailCodes and rcode in self.temporaryFailCodes:
            processStatus = "temporaryFail"
        elif self.permanentFailCodes and rcode in self.permanentFailCodes:
            processStatus = "permanentFail"
        elif rcode == 0:
            processStatus = "success"
        else:
            processStatus = "permanentFail"

        for t in self.generatefiles:
            if isinstance(self.generatefiles[t], dict):
                os.remove(os.path.join(self.outdir, t))
                os.symlink(self.pathmapper.reversemap(self.generatefiles[t]["path"])[1], os.path.join(self.outdir, t))

        outputs = self.collect_outputs(self.outdir)

    except OSError as e:
        if e.errno == 2:
            if runtime:
                _logger.error("'%s' not found", runtime[0])
            else:
                _logger.error("'%s' not found", self.command_line[0])
        else:
            _logger.exception("Exception while running job")
        processStatus = "permanentFail"
    except WorkflowException as e:
        _logger.error("Error while running job: %s" % e)
        processStatus = "permanentFail"
    except Exception as e:
        _logger.exception("Exception while running job")
        processStatus = "permanentFail"

    if processStatus != "success":
        _logger.warn("[job %s] completed %s", id(self), processStatus)
    else:
        _logger.debug("[job %s] completed %s", id(self), processStatus)
    _logger.debug("[job %s] %s", id(self), json.dumps(outputs, indent=4))

    self.output_callback(outputs, processStatus)

    if rm_tmpdir:
        _logger.debug("[job %s] Removing temporary directory %s", id(self), self.tmpdir)
        shutil.rmtree(self.tmpdir, True)

    if move_outputs and empty_subtree(self.outdir):
        _logger.debug("[job %s] Removing empty output directory %s", id(self), self.outdir)
        shutil.rmtree(self.outdir, True)
def exeval(ex, jobinput, requirements, outdir, tmpdir, context, pull_image):
    if ex["engine"] == "cwl:JsonPointer":
        try:
            obj = {"job": jobinput, "context": context, "outdir": outdir, "tmpdir": tmpdir}
            return avro_ld.ref_resolver.resolve_json_pointer(obj, ex["script"])
        except ValueError as v:
            raise WorkflowException("%s in %s" % (v, obj))

    for r in reversed(requirements):
        if r["class"] == "ExpressionEngineRequirement" and r["id"] == ex["engine"]:
            runtime = []

            class DR(object):
                pass
            dr = DR()
            dr.requirements = r.get("requirements", [])
            dr.hints = r.get("hints", [])

            (docker_req, docker_is_req) = process.get_feature(dr, "DockerRequirement")
            img_id = None
            if docker_req:
                img_id = docker.get_from_requirements(docker_req, docker_is_req, pull_image)
            if img_id:
                runtime = ["docker", "run", "-i", "--rm", img_id]

            exdefs = []
            for exdef in r.get("engineConfig", []):
                if isinstance(exdef, dict) and "ref" in exdef:
                    with open(exdef["ref"][7:]) as f:
                        exdefs.append(f.read())
                elif isinstance(exdef, basestring):
                    exdefs.append(exdef)

            inp = {
                "script": ex["script"],
                "engineConfig": exdefs,
                "job": jobinput,
                "context": context,
                "outdir": outdir,
                "tmpdir": tmpdir,
            }

            _logger.debug("Invoking expression engine %s with %s",
                          runtime + aslist(r["engineCommand"]),
                          json.dumps(inp, indent=4))

            sp = subprocess.Popen(runtime + aslist(r["engineCommand"]),
                                  shell=False,
                                  close_fds=True,
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE)

            (stdoutdata, stderrdata) = sp.communicate(json.dumps(inp) + "\n\n")
            if sp.returncode != 0:
                raise WorkflowException("Expression engine returned non-zero exit code on evaluation of\n%s" % json.dumps(inp, indent=4))

            return json.loads(stdoutdata)

    raise WorkflowException("Unknown expression engine '%s'" % ex["engine"])
def try_make_job(self, step, basedir, **kwargs):
    inputobj = {}

    (scatterSpec, _) = get_feature("Scatter", requirements=step.tool.get("requirements"), hints=step.tool.get("hints"))
    if scatterSpec:
        inputparms = copy.deepcopy(step.tool["inputs"])
        outputparms = copy.deepcopy(step.tool["outputs"])
        scatter = aslist(scatterSpec["scatter"])

        inp_map = {i["id"]: i for i in inputparms}
        for s in scatter:
            if s not in inp_map:
                raise WorkflowException("Invalid Scatter parameter '%s'" % s)
            inp_map[s]["type"] = {"type": "array", "items": inp_map[s]["type"]}

        if scatterSpec.get("scatterMethod") == "nested_crossproduct":
            nesting = len(scatter)
        else:
            nesting = 1
        for r in xrange(0, nesting):
            for i in outputparms:
                i["type"] = {"type": "array", "items": i["type"]}
    else:
        inputparms = step.tool["inputs"]
        outputparms = step.tool["outputs"]

    for inp in inputparms:
        _logger.debug(inp)
        iid = idk(inp["id"])
        if "connect" in inp:
            connections = inp["connect"]
            is_array = isinstance(inp["type"], dict) and inp["type"]["type"] == "array"
            for connection in aslist(connections):
                src = idk(connection["source"])
                if src in self.state and self.state[src] is not None:
                    if self.state[src].parameter["type"] == inp["type"]:
                        # source and input types are the same
                        if is_array and iid in inputobj:
                            # there's already a value in the input object, so extend the existing array
                            inputobj[iid].extend(self.state[src].value)
                        else:
                            # simply assign the value from state to input
                            inputobj[iid] = copy.deepcopy(self.state[src].value)
                    elif is_array and self.state[src].parameter["type"] == inp["type"]["items"]:
                        # source type is the item type on the input array
                        # promote single item to array entry
                        if iid in inputobj:
                            inputobj[iid].append(self.state[src].value)
                        else:
                            inputobj[iid] = [self.state[src].value]
                    else:
                        raise WorkflowException("Type mismatch between source '%s' (%s) and sink '%s' (%s)" % (src, self.state[src].parameter["type"], idk(inp["id"]), inp["type"]))
                elif src not in self.state:
                    raise WorkflowException("Connect source '%s' on parameter '%s' does not exist" % (src, inp["id"]))
                else:
                    return
        elif "default" in inp:
            inputobj[iid] = inp["default"]
        else:
            raise WorkflowException("Value for %s not specified" % (inp["id"]))

    _logger.info("Creating job with input: %s", inputobj)

    callback = functools.partial(self.receive_output, step, outputparms)

    if scatterSpec:
        method = scatterSpec.get("scatterMethod")
        if method is None and len(scatter) != 1:
            raise WorkflowException("Must specify scatterMethod when scattering over multiple inputs")
        if method == "dotproduct" or method is None:
            jobs = dotproduct_scatter(step, inputobj, basedir, scatter, callback, **kwargs)
        elif method == "nested_crossproduct":
            jobs = nested_crossproduct_scatter(step, inputobj, basedir, scatter, callback, **kwargs)
        elif method == "flat_crossproduct":
            jobs = flat_crossproduct_scatter(step, inputobj, basedir, scatter, callback, 0, **kwargs)
    else:
        jobs = step.job(inputobj, basedir, callback, **kwargs)

    for j in jobs:
        yield j
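# Sketch of the scatter type rewriting performed above (parameter ids are
# hypothetical): an input declared as
#     {"id": "inp", "type": "string"}
# that appears in scatterSpec["scatter"] is rewritten to
#     {"id": "inp", "type": {"type": "array", "items": "string"}}
# and every output type is wrapped in one array level per nesting step
# (len(scatter) levels for nested_crossproduct, a single level otherwise).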
def run(self, outdir, dry_run=False, pull_image=True, rm_container=True):
    with open(os.path.join(outdir, "cwl.input.json"), "w") as fp:
        json.dump(self.joborder, fp)

    runtime = []
    env = {}

    (docker_req, docker_is_req) = get_feature("DockerRequirement", requirements=self.requirements, hints=self.hints)

    for f in self.pathmapper.files():
        if not os.path.exists(self.pathmapper.mapper(f)[0]):
            raise WorkflowException("Required input file %s not found" % self.pathmapper.mapper(f)[0])

    if docker_req:
        img_id = docker.get_from_requirements(docker_req, docker_is_req, pull_image)
        runtime = ["docker", "run", "-i"]
        for d in self.pathmapper.dirs:
            runtime.append("--volume=%s:%s:ro" % (os.path.abspath(d), self.pathmapper.dirs[d]))
        runtime.append("--volume=%s:%s:rw" % (os.path.abspath(outdir), "/tmp/job_output"))
        runtime.append("--workdir=%s" % ("/tmp/job_output"))
        runtime.append("--user=%s" % (os.geteuid()))
        if rm_container:
            runtime.append("--rm")
        for t, v in self.environment.items():
            runtime.append("--env=%s=%s" % (t, v))
        runtime.append(img_id)
    else:
        env = self.environment

    stdin = None
    stdout = None

    _logger.info("%s%s%s",
                 " ".join(runtime + self.command_line),
                 ' < %s' % (self.stdin) if self.stdin else '',
                 ' > %s' % (self.stdout) if self.stdout else '')

    if dry_run:
        return (outdir, {})

    os.chdir(outdir)

    for t in self.generatefiles:
        with open(os.path.join(outdir, t), "w") as f:
            f.write(self.generatefiles[t])

    if self.stdin:
        stdin = open(self.stdin, "rb")
    else:
        stdin = subprocess.PIPE

    if self.stdout:
        dn = os.path.dirname(self.stdout)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        stdout = open(self.stdout, "wb")
    else:
        stdout = sys.stderr

    sp = subprocess.Popen(runtime + self.command_line,
                          shell=False,
                          close_fds=True,
                          stdin=stdin,
                          stdout=stdout,
                          env=env,
                          cwd=outdir)

    if stdin == subprocess.PIPE:
        sp.stdin.close()

    rcode = sp.wait()

    if stdin != subprocess.PIPE:
        stdin.close()

    if stdout is not sys.stderr:
        stdout.close()

    outputs = self.collect_outputs(outdir)

    if self.successCodes and rcode in self.successCodes:
        processStatus = "success"
    elif self.temporaryFailCodes and rcode in self.temporaryFailCodes:
        processStatus = "temporaryFail"
    elif self.permanentFailCodes and rcode in self.permanentFailCodes:
        processStatus = "permanentFail"
    elif rcode == 0:
        processStatus = "success"
    else:
        processStatus = "permanentFail"

    self.output_callback(outputs, processStatus)
def job(self, joborder, input_basedir, output_callback, use_container=True, **kwargs):
    builder = self._init_job(joborder, input_basedir, **kwargs)

    if self.tool["baseCommand"]:
        for n, b in enumerate(aslist(self.tool["baseCommand"])):
            builder.bindings.append({
                "position": [-1000000, n],
                "valueFrom": b
            })

    if self.tool.get("arguments"):
        for i, a in enumerate(self.tool["arguments"]):
            if isinstance(a, dict):
                a = copy.copy(a)
                if a.get("position"):
                    a["position"] = [a["position"], i]
                else:
                    a["position"] = [0, i]
                a["do_eval"] = a["valueFrom"]
                a["valueFrom"] = None
                builder.bindings.append(a)
            else:
                builder.bindings.append({
                    "position": [0, i],
                    "valueFrom": a
                })

    builder.bindings.sort(key=lambda a: a["position"])

    reffiles = [f["path"] for f in builder.files]

    j = CommandLineJob()
    j.joborder = builder.job
    j.stdin = None
    j.stdout = None
    j.successCodes = self.tool.get("successCodes")
    j.temporaryFailCodes = self.tool.get("temporaryFailCodes")
    j.permanentFailCodes = self.tool.get("permanentFailCodes")
    j.requirements = self.requirements
    j.hints = self.hints

    builder.pathmapper = None

    if self.tool.get("stdin"):
        j.stdin = expression.do_eval(self.tool["stdin"], builder.job, self.requirements, self.docpath, j.stdin)
        if isinstance(j.stdin, dict) and "ref" in j.stdin:
            j.stdin = builder.job[j.stdin["ref"][1:]]["path"]
        reffiles.append(j.stdin)

    if self.tool.get("stdout"):
        j.stdout = expression.do_eval(self.tool["stdout"], builder.job, j.requirements, self.docpath)
        if os.path.isabs(j.stdout):
            raise validate.ValidationException("stdout must be a relative path")

    dockerReq, _ = get_feature("DockerRequirement", requirements=self.requirements, hints=self.hints)
    if dockerReq and use_container:
        builder.pathmapper = DockerPathMapper(reffiles, input_basedir)

    if builder.pathmapper is None:
        builder.pathmapper = PathMapper(reffiles, input_basedir)

    for f in builder.files:
        f["path"] = builder.pathmapper.mapper(f["path"])[1]

    _logger.debug("Bindings is %s", pprint.pformat(builder.bindings))
    _logger.debug("Files is %s", pprint.pformat({p: builder.pathmapper.mapper(p) for p in builder.pathmapper.files()}))

    builder.requirements = j.requirements

    j.generatefiles = {}
    createFiles, _ = get_feature("CreateFileRequirement", requirements=self.requirements, hints=self.hints)
    if createFiles:
        for t in createFiles["fileDef"]:
            j.generatefiles[t["filename"]] = expression.do_eval(t["fileContent"], builder.job, j.requirements, self.docpath)

    j.environment = {}
    evr, _ = get_feature("EnvVarRequirement", requirements=self.requirements, hints=self.hints)
    if evr:
        for t in evr["envDef"]:
            j.environment[t["envName"]] = expression.do_eval(t["envValue"], builder.job, j.requirements, self.docpath)

    j.command_line = flatten(map(builder.generate_arg, builder.bindings))

    if j.stdin:
        j.stdin = j.stdin if os.path.isabs(j.stdin) else os.path.join(input_basedir, j.stdin)

    j.pathmapper = builder.pathmapper
    j.collect_outputs = functools.partial(self.collect_output_ports, self.tool["outputs"], builder)
    j.output_callback = output_callback

    yield j
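# Note on the binding sort above: baseCommand words are assigned position
# [-1000000, n] so they always sort first; an argument with "position": 2
# becomes [2, i], while arguments without an explicit position default to
# [0, i].  Sorting these two-element lists therefore yields the base command,
# then position-0 arguments in declaration order, then higher positions.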