def __init__(self, toc, j, renderlist, redirects):
    self.typedoc = StringIO.StringIO()
    self.toc = toc
    self.subs = {}
    self.docParent = {}
    self.docAfter = {}
    self.rendered = set()
    self.redirects = redirects
    self.title = None

    for t in j:
        if "extends" in t:
            for e in aslist(t["extends"]):
                add_dictlist(self.subs, e, t["name"])
                #if "docParent" not in t and "docAfter" not in t:
                #    add_dictlist(self.docParent, e, t["name"])

        if t.get("docParent"):
            add_dictlist(self.docParent, t["docParent"], t["name"])

        if t.get("docChild"):
            for c in aslist(t["docChild"]):
                add_dictlist(self.docParent, t["name"], c)

        if t.get("docAfter"):
            add_dictlist(self.docAfter, t["docAfter"], t["name"])

    _, _, metaschema_loader = schema.get_metaschema()
    alltypes = schema.extend_and_specialize(j, metaschema_loader)

    self.typemap = {}
    self.uses = {}
    self.record_refs = {}
    for t in alltypes:
        self.typemap[t["name"]] = t
        try:
            if t["type"] == "record":
                self.record_refs[t["name"]] = []
                for f in t.get("fields", []):
                    p = has_types(f)
                    for tp in p:
                        if tp not in self.uses:
                            self.uses[tp] = []
                        if (t["name"], f["name"]) not in self.uses[tp]:
                            _, frg1 = urlparse.urldefrag(t["name"])
                            _, frg2 = urlparse.urldefrag(f["name"])
                            self.uses[tp].append((frg1, frg2))
                        if tp not in basicTypes and tp not in self.record_refs[t["name"]]:
                            self.record_refs[t["name"]].append(tp)
        except KeyError as e:
            _logger.error("Did not find 'type' in %s", t)
            raise

    for f in alltypes:
        if (f["name"] in renderlist or
            ((not renderlist) and
             ("extends" not in f) and
             ("docParent" not in f) and
             ("docAfter" not in f))):
            self.render_type(f, 1)
def __init__(self, toolpath_object, validateAs, docpath):
    self.names = get_schema()
    self.docpath = docpath

    self.tool = toolpath_object

    # Validate tool document
    validate.validate_ex(self.names.get_name(validateAs, ""), self.tool)

    self.validate_requirements(self.tool, "requirements")
    self.validate_requirements(self.tool, "hints")

    for t in self.tool.get("requirements", []):
        t["_docpath"] = docpath

    for t in self.tool.get("hints", []):
        t["_docpath"] = docpath

    # Import schema defs
    self.schemaDefs = {
        "Any": [
            "null",
            "boolean",
            "int",
            "long",
            "float",
            "double",
            "bytes",
            "string",
            "File",
            {"type": "array", "items": "Any"},
            {"type": "map", "values": "Any"}
        ]}

    sd, _ = get_feature("SchemaDefRequirement",
                        requirements=self.tool.get("requirements"),
                        hints=self.tool.get("hints"))

    if sd:
        for i in sd["types"]:
            avro.schema.make_avsc_object(i, self.names)
            self.schemaDefs[i["name"]] = i

    # Build record schema from inputs
    self.inputs_record_schema = {"name": "input_record_schema",
                                 "type": "record",
                                 "fields": []}
    for i in self.tool["inputs"]:
        c = copy.copy(i)
        c["name"] = c["id"][1:]
        del c["id"]
        if "default" in c:
            c["type"] = ["null"] + aslist(c["type"])
        self.inputs_record_schema["fields"].append(c)
    avro.schema.make_avsc_object(self.inputs_record_schema, self.names)

    self.outputs_record_schema = {"name": "outputs_record_schema",
                                  "type": "record",
                                  "fields": []}
    for i in self.tool["outputs"]:
        c = copy.copy(i)
        c["name"] = c["id"][1:]
        del c["id"]
        if "default" in c:
            c["type"] = ["null"] + aslist(c["type"])
        self.outputs_record_schema["fields"].append(c)
    avro.schema.make_avsc_object(self.outputs_record_schema, self.names)
def scandeps(base, doc, reffields, urlfields, loadref):
    r = []
    if isinstance(doc, dict):
        if "id" in doc:
            if doc["id"].startswith("file://"):
                df, _ = urlparse.urldefrag(doc["id"])
                if base != df:
                    base = df

        for k, v in doc.iteritems():
            if k in reffields:
                for u in aslist(v):
                    sub = loadref(base, u)
                    if isinstance(sub, dict):
                        subid = sub["id"]
                    else:
                        subid = urlparse.urljoin(base, u)
                    deps = {"class": "File", "path": subid}
                    sf = scandeps(subid, sub, reffields, urlfields, loadref)
                    if sf:
                        deps["secondaryFiles"] = sf
                    r.append(deps)
            elif k in urlfields:
                for u in aslist(v):
                    r.append({
                        "class": "File",
                        "path": urlparse.urljoin(base, u)
                    })
            else:
                r.extend(scandeps(base, v, reffields, urlfields, loadref))
    elif isinstance(doc, list):
        for d in doc:
            r.extend(scandeps(base, d, reffields, urlfields, loadref))
    return r
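# --- Illustrative usage sketch (not part of the original module) ---
# A minimal, hypothetical example of driving scandeps() with a canned loadref
# callback.  The document, the field names "run" and "doc_url", and the
# callback below are invented for illustration; urlparse and aslist are
# assumed to be in scope as they are elsewhere in this module.
def _scandeps_example():
    doc = {
        "id": "file:///tmp/wf.cwl",
        "run": "tool.cwl",                        # treated as a reference field
        "doc_url": "http://example.com/readme",   # treated as a plain URL field
    }

    def loadref(base, ref):
        # A real loader would fetch and parse the referenced document;
        # here we just return a stub carrying the resolved id.
        return {"id": urlparse.urljoin(base, ref)}

    # Returns File entries for tool.cwl (via loadref) and for the doc_url link.
    return scandeps("file:///tmp/wf.cwl", doc,
                    set(("run",)), set(("doc_url",)), loadref)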
def exeval(ex, jobinput, requirements, outdir, tmpdir, context, pull_image):
    if ex["engine"] == "https://w3id.org/cwl/cwl#JsonPointer":
        try:
            obj = {"job": jobinput, "context": context,
                   "outdir": outdir, "tmpdir": tmpdir}
            return schema_salad.ref_resolver.resolve_json_pointer(obj, ex["script"])
        except ValueError as v:
            raise WorkflowException("%s in %s" % (v, obj))

    for r in reversed(requirements):
        if r["class"] == "ExpressionEngineRequirement" and r["id"] == ex["engine"]:
            runtime = []

            class DR(object):
                pass
            dr = DR()
            dr.requirements = r.get("requirements", [])
            dr.hints = r.get("hints", [])

            (docker_req, docker_is_req) = process.get_feature(dr, "DockerRequirement")
            img_id = None
            if docker_req:
                img_id = docker.get_from_requirements(docker_req, docker_is_req, pull_image)
            if img_id:
                runtime = ["docker", "run", "-i", "--rm", img_id]

            exdefs = []
            for exdef in r.get("engineConfig", []):
                if isinstance(exdef, dict) and "ref" in exdef:
                    with open(exdef["ref"][7:]) as f:
                        exdefs.append(f.read())
                elif isinstance(exdef, basestring):
                    exdefs.append(exdef)

            inp = {
                "script": ex["script"],
                "engineConfig": exdefs,
                "job": jobinput,
                "context": context,
                "outdir": outdir,
                "tmpdir": tmpdir,
            }

            _logger.debug("Invoking expression engine %s with %s",
                          runtime + aslist(r["engineCommand"]),
                          json.dumps(inp, indent=4))

            sp = subprocess.Popen(runtime + aslist(r["engineCommand"]),
                                  shell=False,
                                  close_fds=True,
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE)

            (stdoutdata, stderrdata) = sp.communicate(json.dumps(inp) + "\n\n")
            if sp.returncode != 0:
                raise WorkflowException(
                    "Expression engine returned non-zero exit code on evaluation of\n%s"
                    % json.dumps(inp, indent=4))

            return json.loads(stdoutdata)

    raise WorkflowException("Unknown expression engine '%s'" % ex["engine"])
def collect_output(self, schema, builder, outdir):
    r = None
    if "outputBinding" in schema:
        binding = schema["outputBinding"]
        if "glob" in binding:
            r = []
            bg = builder.do_eval(binding["glob"])
            for gb in aslist(bg):
                r.extend([{"path": g, "class": "File"}
                          for g in builder.fs_access.glob(os.path.join(outdir, gb))])
            for files in r:
                checksum = hashlib.sha1()
                with builder.fs_access.open(files["path"], "rb") as f:
                    contents = f.read(CONTENT_LIMIT)
                    if binding.get("loadContents"):
                        files["contents"] = contents
                    filesize = 0
                    while contents != "":
                        checksum.update(contents)
                        filesize += len(contents)
                        contents = f.read(1024 * 1024)
                    files["checksum"] = "sha1$%s" % checksum.hexdigest()
                    files["size"] = filesize

        if "outputEval" in binding:
            r = builder.do_eval(binding["outputEval"], context=r)
            if schema["type"] == "File" and (not isinstance(r, dict) or "path" not in r):
                raise WorkflowException("Expression must return a file object.")

        if schema["type"] == "File":
            if not r:
                raise WorkflowException("No matches for output file with glob: '{}'".format(bg))
            if len(r) > 1:
                raise WorkflowException("Multiple matches for output item that is a single file.")
            r = r[0]

        if schema["type"] == "File" and "secondaryFiles" in binding:
            r["secondaryFiles"] = []
            for sf in aslist(binding["secondaryFiles"]):
                if isinstance(sf, dict):
                    sfpath = builder.do_eval(sf, context=r["path"])
                else:
                    sfpath = {"path": substitute(r["path"], sf), "class": "File"}
                if isinstance(sfpath, list):
                    r["secondaryFiles"].extend(sfpath)
                else:
                    r["secondaryFiles"].append(sfpath)

            for sf in r["secondaryFiles"]:
                if not builder.fs_access.exists(sf["path"]):
                    raise WorkflowException(
                        "Missing secondary file of '%s' of primary file '%s'"
                        % (sf["path"], r["path"]))

    if not r and schema["type"] == "record":
        r = {}
        for f in schema["fields"]:
            r[f["name"]] = self.collect_output(f, builder, outdir)
    return r
def exeval(ex, jobinput, requirements, outdir, tmpdir, context, pull_image):
    if ex["engine"] == "https://w3id.org/cwl/cwl#JsonPointer":
        try:
            obj = {"job": jobinput, "context": context,
                   "outdir": outdir, "tmpdir": tmpdir}
            return schema_salad.ref_resolver.resolve_json_pointer(obj, ex["script"])
        except ValueError as v:
            raise WorkflowException("%s in %s" % (v, obj))

    if ex["engine"] == "https://w3id.org/cwl/cwl#JavascriptEngine":
        engineConfig = []
        for r in reversed(requirements):
            if (r["class"] == "ExpressionEngineRequirement" and
                    r["id"] == "https://w3id.org/cwl/cwl#JavascriptEngine"):
                engineConfig = r.get("engineConfig", [])
                break
        return sandboxjs.execjs(ex["script"],
                                jshead(engineConfig, jobinput, context, tmpdir, outdir))

    for r in reversed(requirements):
        if r["class"] == "ExpressionEngineRequirement" and r["id"] == ex["engine"]:
            runtime = []

            class DR(object):
                pass
            dr = DR()
            dr.requirements = r.get("requirements", [])
            dr.hints = r.get("hints", [])

            (docker_req, docker_is_req) = process.get_feature(dr, "DockerRequirement")
            img_id = None
            if docker_req:
                img_id = docker.get_from_requirements(docker_req, docker_is_req, pull_image)
            if img_id:
                runtime = ["docker", "run", "-i", "--rm", img_id]

            inp = {
                "script": ex["script"],
                "engineConfig": r.get("engineConfig", []),
                "job": jobinput,
                "context": context,
                "outdir": outdir,
                "tmpdir": tmpdir,
            }

            _logger.debug("Invoking expression engine %s with %s",
                          runtime + aslist(r["engineCommand"]),
                          json.dumps(inp, indent=4))

            sp = subprocess.Popen(runtime + aslist(r["engineCommand"]),
                                  shell=False,
                                  close_fds=True,
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE)

            (stdoutdata, stderrdata) = sp.communicate(json.dumps(inp) + "\n\n")
            if sp.returncode != 0:
                raise WorkflowException(
                    "Expression engine returned non-zero exit code on evaluation of\n%s"
                    % json.dumps(inp, indent=4))

            return json.loads(stdoutdata)

    raise WorkflowException("Unknown expression engine '%s'" % ex["engine"])
def checkFormat(actualFile, inputFormats, requirements, ontology):
    for af in aslist(actualFile):
        if "format" not in af:
            raise validate.ValidationException("Missing required 'format' for File %s" % af)
        for inpf in aslist(inputFormats):
            if af["format"] == inpf or formatSubclassOf(af["format"], inpf, ontology, set()):
                return
        raise validate.ValidationException(
            "Incompatible file format %s required format(s) %s"
            % (af["format"], inputFormats))
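# --- Illustrative usage sketch (not part of the original module) ---
# checkFormat() returns silently on an exact format match; formatSubclassOf()
# and the ontology graph are only consulted when the formats differ.  The file
# object and the EDAM format IRI below are invented for illustration, and
# rdflib is assumed to be installed (the ontology argument is an empty graph
# here because the formats match exactly).
def _check_format_example():
    import rdflib
    f = {"class": "File",
         "path": "reads.fastq",
         "format": "http://edamontology.org/format_1930"}
    checkFormat(f, ["http://edamontology.org/format_1930"], [], rdflib.Graph())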
def __init__(self, toolpath_object, **kwargs):
    (_, self.names, _) = get_schema()
    self.tool = toolpath_object
    self.requirements = kwargs.get("requirements", []) + self.tool.get("requirements", [])
    self.hints = kwargs.get("hints", []) + self.tool.get("hints", [])
    if "loader" in kwargs:
        self.formatgraph = kwargs["loader"].graph
    else:
        self.formatgraph = None

    self.validate_hints(self.tool.get("hints", []), strict=kwargs.get("strict"))

    self.schemaDefs = {}

    sd, _ = self.get_requirement("SchemaDefRequirement")

    if sd:
        sdtypes = sd["types"]
        av = schema_salad.schema.make_valid_avro(sdtypes, {t["name"]: t for t in sdtypes}, set())
        for i in av:
            self.schemaDefs[i["name"]] = i
        avro.schema.make_avsc_object(av, self.names)

    # Build record schema from inputs
    self.inputs_record_schema = {"name": "input_record_schema", "type": "record", "fields": []}
    self.outputs_record_schema = {"name": "outputs_record_schema", "type": "record", "fields": []}

    for key in ("inputs", "outputs"):
        for i in self.tool[key]:
            c = copy.copy(i)
            doc_url, _ = urlparse.urldefrag(c['id'])
            c["name"] = shortname(c["id"])
            del c["id"]

            if "type" not in c:
                raise validate.ValidationException("Missing `type` in parameter `%s`" % c["name"])

            if "default" in c and "null" not in aslist(c["type"]):
                c["type"] = ["null"] + aslist(c["type"])
            else:
                c["type"] = c["type"]

            if key == "inputs":
                self.inputs_record_schema["fields"].append(c)
            elif key == "outputs":
                self.outputs_record_schema["fields"].append(c)

    try:
        self.inputs_record_schema = schema_salad.schema.make_valid_avro(
            self.inputs_record_schema, {}, set())
        avro.schema.make_avsc_object(self.inputs_record_schema, self.names)
    except avro.schema.SchemaParseException as e:
        raise validate.ValidationException(
            "Got error `%s` while processing inputs of %s:\n%s"
            % (str(e), self.tool["id"], json.dumps(self.inputs_record_schema, indent=4)))

    try:
        self.outputs_record_schema = schema_salad.schema.make_valid_avro(
            self.outputs_record_schema, {}, set())
        avro.schema.make_avsc_object(self.outputs_record_schema, self.names)
    except avro.schema.SchemaParseException as e:
        raise validate.ValidationException(
            "Got error `%s` while processing outputs of %s:\n%s"
            % (str(e), self.tool["id"], json.dumps(self.outputs_record_schema, indent=4)))
def merge_properties(a, b):
    c = {}
    for i in a:
        if i not in b:
            c[i] = a[i]
    for i in b:
        if i not in a:
            c[i] = b[i]
    for i in a:
        if i in b:
            c[i] = aslist(a[i]) + aslist(b[i])
    return c
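# --- Illustrative usage sketch (not part of the original module) ---
# merge_properties() copies keys unique to either side and concatenates shared
# keys as lists (via aslist from this module).  The dicts below are invented.
def _merge_properties_example():
    a = {"cpu": 1, "env": ["A=1"]}
    b = {"ram": 1024, "env": ["B=2"]}
    merged = merge_properties(a, b)
    assert merged == {"cpu": 1, "ram": 1024, "env": ["A=1", "B=2"]}
    return merged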
def object_from_state(state, parms, frag_only, supportsMultipleInput):
    inputobj = {}
    for inp in parms:
        iid = inp["id"]
        if frag_only:
            iid = shortname(iid)
        if "source" in inp:
            if isinstance(inp["source"], list) and not supportsMultipleInput:
                raise WorkflowException("Workflow contains multiple inbound links to a single parameter but MultipleInputFeatureRequirement is not declared.")
            connections = aslist(inp["source"])
            for src in connections:
                if src in state and state[src] is not None:
                    if not match_types(inp["type"], state[src], iid, inputobj,
                                       inp.get("linkMerge",
                                               ("merge_nested" if len(connections) > 1 else None)),
                                       valueFrom=inp.get("valueFrom")):
                        raise WorkflowException(
                            "Type mismatch between source '%s' (%s) and sink '%s' (%s)"
                            % (src, state[src].parameter["type"], inp["id"], inp["type"]))
                elif src not in state:
                    raise WorkflowException(
                        "Connect source '%s' on parameter '%s' does not exist"
                        % (src, inp["id"]))
                else:
                    return None
        elif "default" in inp:
            inputobj[iid] = inp["default"]
        elif "valueFrom" in inp:
            inputobj[iid] = None
        else:
            raise WorkflowException("Value for %s not specified" % (inp["id"]))
    return inputobj
def object_from_state(state, parms, frag_only):
    inputobj = {}
    for inp in parms:
        iid = inp["id"]
        if frag_only:
            (_, iid) = urlparse.urldefrag(iid)
            iid = iid.split(".")[-1]
        if "source" in inp:
            connections = aslist(inp["source"])
            for src in connections:
                if src in state and state[src] is not None:
                    if not match_types(inp["type"], state[src], iid, inputobj,
                                       inp.get("linkMerge",
                                               ("merge_nested" if len(connections) > 1 else None))):
                        raise WorkflowException(
                            "Type mismatch between source '%s' (%s) and sink '%s' (%s)"
                            % (src, state[src].parameter["type"], inp["id"], inp["type"]))
                elif src not in state:
                    raise WorkflowException(
                        "Connect source '%s' on parameter '%s' does not exist"
                        % (src, inp["id"]))
                else:
                    return None
        elif "default" in inp:
            inputobj[iid] = inp["default"]
        else:
            raise WorkflowException("Value for %s not specified" % (inp["id"]))
    return inputobj
def adjust_for_scatter(self, steps):
    (scatterSpec, _) = self.get_requirement("ScatterFeatureRequirement")
    for step in steps:
        if scatterSpec and "scatter" in step.tool:
            inputparms = copy.deepcopy(step.tool["inputs"])
            outputparms = copy.deepcopy(step.tool["outputs"])
            scatter = aslist(step.tool["scatter"])

            inp_map = {i["id"]: i for i in inputparms}
            for s in scatter:
                if s not in inp_map:
                    raise WorkflowException("Invalid Scatter parameter '%s'" % s)

                inp_map[s]["type"] = {"type": "array", "items": inp_map[s]["type"]}

            if step.tool.get("scatterMethod") == "nested_crossproduct":
                nesting = len(scatter)
            else:
                nesting = 1

            for r in xrange(0, nesting):
                for i in outputparms:
                    i["type"] = {"type": "array", "items": i["type"]}

            step.tool["inputs"] = inputparms
            step.tool["outputs"] = outputparms
def scandeps(base, doc, reffields, urlfields, loadref):
    r = []
    if isinstance(doc, dict):
        if "id" in doc:
            if doc["id"].startswith("file://"):
                df, _ = urlparse.urldefrag(doc["id"])
                if base != df:
                    r.append({
                        "class": "File",
                        "path": df
                    })
                    base = df

        for k, v in doc.iteritems():
            if k in reffields:
                for u in aslist(v):
                    if isinstance(u, dict):
                        r.extend(scandeps(base, u, reffields, urlfields, loadref))
                    else:
                        sub = loadref(base, u)
                        subid = urlparse.urljoin(base, u)
                        deps = {
                            "class": "File",
                            "path": subid
                        }
                        sf = scandeps(subid, sub, reffields, urlfields, loadref)
                        if sf:
                            deps["secondaryFiles"] = sf
                        r.append(deps)
            elif k in urlfields:
                for u in aslist(v):
                    r.append({
                        "class": "File",
                        "path": urlparse.urljoin(base, u)
                    })
            else:
                r.extend(scandeps(base, v, reffields, urlfields, loadref))
    elif isinstance(doc, list):
        for d in doc:
            r.extend(scandeps(base, d, reffields, urlfields, loadref))
    return r
def try_make_job(self, step, basedir, **kwargs):
    inputparms = step.tool["inputs"]
    outputparms = step.tool["outputs"]

    try:
        inputobj = object_from_state(self.state, inputparms, False)
        if inputobj is None:
            _logger.debug("[workflow %s] job step %s not ready", id(self), step.id)
            return

        _logger.debug("[step %s] starting job step %s of workflow %s", id(step), step.id, id(self))

        if step.submitted:
            return

        callback = functools.partial(self.receive_output, step, outputparms)

        if "scatter" in step.tool:
            scatter = aslist(step.tool["scatter"])
            method = step.tool.get("scatterMethod")
            if method is None and len(scatter) != 1:
                raise WorkflowException("Must specify scatterMethod when scattering over multiple inputs")

            if method == "dotproduct" or method is None:
                jobs = dotproduct_scatter(step, inputobj, basedir, scatter, callback, **kwargs)
            elif method == "nested_crossproduct":
                jobs = nested_crossproduct_scatter(step, inputobj, basedir, scatter, callback, **kwargs)
            elif method == "flat_crossproduct":
                jobs = flat_crossproduct_scatter(step, inputobj, basedir, scatter, callback, 0, **kwargs)
        else:
            jobs = step.job(inputobj, basedir, callback, **kwargs)

        step.submitted = True

        for j in jobs:
            yield j
    except Exception as e:
        _logger.exception("Unhandled exception")
        self.processStatus = "permanentFail"
        step.completed = True
def exeval(ex, jobinput, requirements, docpath, context, pull_image):
    for r in reversed(requirements):
        if r["class"] == "ExpressionEngineRequirement" and r["id"] == ex["engine"]:
            if r["id"][0] != "#":
                with open(os.path.join(docpath, r["id"])) as f:
                    ex_obj = yaml.load(f)
                sch = process.get_schema()
                validate.validate_ex(sch.get_name("ExpressionEngineRequirement", ""), ex_obj)
                r = ex_obj

            runtime = []
            img_id = docker.get_from_requirements(r.get("requirements"), r.get("hints"), pull_image)
            if img_id:
                runtime = ["docker", "run", "-i", "--rm", img_id]

            exdefs = []
            for exdef in r.get("expressionDefs", []):
                if isinstance(exdef, dict) and "ref" in exdef:
                    with open(os.path.join(r["_docpath"], exdef["ref"])) as f:
                        exdefs.append(f.read())
                elif isinstance(exdef, basestring):
                    exdefs.append(exdef)

            inp = {
                "script": ex["script"],
                "expressionDefs": exdefs,
                "job": jobinput,
                "context": context
            }

            _logger.debug(json.dumps(inp))

            sp = subprocess.Popen(runtime + aslist(r["engineCommand"]),
                                  shell=False,
                                  close_fds=True,
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE)

            (stdoutdata, stderrdata) = sp.communicate(json.dumps(inp) + "\n\n")
            if sp.returncode != 0:
                raise WorkflowException("Expression engine returned non-zero exit code.")

            return json.loads(stdoutdata)

    raise WorkflowException("Unknown expression engine '%s'" % ex["engine"])
def try_make_job(self, step, basedir, **kwargs):
    _logger.debug("Try to make job %s", step.id)

    inputparms = step.tool["inputs"]
    outputparms = step.tool["outputs"]

    try:
        inputobj = self.object_from_state(inputparms, False)
        if inputobj is None:
            return

        if step.submitted:
            return

        callback = functools.partial(self.receive_output, step, outputparms)

        (scatterSpec, _) = self.get_requirement("ScatterFeatureRequirement")
        if scatterSpec and "scatter" in step.tool:
            scatter = aslist(step.tool["scatter"])
            method = step.tool.get("scatterMethod")
            if method is None and len(scatter) != 1:
                raise WorkflowException("Must specify scatterMethod when scattering over multiple inputs")

            if method == "dotproduct" or method is None:
                jobs = dotproduct_scatter(step, inputobj, basedir, scatter, callback, **kwargs)
            elif method == "nested_crossproduct":
                jobs = nested_crossproduct_scatter(step, inputobj, basedir, scatter, callback, **kwargs)
            elif method == "flat_crossproduct":
                jobs = flat_crossproduct_scatter(step, inputobj, basedir, scatter, callback, 0, **kwargs)
        else:
            jobs = step.job(inputobj, basedir, callback, **kwargs)

        step.submitted = True

        for j in jobs:
            yield j
    except Exception as e:
        _logger.error(e)
        self.processStatus = "permanentFail"
        step.completed = True
def exeval(ex, jobinput, requirements, docpath, context, pull_image):
    if ex["engine"] == "JsonPointer":
        return ref_resolver.resolve_pointer({"job": jobinput, "context": context}, ex["script"])

    for r in reversed(requirements):
        if r["class"] == "ExpressionEngineRequirement" and r["id"] == ex["engine"]:
            if r["id"][0] != "#":
                with open(os.path.join(docpath, r["id"])) as f:
                    ex_obj = yaml.load(f)
                sch = process.get_schema()
                validate.validate_ex(sch.get_name("ExpressionEngineRequirement", ""), ex_obj)
                r = ex_obj

            runtime = []
            img_id = docker.get_from_requirements(r.get("requirements"), r.get("hints"), pull_image)
            if img_id:
                runtime = ["docker", "run", "-i", "--rm", img_id]

            exdefs = []
            for exdef in r.get("expressionDefs", []):
                if isinstance(exdef, dict) and "ref" in exdef:
                    with open(os.path.join(r["_docpath"], exdef["ref"])) as f:
                        exdefs.append(f.read())
                elif isinstance(exdef, basestring):
                    exdefs.append(exdef)

            inp = {
                "script": ex["script"],
                "expressionDefs": exdefs,
                "job": jobinput,
                "context": context
            }

            _logger.debug(json.dumps(inp))

            sp = subprocess.Popen(runtime + aslist(r["engineCommand"]),
                                  shell=False,
                                  close_fds=True,
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE)

            (stdoutdata, stderrdata) = sp.communicate(json.dumps(inp) + "\n\n")
            if sp.returncode != 0:
                raise WorkflowException("Expression engine returned non-zero exit code.")

            return json.loads(stdoutdata)

    raise WorkflowException("Unknown expression engine '%s'" % ex["engine"])
def add_schemas(self, ns, base_url):
    for sch in aslist(ns):
        self.graph.parse(urlparse.urljoin(base_url, sch))

    for s, _, _ in self.graph.triples((None, RDF.type, RDF.Property)):
        self._add_properties(s)
    for s, _, o in self.graph.triples((None, RDFS.subPropertyOf, None)):
        self._add_properties(s)
        self._add_properties(o)
    for s, _, _ in self.graph.triples((None, RDFS.range, None)):
        self._add_properties(s)
    for s, _, _ in self.graph.triples((None, RDF.type, OWL.ObjectProperty)):
        self._add_properties(s)

    for s, _, _ in self.graph.triples((None, None, None)):
        self.idx[str(s)] = True
def _draftDraft3dev1toDev2(doc, loader, baseuri):
    doc = _updateDev2Script(doc)
    if isinstance(doc, basestring):
        return doc

    # Convert expressions
    if isinstance(doc, dict):
        if "@import" in doc:
            r, _ = loader.resolve_ref(doc["@import"], base_url=baseuri)
            return _draftDraft3dev1toDev2(r, loader, r["id"])

        for a in doc:
            doc[a] = _draftDraft3dev1toDev2(doc[a], loader, baseuri)

        if "class" in doc and (doc["class"] in ("CommandLineTool", "Workflow", "ExpressionTool")):
            added = False
            if "requirements" in doc:
                for r in doc["requirements"]:
                    if r["class"] == "ExpressionEngineRequirement":
                        if "engineConfig" in r:
                            doc["requirements"].append({
                                "class": "InlineJavascriptRequirement",
                                "expressionLib": [updateScript(sc) for sc in aslist(r["engineConfig"])]
                            })
                            added = True
                        doc["requirements"] = [rq for rq in doc["requirements"]
                                               if rq["class"] != "ExpressionEngineRequirement"]
                        break
            else:
                doc["requirements"] = []
            if not added:
                doc["requirements"].append({"class": "InlineJavascriptRequirement"})

    elif isinstance(doc, list):
        return [_draftDraft3dev1toDev2(a, loader, baseuri) for a in doc]

    return doc
def try_make_job(self, step, basedir, **kwargs):
    inputparms = step.tool["inputs"]
    outputparms = step.tool["outputs"]

    supportsMultipleInput = bool(self.workflow.get_requirement("MultipleInputFeatureRequirement")[0])

    try:
        inputobj = object_from_state(self.state, inputparms, False, supportsMultipleInput)
        if inputobj is None:
            _logger.debug("[workflow %s] job step %s not ready", id(self), step.id)
            return

        _logger.debug("[step %s] starting job step %s of workflow %s", id(step), step.id, id(self))

        if step.submitted:
            return

        callback = functools.partial(self.receive_output, step, outputparms)

        if "scatter" in step.tool:
            scatter = aslist(step.tool["scatter"])
            method = step.tool.get("scatterMethod")
            if method is None and len(scatter) != 1:
                raise WorkflowException("Must specify scatterMethod when scattering over multiple inputs")

            if method == "dotproduct" or method is None:
                jobs = dotproduct_scatter(step, inputobj, basedir, scatter, callback, **kwargs)
            elif method == "nested_crossproduct":
                jobs = nested_crossproduct_scatter(step, inputobj, basedir, scatter, callback, **kwargs)
            elif method == "flat_crossproduct":
                jobs = flat_crossproduct_scatter(step, inputobj, basedir, scatter, callback, 0, **kwargs)
        else:
            jobs = step.job(inputobj, basedir, callback, **kwargs)

        step.submitted = True

        for j in jobs:
            yield j
    except WorkflowException:
        raise
    except Exception as e:
        _logger.exception("Unhandled exception")
        self.processStatus = "permanentFail"
        step.completed = True
def add_schemas(self, ns, base_url):
    for sch in aslist(ns):
        try:
            self.graph.parse(urlparse.urljoin(base_url, sch), format="xml")
        except xml.sax.SAXParseException:
            self.graph.parse(urlparse.urljoin(base_url, sch), format="turtle")

    for s, _, _ in self.graph.triples((None, RDF.type, RDF.Property)):
        self._add_properties(s)
    for s, _, o in self.graph.triples((None, RDFS.subPropertyOf, None)):
        self._add_properties(s)
        self._add_properties(o)
    for s, _, _ in self.graph.triples((None, RDFS.range, None)):
        self._add_properties(s)
    for s, _, _ in self.graph.triples((None, RDF.type, OWL.ObjectProperty)):
        self._add_properties(s)

    for s, _, _ in self.graph.triples((None, None, None)):
        self.idx[str(s)] = True
def __init__(self, toc, j):
    self.typedoc = StringIO.StringIO()
    self.toc = toc
    self.subs = {}
    self.docParent = {}
    self.docAfter = {}
    for t in j:
        if "extends" in t:
            for e in aslist(t["extends"]):
                add_dictlist(self.subs, e, t["name"])
                if "docParent" not in t and "docAfter" not in t:
                    add_dictlist(self.docParent, e, t["name"])

        if t.get("docParent"):
            add_dictlist(self.docParent, t["docParent"], t["name"])

        if t.get("docAfter"):
            add_dictlist(self.docAfter, t["docAfter"], t["name"])

    _, _, metaschema_loader = schema.get_metaschema()
    alltypes = schema.extend_and_specialize(j, metaschema_loader)

    self.typemap = {}
    self.uses = {}
    for t in alltypes:
        self.typemap[t["name"]] = t
        if t["type"] == "https://w3id.org/cwl/salad#record":
            for f in t["fields"]:
                p = has_types(f)
                for tp in p:
                    if tp not in self.uses:
                        self.uses[tp] = []
                    if (t["name"], f["name"]) not in self.uses[tp]:
                        _, frg1 = urlparse.urldefrag(t["name"])
                        _, frg2 = urlparse.urldefrag(f["name"])
                        self.uses[tp].append((frg1, frg2))

    for f in alltypes:
        if ("extends" not in f) and ("docParent" not in f) and ("docAfter" not in f):
            self.render_type(f, 1)
def object_from_state(self, parms, frag_only):
    inputobj = {}
    for inp in parms:
        iid = inp["id"]
        if frag_only:
            (_, iid) = urlparse.urldefrag(iid)
            iid = iid.split(".")[-1]
        if "source" in inp:
            connections = aslist(inp["source"])
            for src in connections:
                if src in self.state and self.state[src] is not None:
                    if not self.match_types(inp["type"], self.state[src], iid, inputobj,
                                            inp.get("linkMerge",
                                                    ("merge_nested" if len(connections) > 1 else None))):
                        raise WorkflowException(
                            "Type mismatch between source '%s' (%s) and sink '%s' (%s)"
                            % (src, self.state[src].parameter["type"], inp["id"], inp["type"]))
                elif src not in self.state:
                    raise WorkflowException(
                        "Connect source '%s' on parameter '%s' does not exist"
                        % (src, inp["id"]))
                else:
                    return None
        elif "default" in inp:
            inputobj[iid] = inp["default"]
        else:
            raise WorkflowException("Value for %s not specified" % (inp["id"]))
    return inputobj
def _draftDraft3dev1toDev2(doc, loader, baseuri): doc = _updateDev2Script(doc) if isinstance(doc, basestring): return doc # Convert expressions if isinstance(doc, dict): if "@import" in doc: r, _ = loader.resolve_ref(doc["@import"], base_url=baseuri) return _draftDraft3dev1toDev2(r, loader, r["id"]) for a in doc: doc[a] = _draftDraft3dev1toDev2(doc[a], loader, baseuri) if "class" in doc and (doc["class"] in ("CommandLineTool", "Workflow", "ExpressionTool")): added = False if "requirements" in doc: for r in doc["requirements"]: if r["class"] == "ExpressionEngineRequirement": if "engineConfig" in r: doc["requirements"].append({ "class":"InlineJavascriptRequirement", "expressionLib": [updateScript(sc) for sc in aslist(r["engineConfig"])] }) added = True doc["requirements"] = [rq for rq in doc["requirements"] if rq["class"] != "ExpressionEngineRequirement"] break else: doc["requirements"] = [] if not added: doc["requirements"].append({"class":"InlineJavascriptRequirement"}) elif isinstance(doc, list): return [_draftDraft3dev1toDev2(a, loader, baseuri) for a in doc] return doc
def resolve_all(self, document, base_url, file_base=None):
    loader = self
    metadata = {}
    if file_base is None:
        file_base = base_url

    if isinstance(document, dict):
        # Handle $import and $include
        if ('$import' in document or '$include' in document):
            return self.resolve_ref(document, file_base)
    elif isinstance(document, list):
        pass
    else:
        return document, metadata

    newctx = None
    if isinstance(document, dict):
        # Handle $base, $profile, $namespaces, $schemas and $graph
        if "$base" in document:
            base_url = document["$base"]

        if "$profile" in document:
            if not newctx:
                newctx = SubLoader(self)
            prof = self.fetch(document["$profile"])
            newctx.add_namespaces(document.get("$namespaces", {}), document["$profile"])
            newctx.add_schemas(document.get("$schemas", []), document["$profile"])

        if "$namespaces" in document:
            if not newctx:
                newctx = SubLoader(self)
            newctx.add_namespaces(document["$namespaces"])

        if "$schemas" in document:
            if not newctx:
                newctx = SubLoader(self)
            newctx.add_schemas(document["$schemas"], file_base)

        if newctx:
            loader = newctx

        if "$graph" in document:
            metadata = {k: v for k, v in document.items() if k != "$graph"}
            document = document["$graph"]
            metadata, _ = loader.resolve_all(metadata, base_url, file_base)

    if isinstance(document, dict):
        for identifer in loader.identity_links:
            if identifer in document:
                if isinstance(document[identifer], basestring):
                    document[identifer] = loader.expand_url(document[identifer], base_url, scoped=True)
                    if (document[identifer] not in loader.idx
                            or isinstance(loader.idx[document[identifer]], basestring)):
                        loader.idx[document[identifer]] = document
                    base_url = document[identifer]
                elif isinstance(document[identifer], list):
                    for n, v in enumerate(document[identifer]):
                        document[identifer][n] = loader.expand_url(document[identifer][n], base_url, scoped=True)
                        if document[identifer][n] not in loader.idx:
                            loader.idx[document[identifer][n]] = document[identifer][n]

        for d in document:
            d2 = loader.expand_url(d, "", scoped=False, vocab_term=True)
            if d != d2:
                document[d2] = document[d]
                del document[d]

        for d in loader.url_fields:
            if d in document:
                if isinstance(document[d], basestring):
                    document[d] = loader.expand_url(document[d], base_url, scoped=False,
                                                    vocab_term=(d in loader.vocab_fields))
                elif isinstance(document[d], list):
                    document[d] = [loader.expand_url(url, base_url, scoped=False,
                                                     vocab_term=(d in loader.vocab_fields))
                                   if isinstance(url, basestring) else url
                                   for url in document[d]]

        try:
            for key, val in document.items():
                document[key], _ = loader.resolve_all(val, base_url, file_base)
        except validate.ValidationException as v:
            _logger.debug("loader is %s", id(loader))
            raise validate.ValidationException(
                "(%s) (%s) Validation error in field %s:\n%s"
                % (id(loader), file_base, key, validate.indent(str(v))))

    elif isinstance(document, list):
        i = 0
        try:
            while i < len(document):
                val = document[i]
                if isinstance(val, dict) and "$import" in val:
                    l, _ = loader.resolve_ref(val, file_base)
                    if isinstance(l, list):
                        del document[i]
                        for item in aslist(l):
                            document.insert(i, item)
                            i += 1
                    else:
                        document[i] = l
                        i += 1
                else:
                    document[i], _ = loader.resolve_all(val, base_url, file_base)
                    i += 1
        except validate.ValidationException as v:
            raise validate.ValidationException(
                "(%s) (%s) Validation error in position %i:\n%s"
                % (id(loader), file_base, i, validate.indent(str(v))))

    for identifer in loader.identity_links:
        if identifer in metadata:
            if isinstance(metadata[identifer], basestring):
                metadata[identifer] = loader.expand_url(metadata[identifer], base_url, scoped=True)
                loader.idx[metadata[identifer]] = document

    return document, metadata
def __init__(self, toolpath_object, **kwargs):
    (_, self.names, _) = get_schema()
    self.tool = toolpath_object
    self.requirements = kwargs.get("requirements", []) + self.tool.get("requirements", [])
    self.hints = kwargs.get("hints", []) + self.tool.get("hints", [])
    if "loader" in kwargs:
        self.formatgraph = kwargs["loader"].graph

    self.validate_hints(self.tool.get("hints", []), strict=kwargs.get("strict"))

    self.schemaDefs = {}

    sd, _ = self.get_requirement("SchemaDefRequirement")

    if sd:
        sdtypes = sd["types"]
        av = schema_salad.schema.make_valid_avro(sdtypes, {t["name"]: t for t in sdtypes}, set())
        for i in av:
            self.schemaDefs[i["name"]] = i
        avro.schema.make_avsc_object(av, self.names)

    # Build record schema from inputs
    self.inputs_record_schema = {
        "name": "input_record_schema",
        "type": "record",
        "fields": []
    }
    self.outputs_record_schema = {
        "name": "outputs_record_schema",
        "type": "record",
        "fields": []
    }

    for key in ("inputs", "outputs"):
        for i in self.tool[key]:
            c = copy.copy(i)
            doc_url, _ = urlparse.urldefrag(c['id'])
            c["name"] = shortname(c["id"])
            del c["id"]

            if "type" not in c:
                raise validate.ValidationException("Missing `type` in parameter `%s`" % c["name"])

            if "default" in c and "null" not in aslist(c["type"]):
                c["type"] = ["null"] + aslist(c["type"])
            else:
                c["type"] = c["type"]

            if key == "inputs":
                self.inputs_record_schema["fields"].append(c)
            elif key == "outputs":
                self.outputs_record_schema["fields"].append(c)

    try:
        self.inputs_record_schema = schema_salad.schema.make_valid_avro(
            self.inputs_record_schema, {}, set())
        avro.schema.make_avsc_object(self.inputs_record_schema, self.names)
    except avro.schema.SchemaParseException as e:
        raise validate.ValidationException(
            "Got error `%s` while processing inputs of %s:\n%s"
            % (str(e), self.tool["id"], json.dumps(self.inputs_record_schema, indent=4)))

    try:
        self.outputs_record_schema = schema_salad.schema.make_valid_avro(
            self.outputs_record_schema, {}, set())
        avro.schema.make_avsc_object(self.outputs_record_schema, self.names)
    except avro.schema.SchemaParseException as e:
        raise validate.ValidationException(
            "Got error `%s` while processing outputs of %s:\n%s"
            % (str(e), self.tool["id"], json.dumps(self.outputs_record_schema, indent=4)))
def try_make_job(self, step, basedir, **kwargs):
    inputparms = step.tool["inputs"]
    outputparms = step.tool["outputs"]

    supportsMultipleInput = bool(self.workflow.get_requirement("MultipleInputFeatureRequirement")[0])

    try:
        inputobj = object_from_state(self.state, inputparms, False, supportsMultipleInput)
        if inputobj is None:
            _logger.debug("[workflow %s] job step %s not ready", self.name, step.id)
            return

        _logger.debug("[step %s] starting job step %s of workflow %s", id(step), step.id, id(self))

        if step.submitted:
            return

        callback = functools.partial(self.receive_output, step, outputparms)

        valueFrom = {i["id"]: i["valueFrom"] for i in step.tool["inputs"] if "valueFrom" in i}

        if len(valueFrom) > 0 and not bool(self.workflow.get_requirement("StepInputExpressionRequirement")[0]):
            raise WorkflowException("Workflow step contains valueFrom but StepInputExpressionRequirement not in requirements")

        vfinputs = {shortname(k): v for k, v in inputobj.iteritems()}

        def valueFromFunc(k, v):
            if k in valueFrom:
                return expression.do_eval(valueFrom[k], vfinputs, self.workflow.requirements,
                                          None, None, {}, context=v)
            else:
                return v

        if "scatter" in step.tool:
            scatter = aslist(step.tool["scatter"])
            method = step.tool.get("scatterMethod")
            if method is None and len(scatter) != 1:
                raise WorkflowException("Must specify scatterMethod when scattering over multiple inputs")

            if "valueFrom" not in kwargs:
                kwargs["valueFrom"] = valueFromFunc

            if method == "dotproduct" or method is None:
                jobs = dotproduct_scatter(step, inputobj, basedir, scatter, callback, **kwargs)
            elif method == "nested_crossproduct":
                jobs = nested_crossproduct_scatter(step, inputobj, basedir, scatter, callback, **kwargs)
            elif method == "flat_crossproduct":
                jobs = flat_crossproduct_scatter(step, inputobj, basedir, scatter, callback, 0, **kwargs)
        else:
            _logger.debug("[workflow %s] Job is input %s", self.name, json.dumps(inputobj, indent=4))
            inputobj = {k: valueFromFunc(k, v) for k, v in inputobj.items()}
            _logger.debug("[workflow %s] Evaluated job input to %s", self.name, json.dumps(inputobj, indent=4))
            jobs = step.job(inputobj, basedir, callback, **kwargs)

        step.submitted = True

        for j in jobs:
            yield j
    except WorkflowException:
        raise
    except Exception as e:
        _logger.exception("Unhandled exception")
        self.processStatus = "permanentFail"
        step.completed = True
def resolve_all(self, document, base_url, file_base=None):
    loader = self
    metadata = {}
    if file_base is None:
        file_base = base_url

    if isinstance(document, dict):
        # Handle $import and $include
        if ('$import' in document or '$include' in document):
            return self.resolve_ref(document, file_base)
    elif isinstance(document, list):
        pass
    else:
        return document, metadata

    newctx = None
    if isinstance(document, dict):
        # Handle $base, $profile, $namespaces, $schemas and $graph
        if "$base" in document:
            base_url = document["$base"]

        if "$profile" in document:
            if not newctx:
                newctx = SubLoader(self)
            prof = self.fetch(document["$profile"])
            newctx.add_namespaces(document.get("$namespaces", {}), document["$profile"])
            newctx.add_schemas(document.get("$schemas", []), document["$profile"])

        if "$namespaces" in document:
            if not newctx:
                newctx = SubLoader(self)
            newctx.add_namespaces(document["$namespaces"])

        if "$schemas" in document:
            if not newctx:
                newctx = SubLoader(self)
            newctx.add_schemas(document["$schemas"], file_base)

        if newctx:
            loader = newctx

        if "$graph" in document:
            metadata = {k: v for k, v in document.items() if k != "$graph"}
            document = document["$graph"]
            metadata, _ = loader.resolve_all(metadata, base_url, file_base)

    if isinstance(document, dict):
        for identifer in loader.identity_links:
            if identifer in document:
                if isinstance(document[identifer], basestring):
                    document[identifer] = loader.expand_url(document[identifer], base_url, scoped=True)
                    if (document[identifer] not in loader.idx
                            or isinstance(loader.idx[document[identifer]], basestring)):
                        loader.idx[document[identifer]] = document
                    base_url = document[identifer]
                elif isinstance(document[identifer], list):
                    for n, v in enumerate(document[identifer]):
                        document[identifer][n] = loader.expand_url(document[identifer][n], base_url, scoped=True)
                        if document[identifer][n] not in loader.idx:
                            loader.idx[document[identifer][n]] = document[identifer][n]

        for d in document:
            d2 = loader.expand_url(d, "", scoped=False, vocab_term=True)
            if d != d2:
                document[d2] = document[d]
                del document[d]

        for d in loader.url_fields:
            if d in document:
                if isinstance(document[d], basestring):
                    document[d] = loader.expand_url(document[d], base_url, scoped=False,
                                                    vocab_term=(d in loader.vocab_fields))
                elif isinstance(document[d], list):
                    document[d] = [loader.expand_url(url, base_url, scoped=False,
                                                     vocab_term=(d in loader.vocab_fields))
                                   if isinstance(url, basestring) else url
                                   for url in document[d]]

        try:
            for key, val in document.items():
                document[key], _ = loader.resolve_all(val, base_url, file_base)
        except validate.ValidationException as v:
            _logger.debug("loader is %s", id(loader))
            raise validate.ValidationException(
                "(%s) (%s) Validation error in field %s:\n%s"
                % (id(loader), file_base, key, validate.indent(str(v))))

    elif isinstance(document, list):
        i = 0
        try:
            while i < len(document):
                val = document[i]
                if isinstance(val, dict) and "$import" in val:
                    l, _ = loader.resolve_ref(val, file_base)
                    if isinstance(l, list):
                        del document[i]
                        for item in aslist(l):
                            document.insert(i, item)
                            i += 1
                    else:
                        document[i] = l
                        i += 1
                else:
                    document[i], _ = loader.resolve_all(val, base_url, file_base)
                    i += 1
        except validate.ValidationException as v:
            raise validate.ValidationException(
                "(%s) (%s) Validation error in position %i:\n%s"
                % (id(loader), file_base, i, validate.indent(str(v))))

    return document, metadata
def collect_output(self, schema, builder, outdir):
    r = None
    if "outputBinding" in schema:
        binding = schema["outputBinding"]
        if "glob" in binding:
            r = []
            bg = builder.do_eval(binding["glob"])
            for gb in aslist(bg):
                r.extend([{"path": g, "class": "File"}
                          for g in builder.fs_access.glob(os.path.join(outdir, gb))])
            for files in r:
                checksum = hashlib.sha1()
                with builder.fs_access.open(files["path"], "rb") as f:
                    contents = f.read(CONTENT_LIMIT)
                    if binding.get("loadContents"):
                        files["contents"] = contents
                    filesize = 0
                    while contents != "":
                        checksum.update(contents)
                        filesize += len(contents)
                        contents = f.read(1024 * 1024)
                    files["checksum"] = "sha1$%s" % checksum.hexdigest()
                    files["size"] = filesize

        if "outputEval" in binding:
            r = builder.do_eval(binding["outputEval"], context=r)
            if schema["type"] == "File" and (not isinstance(r, dict) or "path" not in r):
                raise WorkflowException("Expression must return a file object.")

        if schema["type"] == "File":
            if not r:
                raise WorkflowException(
                    "No matches for output file with glob: {}.".format(binding["glob"]))
            if len(r) > 1:
                raise WorkflowException("Multiple matches for output item that is a single file.")
            r = r[0]

        if schema["type"] == "File" and "secondaryFiles" in binding:
            r["secondaryFiles"] = []
            for sf in aslist(binding["secondaryFiles"]):
                if isinstance(sf, dict):
                    sfpath = builder.do_eval(sf, context=r["path"])
                else:
                    sfpath = {"path": substitute(r["path"], sf), "class": "File"}
                if isinstance(sfpath, list):
                    r["secondaryFiles"].extend(sfpath)
                else:
                    r["secondaryFiles"].append(sfpath)

            for sf in r["secondaryFiles"]:
                if not builder.fs_access.exists(sf["path"]):
                    raise WorkflowException(
                        "Missing secondary file of '%s' of primary file '%s'"
                        % (sf["path"], r["path"]))

    if not r and schema["type"] == "record":
        r = {}
        for f in schema["fields"]:
            r[f["name"]] = self.collect_output(f, builder, outdir)
    return r
def __init__(self, toolpath_object, pos, **kwargs):
    try:
        makeTool = kwargs.get("makeTool")
        self.embedded_tool = makeTool(toolpath_object["run"], **kwargs)
    except validate.ValidationException as v:
        raise WorkflowException(
            "Tool definition %s failed validation:\n%s"
            % (toolpath_object["run"]["id"], validate.indent(str(v))))

    if "id" in toolpath_object:
        self.id = toolpath_object["id"]
    else:
        self.id = "#step" + str(pos)

    for field in ("inputs", "outputs"):
        for i in toolpath_object[field]:
            inputid = i["id"]
            (_, d) = urlparse.urldefrag(inputid)
            frag = d.split(".")[-1]
            p = urlparse.urljoin(toolpath_object["run"].get("id", self.id), "#" + frag)
            found = False
            for a in self.embedded_tool.tool[field]:
                if a["id"] == p:
                    i.update(a)
                    found = True
            if not found:
                raise WorkflowException(
                    "Did not find %s parameter '%s' in workflow step" % (field, p))
            i["id"] = inputid

    super(WorkflowStep, self).__init__(toolpath_object, "Process", do_validate=False, **kwargs)

    if self.embedded_tool.tool["class"] == "Workflow":
        (feature, _) = self.get_requirement("SubworkflowFeatureRequirement")
        if not feature:
            raise WorkflowException(
                "Workflow contains embedded workflow but SubworkflowFeatureRequirement not declared")

    if "scatter" in self.tool:
        (feature, _) = self.get_requirement("ScatterFeatureRequirement")
        if not feature:
            raise WorkflowException(
                "Workflow contains scatter but ScatterFeatureRequirement not declared")

        inputparms = copy.deepcopy(self.tool["inputs"])
        outputparms = copy.deepcopy(self.tool["outputs"])
        scatter = aslist(self.tool["scatter"])

        method = self.tool.get("scatterMethod")
        if method is None and len(scatter) != 1:
            raise WorkflowException(
                "Must specify scatterMethod when scattering over multiple inputs")

        inp_map = {i["id"]: i for i in inputparms}
        for s in scatter:
            if s not in inp_map:
                raise WorkflowException("Invalid Scatter parameter '%s'" % s)

            inp_map[s]["type"] = {"type": "array", "items": inp_map[s]["type"]}

        if self.tool.get("scatterMethod") == "nested_crossproduct":
            nesting = len(scatter)
        else:
            nesting = 1

        for r in xrange(0, nesting):
            for i in outputparms:
                i["type"] = {"type": "array", "items": i["type"]}

        self.tool["inputs"] = inputparms
        self.tool["outputs"] = outputparms
def __init__(self, toolpath_object, validateAs, do_validate=True, **kwargs):
    (_, self.names) = get_schema()
    self.tool = toolpath_object

    if do_validate:
        try:
            # Validate tool document
            validate.validate_ex(self.names.get_name(validateAs, ""), self.tool,
                                 strict=kwargs.get("strict"))
        except validate.ValidationException as v:
            raise validate.ValidationException(
                "Could not validate %s as %s:\n%s"
                % (self.tool.get("id"), validateAs, validate.indent(str(v))))

    self.requirements = kwargs.get("requirements", []) + self.tool.get("requirements", [])
    self.hints = kwargs.get("hints", []) + self.tool.get("hints", [])

    self.validate_hints(self.tool.get("hints", []), strict=kwargs.get("strict"))

    self.schemaDefs = {}

    sd, _ = self.get_requirement("SchemaDefRequirement")

    if sd:
        for i in sd["types"]:
            avro.schema.make_avsc_object(i, self.names)
            self.schemaDefs[i["name"]] = i

    # Build record schema from inputs
    self.inputs_record_schema = {"name": "input_record_schema", "type": "record", "fields": []}
    for i in self.tool["inputs"]:
        c = copy.copy(i)
        doc_url, fragment = urlparse.urldefrag(c['id'])
        c["name"] = fragment
        del c["id"]

        if "type" not in c:
            raise validate.ValidationException("Missing `type` in parameter `%s`" % c["name"])

        if "default" in c:
            c["type"] = ["null"] + aslist(c["type"])
        else:
            c["type"] = c["type"]
        self.inputs_record_schema["fields"].append(c)

    avro.schema.make_avsc_object(self.inputs_record_schema, self.names)

    self.outputs_record_schema = {"name": "outputs_record_schema", "type": "record", "fields": []}
    for i in self.tool["outputs"]:
        c = copy.copy(i)
        doc_url, fragment = urlparse.urldefrag(c['id'])
        c["name"] = fragment
        del c["id"]

        if "type" not in c:
            raise validate.ValidationException("Missing `type` in parameter `%s`" % c["name"])

        if "default" in c:
            c["type"] = ["null"] + aslist(c["type"])
        else:
            c["type"] = c["type"]
        self.outputs_record_schema["fields"].append(c)

    avro.schema.make_avsc_object(self.outputs_record_schema, self.names)
def resolve_all(self, document, base_url):
    loader = self
    metadata = {}

    if isinstance(document, dict):
        inc = '@include' in document
        if '@import' in document or '@include' in document:
            document, _ = self.resolve_ref(document, base_url)
        else:
            for identifer in self.identity_links:
                if identifer in document:
                    if isinstance(document[identifer], basestring):
                        document[identifer] = self.expand_url(document[identifer], base_url, scoped=True)
                        if (document[identifer] not in self.idx
                                or isinstance(self.idx[document[identifer]], basestring)):
                            self.idx[document[identifer]] = document
                        base_url = document[identifer]
                    elif isinstance(document[identifer], list):
                        for n, v in enumerate(document[identifer]):
                            document[identifer][n] = self.expand_url(document[identifer][n], base_url, scoped=True)
                            if document[identifer][n] not in self.idx:
                                self.idx[document[identifer][n]] = document[identifer][n]

        if inc:
            return document, {}

        if isinstance(document, dict) and "@context" in document:
            loader = Loader(self.ctx)
            loader.idx = self.idx
            loader.add_context(document["@context"])
            if "@base" in loader.ctx:
                base_url = loader.ctx["@base"]

        if "@graph" in document:
            metadata = {k: v for k, v in document.items() if k != "@graph"}
            document = document["@graph"]
            metadata, _ = self.resolve_all(metadata, base_url)
    elif isinstance(document, list):
        pass
    else:
        return document, metadata

    try:
        if isinstance(document, dict):
            for d in document:
                d2 = self.expand_url(d, "", scoped=False, vocab_term=True)
                if d != d2:
                    document[d2] = document[d]
                    del document[d]

            for d in loader.url_fields:
                if d in document:
                    if isinstance(document[d], basestring):
                        document[d] = loader.expand_url(document[d], base_url, scoped=False,
                                                        vocab_term=(d in loader.vocab_fields))
                    elif isinstance(document[d], list):
                        document[d] = [loader.expand_url(url, base_url, scoped=False,
                                                         vocab_term=(d in loader.vocab_fields))
                                       if isinstance(url, basestring) else url
                                       for url in document[d]]

            for key, val in document.items():
                document[key], _ = loader.resolve_all(val, base_url)
        elif isinstance(document, list):
            i = 0
            while i < len(document):
                val = document[i]
                if isinstance(val, dict) and "@import" in val and val.get("inline"):
                    l, _ = loader.resolve_all(val, base_url)
                    del document[i]
                    for item in aslist(l):
                        document.insert(i, item)
                        i += 1
                else:
                    document[i], _ = loader.resolve_all(val, base_url)
                    i += 1
    except validate.ValidationException as v:
        if isinstance(key, basestring):
            raise validate.ValidationException(
                "Validation error in field %s:\n%s" % (key, validate.indent(str(v))))
        else:
            raise validate.ValidationException(
                "Validation error in position %i:\n%s" % (key, validate.indent(str(v))))

    return document, metadata
def exeval(ex, jobinput, requirements, outdir, tmpdir, context, pull_image):
    if ex["engine"] == "cwl:JsonPointer":
        try:
            obj = {
                "job": jobinput,
                "context": context,
                "outdir": outdir,
                "tmpdir": tmpdir
            }
            return avro_ld.ref_resolver.resolve_json_pointer(obj, ex["script"])
        except ValueError as v:
            raise WorkflowException("%s in %s" % (v, obj))

    for r in reversed(requirements):
        if r["class"] == "ExpressionEngineRequirement" and r["id"] == ex["engine"]:
            runtime = []

            class DR(object):
                pass
            dr = DR()
            dr.requirements = r.get("requirements", [])
            dr.hints = r.get("hints", [])

            (docker_req, docker_is_req) = process.get_feature(dr, "DockerRequirement")
            if docker_req:
                img_id = docker.get_from_requirements(docker_req, docker_is_req, pull_image)
                if img_id:
                    runtime = ["docker", "run", "-i", "--rm", img_id]

            exdefs = []
            for exdef in r.get("engineConfig", []):
                if isinstance(exdef, dict) and "ref" in exdef:
                    with open(exdef["ref"][7:]) as f:
                        exdefs.append(f.read())
                elif isinstance(exdef, basestring):
                    exdefs.append(exdef)

            inp = {
                "script": ex["script"],
                "engineConfig": exdefs,
                "job": jobinput,
                "context": context,
                "outdir": outdir,
                "tmpdir": tmpdir,
            }

            _logger.debug("Invoking expression engine %s with %s",
                          runtime + aslist(r["engineCommand"]),
                          json.dumps(inp, indent=4))

            sp = subprocess.Popen(runtime + aslist(r["engineCommand"]),
                                  shell=False,
                                  close_fds=True,
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE)

            (stdoutdata, stderrdata) = sp.communicate(json.dumps(inp) + "\n\n")
            if sp.returncode != 0:
                raise WorkflowException(
                    "Expression engine returned non-zero exit code on evaluation of\n%s"
                    % json.dumps(inp, indent=4))

            return json.loads(stdoutdata)

    raise WorkflowException("Unknown expression engine '%s'" % ex["engine"])
def bind_input(self, schema, datum, lead_pos=[], tail_pos=[]):
    bindings = []
    binding = None
    if "inputBinding" in schema and isinstance(schema["inputBinding"], dict):
        binding = copy.copy(schema["inputBinding"])

        if "position" in binding:
            binding["position"] = aslist(lead_pos) + aslist(binding["position"]) + aslist(tail_pos)
        else:
            binding["position"] = aslist(lead_pos) + [0] + aslist(tail_pos)

        if "valueFrom" in binding:
            binding["do_eval"] = binding["valueFrom"]
        binding["valueFrom"] = datum

    # Handle union types
    if isinstance(schema["type"], list):
        for t in schema["type"]:
            if isinstance(t, basestring) and self.names.has_name(t, ""):
                avsc = self.names.get_name(t, "")
            elif isinstance(t, dict) and "name" in t and self.names.has_name(t["name"], ""):
                avsc = self.names.get_name(t["name"], "")
            else:
                avsc = avro.schema.make_avsc_object(t, self.names)
            if validate.validate(avsc, datum):
                schema = copy.deepcopy(schema)
                schema["type"] = t
                return self.bind_input(schema, datum, lead_pos=lead_pos, tail_pos=tail_pos)
        raise validate.ValidationException("'%s' is not a valid union %s" % (datum, schema["type"]))
    elif isinstance(schema["type"], dict):
        st = copy.deepcopy(schema["type"])
        if (binding and "inputBinding" not in st and "itemSeparator" not in binding
                and st["type"] in ("array", "map")):
            st["inputBinding"] = {}
        bindings.extend(self.bind_input(st, datum, lead_pos=lead_pos, tail_pos=tail_pos))
    else:
        if schema["type"] in self.schemaDefs:
            schema = self.schemaDefs[schema["type"]]

        if schema["type"] == "record":
            for f in schema["fields"]:
                if f["name"] in datum:
                    bindings.extend(self.bind_input(f, datum[f["name"]],
                                                    lead_pos=lead_pos, tail_pos=f["name"]))
                else:
                    datum[f["name"]] = f.get("default")

        if schema["type"] == "map":
            for n, item in datum.items():
                b2 = None
                if binding:
                    b2 = copy.deepcopy(binding)
                    b2["valueFrom"] = [n, item]
                bindings.extend(self.bind_input({"type": schema["values"], "inputBinding": b2},
                                                item, lead_pos=n, tail_pos=tail_pos))
            binding = None

        if schema["type"] == "array":
            for n, item in enumerate(datum):
                b2 = None
                if binding:
                    b2 = copy.deepcopy(binding)
                    b2["valueFrom"] = item
                bindings.extend(self.bind_input({"type": schema["items"], "inputBinding": b2},
                                                item, lead_pos=n, tail_pos=tail_pos))
            binding = None

        if schema["type"] == "File":
            self.files.append(datum)
            if binding and binding.get("loadContents"):
                with self.fs_access.open(datum["path"], "rb") as f:
                    datum["contents"] = f.read(CONTENT_LIMIT)

            if "secondaryFiles" in schema:
                if "secondaryFiles" not in datum:
                    datum["secondaryFiles"] = []
                for sf in aslist(schema["secondaryFiles"]):
                    if isinstance(sf, dict) or "$(" in sf or "${" in sf:
                        sfpath = self.do_eval(sf, context=datum)
                        if isinstance(sfpath, basestring):
                            sfpath = {"path": sfpath, "class": "File"}
                    else:
                        sfpath = {"path": substitute(datum["path"], sf), "class": "File"}
                    if isinstance(sfpath, list):
                        datum["secondaryFiles"].extend(sfpath)
                    else:
                        datum["secondaryFiles"].append(sfpath)
                for sf in datum.get("secondaryFiles", []):
                    self.files.append(sf)

    # Position to front of the sort key
    if binding:
        for bi in bindings:
            bi["position"] = binding["position"] + bi["position"]
        bindings.append(binding)

    return bindings
def try_make_job(self, step, basedir, **kwargs): inputobj = {} _logger.debug("Try to make job %s", step.id) (scatterSpec, _) = get_feature("Scatter", requirements=step.tool.get("requirements"), hints=step.tool.get("hints")) if scatterSpec: inputparms = copy.deepcopy(step.tool["inputs"]) outputparms = copy.deepcopy(step.tool["outputs"]) scatter = aslist(scatterSpec["scatter"]) inp_map = {i["id"]: i for i in inputparms} for s in scatter: if s not in inp_map: raise WorkflowException("Invalid Scatter parameter '%s'" % s) inp_map[s]["type"] = {"type": "array", "items": inp_map[s]["type"]} if scatterSpec.get("scatterMethod") == "nested_crossproduct": nesting = len(scatter) else: nesting = 1 for r in xrange(0, nesting): for i in outputparms: i["type"] = {"type": "array", "items": i["type"]} else: inputparms = step.tool["inputs"] outputparms = step.tool["outputs"] for inp in inputparms: _logger.debug("Trying input %s", inp) iid = inp["id"] if "connect" in inp: connections = inp["connect"] for connection in aslist(connections): src = connection["source"] if src in self.state and self.state[src] is not None: if not self.match_types(inp["type"], self.state[src], inp["id"], inputobj): raise WorkflowException("Type mismatch between source '%s' (%s) and sink '%s' (%s)" % (src, self.state[src].parameter["type"], inp["id"], inp["type"])) elif src not in self.state: raise WorkflowException("Connect source '%s' on parameter '%s' does not exist" % (src, inp["id"])) else: return elif "default" in inp: inputobj[iid] = inp["default"] else: raise WorkflowException("Value for %s not specified" % (inp["id"])) _logger.info("Creating job with input: %s", pprint.pformat(inputobj)) callback = functools.partial(self.receive_output, step, outputparms) if scatterSpec: method = scatterSpec.get("scatterMethod") if method is None and len(scatter) != 1: raise WorkflowException("Must specify scatterMethod when scattering over multiple inputs") if method == "dotproduct" or method is None: jobs = dotproduct_scatter(step, inputobj, basedir, scatter, callback, **kwargs) elif method == "nested_crossproduct": jobs = nested_crossproduct_scatter(step, inputobj, basedir, scatter, callback, **kwargs) elif method == "flat_crossproduct": jobs = flat_crossproduct_scatter(step, inputobj, basedir, scatter, callback, 0, **kwargs) else: jobs = step.job(inputobj, basedir, callback, **kwargs) for j in jobs: yield j
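When a step scatters, each scattered input parameter is rewritten to an array of its original type, and every output gains one array level per nesting step (len(scatter) levels for nested_crossproduct, otherwise one). A toy sketch of that rewriting, with invented parameter ids:

inputparms = [{"id": "#step/reads", "type": "File"}]
outputparms = [{"id": "#step/bam", "type": "File"}]
scatter = ["#step/reads"]

# wrap each scattered input type in an array
inp_map = {i["id"]: i for i in inputparms}
for s in scatter:
    inp_map[s]["type"] = {"type": "array", "items": inp_map[s]["type"]}

# wrap outputs once per nesting level
nesting = len(scatter)  # nested_crossproduct; other methods use 1
for _ in range(nesting):
    for o in outputparms:
        o["type"] = {"type": "array", "items": o["type"]}

print(inputparms)   # reads becomes {'type': 'array', 'items': 'File'}
print(outputparms)  # bam becomes   {'type': 'array', 'items': 'File'}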
def extendsfrom(item, ex): if "extends" in item: for e in aslist(item["extends"]): ex.insert(0, self.typemap[e]) extendsfrom(self.typemap[e], ex)
def process_type(t, g, context, defaultBase, namespaces, defaultPrefix): if t["type"] == "record": recordname = t["name"] _logger.debug("Processing record %s", t) classnode = URIRef(recordname) g.add((classnode, RDF.type, RDFS.Class)) split = urlparse.urlsplit(recordname) if "jsonldPrefix" in t: predicate = "%s:%s" % (t["jsonldPrefix"], recordname) elif split.scheme: (ns, ln) = rdflib.namespace.split_uri(unicode(recordname)) predicate = recordname recordname = ln else: predicate = "%s:%s" % (defaultPrefix, recordname) if context.get(recordname, predicate) != predicate: raise Exception("Predicate collision on '%s', '%s' != '%s'" % (recordname, context[recordname], predicate)) if not recordname: raise Exception("Could not determine a short name for record '%s'" % t["name"]) _logger.debug("Adding to context '%s' %s (%s)", recordname, predicate, type(predicate)) context[recordname] = predicate for i in t.get("fields", []): fieldname = i["name"] _logger.debug("Processing field %s", i) v = pred(t, i, fieldname, context, defaultPrefix, namespaces) if isinstance(v, basestring): v = v if v[0] != "@" else None else: v = v["_@id"] if v.get("_@id", "@")[0] != "@" else None if v: (ns, ln) = rdflib.namespace.split_uri(unicode(v)) if ns[0:-1] in namespaces: propnode = namespaces[ns[0:-1]][ln] else: propnode = URIRef(v) g.add((propnode, RDF.type, RDF.Property)) g.add((propnode, RDFS.domain, classnode)) # TODO generate range from datatype. if isinstance(i["type"], dict) and "name" in i["type"]: process_type(i["type"], g, context, defaultBase, namespaces, defaultPrefix) if "extends" in t: for e in aslist(t["extends"]): g.add((classnode, RDFS.subClassOf, URIRef(e))) elif t["type"] == "enum": _logger.debug("Processing enum %s", t["name"]) for i in t["symbols"]: pred(t, None, i, context, defaultBase, namespaces)
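process_type emits RDFS class and property triples for each record and field. The snippet below only illustrates the kind of triples produced, calling rdflib directly with invented URIs; it does not exercise the schema machinery above.

from rdflib import Graph, URIRef, RDF, RDFS

g = Graph()
classnode = URIRef("https://example.com/schema#MyRecord")  # hypothetical record name
propnode = URIRef("https://example.com/schema#myField")    # hypothetical field name

g.add((classnode, RDF.type, RDFS.Class))   # the record becomes an RDFS class
g.add((propnode, RDF.type, RDF.Property))  # each field becomes a property
g.add((propnode, RDFS.domain, classnode))  # whose domain is the record class

for triple in g:
    print(triple)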
def __init__(self, toolpath_object, validateAs, docpath): self.names = get_schema() self.docpath = docpath self.tool = toolpath_object #if self.tool.get("@context") != TOOL_CONTEXT_URL: # raise Exception("Missing or invalid '@context' field in tool description document, must be %s" % TOOL_CONTEXT_URL) # Validate tool document validate.validate_ex(self.names.get_name(validateAs, ""), self.tool) self.validate_requirements(self.tool, "requirements") self.validate_requirements(self.tool, "hints") for t in self.tool.get("requirements", []): t["_docpath"] = docpath for t in self.tool.get("hints", []): t["_docpath"] = docpath # Import schema defs self.schemaDefs = { "Any": [ "null", "boolean", "int", "long", "float", "double", "bytes", "string", "File", {"type": "array", "items": "Any"}, {"type": "map", "values": "Any"} ]} if self.tool.get("schemaDefs"): for i in self.tool["schemaDefs"]: avro.schema.make_avsc_object(i, self.names) self.schemaDefs[i["name"]] = i # Build record schema from inputs self.inputs_record_schema = {"name": "input_record_schema", "type": "record", "fields": []} for i in self.tool["inputs"]: c = copy.copy(i) c["name"] = c["id"][1:] del c["id"] if "default" in c: c["type"] = ["null"] + aslist(c["type"]) self.inputs_record_schema["fields"].append(c) avro.schema.make_avsc_object(self.inputs_record_schema, self.names) self.outputs_record_schema = {"name": "outputs_record_schema", "type": "record", "fields": []} for i in self.tool["outputs"]: c = copy.copy(i) c["name"] = c["id"][1:] del c["id"] if "default" in c: c["type"] = ["null"] + aslist(c["type"]) self.outputs_record_schema["fields"].append(c) avro.schema.make_avsc_object(self.outputs_record_schema, self.names)
def extend_and_specialize(items, loader): """Apply 'extend' and 'specialize' to fully materialize derived record types.""" types = {t["name"]: t for t in items} n = [] for t in items: t = copy.deepcopy(t) if "extends" in t: if "specialize" in t: spec = { sp["specializeFrom"]: sp["specializeTo"] for sp in aslist(t["specialize"]) } else: spec = {} exfields = [] exsym = [] for ex in aslist(t["extends"]): if ex not in types: raise Exception( "Extends %s in %s refers to invalid base type" % (t["extends"], t["name"])) basetype = copy.deepcopy(types[ex]) if t["type"] == "record": if spec: basetype["fields"] = replace_type( basetype.get("fields", []), spec, loader, set()) for f in basetype.get("fields", []): if "inherited_from" not in f: f["inherited_from"] = ex exfields.extend(basetype.get("fields", [])) elif t["type"] == "enum": exsym.extend(basetype.get("symbols", [])) if t["type"] == "record": exfields.extend(t.get("fields", [])) t["fields"] = exfields fieldnames = set() for field in t["fields"]: if field["name"] in fieldnames: raise validate.ValidationException( "Field name %s appears twice in %s" % (field["name"], t["name"])) else: fieldnames.add(field["name"]) for y in [x for x in t["fields"] if x["name"] == "class"]: y["type"] = { "type": "enum", "symbols": [t["name"]], "name": t["name"] + "_class", } y["doc"] = "Must be `%s` to indicate this is a %s object." % ( t["name"], t["name"]) elif t["type"] == "enum": exsym.extend(t.get("symbols", [])) t["symbols"] = exsym types[t["name"]] = t n.append(t) ex_types = {t["name"]: t for t in n} extended_by = {} for t in n: if "extends" in t: for ex in aslist(t["extends"]): if ex_types[ex].get("abstract"): add_dictlist(extended_by, ex, ex_types[t["name"]]) add_dictlist(extended_by, avro_name(ex), ex_types[t["name"]]) for t in n: if "fields" in t: t["fields"] = replace_type(t["fields"], extended_by, loader, set()) return n
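A toy illustration of what materializing 'extends' is expected to produce for records. The type names are invented, and the merge below is a simplified sketch that ignores 'specialize', abstract bases, and the loader machinery.

base = {"name": "Base", "type": "record",
        "fields": [{"name": "id", "type": "string"}]}
derived = {"name": "Derived", "type": "record", "extends": "Base",
           "fields": [{"name": "value", "type": "int"}]}

def simple_extend(t, types):
    # copy the derived type and prepend the base fields, tagging their origin
    t = dict(t)
    extends = t["extends"] if isinstance(t["extends"], list) else [t["extends"]]
    inherited = []
    for ex in extends:
        for f in types[ex]["fields"]:
            f = dict(f)
            f.setdefault("inherited_from", ex)
            inherited.append(f)
    t["fields"] = inherited + t.get("fields", [])
    return t

print(simple_extend(derived, {"Base": base}))
# -> Derived carries both 'id' (marked inherited_from: Base) and 'value'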
def __init__(self, toolpath_object, validateAs, do_validate=True, **kwargs): (_, self.names) = get_schema() self.tool = toolpath_object if do_validate: try: # Validate tool document validate.validate_ex(self.names.get_name(validateAs, ""), self.tool, strict=kwargs.get("strict")) except validate.ValidationException as v: raise validate.ValidationException( "Could not validate %s as %s:\n%s" % (self.tool.get("id"), validateAs, validate.indent(str(v)))) self.requirements = kwargs.get("requirements", []) + self.tool.get( "requirements", []) self.hints = kwargs.get("hints", []) + self.tool.get("hints", []) self.validate_hints(self.tool.get("hints", []), strict=kwargs.get("strict")) self.schemaDefs = {} sd, _ = self.get_requirement("SchemaDefRequirement") if sd: for i in sd["types"]: avro.schema.make_avsc_object(i, self.names) self.schemaDefs[i["name"]] = i # Build record schema from inputs self.inputs_record_schema = { "name": "input_record_schema", "type": "record", "fields": [] } for i in self.tool["inputs"]: c = copy.copy(i) doc_url, fragment = urlparse.urldefrag(c['id']) c["name"] = fragment del c["id"] if "type" not in c: raise validate.ValidationException( "Missing `type` in parameter `%s`" % c["name"]) if "default" in c: c["type"] = ["null"] + aslist(c["type"]) else: c["type"] = c["type"] self.inputs_record_schema["fields"].append(c) avro.schema.make_avsc_object(self.inputs_record_schema, self.names) self.outputs_record_schema = { "name": "outputs_record_schema", "type": "record", "fields": [] } for i in self.tool["outputs"]: c = copy.copy(i) doc_url, fragment = urlparse.urldefrag(c['id']) c["name"] = fragment del c["id"] if "type" not in c: raise validate.ValidationException( "Missing `type` in parameter `%s`" % c["name"]) if "default" in c: c["type"] = ["null"] + aslist(c["type"]) else: c["type"] = c["type"] self.outputs_record_schema["fields"].append(c) avro.schema.make_avsc_object(self.outputs_record_schema, self.names)
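The inputs/outputs record schemas are plain Avro-style dicts built from the tool parameters: the id fragment becomes the field name, and parameters with a default become nullable. A minimal sketch with invented parameter ids; registration via avro.schema.make_avsc_object is omitted, and fragment_name is a simplified stand-in for urlparse.urldefrag.

import copy

tool_inputs = [
    {"id": "#main/reads", "type": "File"},
    {"id": "#main/threads", "type": "int", "default": 1},
]

def fragment_name(inputid):
    # simplified stand-in: keep only the last path component of the id
    return inputid.split("/")[-1]

def aslist(l):
    return l if isinstance(l, list) else [l]

fields = []
for i in tool_inputs:
    c = copy.copy(i)
    c["name"] = fragment_name(c.pop("id"))
    if "default" in c:
        # a parameter with a default is allowed to be absent, so make it nullable
        c["type"] = ["null"] + aslist(c["type"])
    fields.append(c)

inputs_record_schema = {"name": "input_record_schema", "type": "record", "fields": fields}
print(inputs_record_schema)  # 'threads' ends up with type ["null", "int"]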
def try_make_job(self, step, basedir, **kwargs): inputparms = step.tool["inputs"] outputparms = step.tool["outputs"] supportsMultipleInput = bool( self.workflow.get_requirement("MultipleInputFeatureRequirement") [0]) try: inputobj = object_from_state(self.state, inputparms, False, supportsMultipleInput) if inputobj is None: _logger.debug("[workflow %s] job step %s not ready", self.name, step.id) return _logger.debug("[step %s] starting job step %s of workflow %s", id(step), step.id, id(self)) if step.submitted: return callback = functools.partial(self.receive_output, step, outputparms) valueFrom = { i["id"]: i["valueFrom"] for i in step.tool["inputs"] if "valueFrom" in i } if len(valueFrom) > 0 and not bool( self.workflow.get_requirement( "StepInputExpressionRequirement")[0]): raise WorkflowException( "Workflow step contains valueFrom but StepInputExpressionRequirement not in requirements" ) vfinputs = {shortname(k): v for k, v in inputobj.iteritems()} def valueFromFunc(k, v): if k in valueFrom: return expression.do_eval(valueFrom[k], vfinputs, self.workflow.requirements, None, None, {}, context=v) else: return v if "scatter" in step.tool: scatter = aslist(step.tool["scatter"]) method = step.tool.get("scatterMethod") if method is None and len(scatter) != 1: raise WorkflowException( "Must specify scatterMethod when scattering over multiple inputs" ) if "valueFrom" not in kwargs: kwargs["valueFrom"] = valueFromFunc if method == "dotproduct" or method is None: jobs = dotproduct_scatter(step, inputobj, basedir, scatter, callback, **kwargs) elif method == "nested_crossproduct": jobs = nested_crossproduct_scatter(step, inputobj, basedir, scatter, callback, **kwargs) elif method == "flat_crossproduct": jobs = flat_crossproduct_scatter(step, inputobj, basedir, scatter, callback, 0, **kwargs) else: _logger.debug("[workflow %s] Job is input %s", self.name, json.dumps(inputobj, indent=4)) inputobj = { k: valueFromFunc(k, v) for k, v in inputobj.items() } _logger.debug("[workflow %s] Evaluated job input to %s", self.name, json.dumps(inputobj, indent=4)) jobs = step.job(inputobj, basedir, callback, **kwargs) step.submitted = True for j in jobs: yield j except WorkflowException: raise except Exception as e: _logger.exception("Unhandled exception") self.processStatus = "permanentFail" step.completed = True
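valueFromFunc only re-evaluates keys that declared a valueFrom and passes everything else through unchanged. A sketch of that behaviour, with a plain Python callable standing in for expression.do_eval and invented input names:

# hypothetical valueFrom table: 'threads' is recomputed from the step inputs
valueFrom = {"threads": lambda inputs, self_value: inputs["threads"] * 2}

def value_from_func(key, value, inputs):
    # keys without a valueFrom pass through untouched
    if key in valueFrom:
        return valueFrom[key](inputs, value)
    return value

inputobj = {"threads": 2, "reads": "input.fastq"}
evaluated = {k: value_from_func(k, v, inputobj) for k, v in inputobj.items()}
print(evaluated)  # -> {'threads': 4, 'reads': 'input.fastq'}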
def __init__(self, toolpath_object, pos, **kwargs): if "id" in toolpath_object: self.id = toolpath_object["id"] else: self.id = "#step" + str(pos) try: makeTool = kwargs.get("makeTool") runobj = None if isinstance(toolpath_object["run"], basestring): runobj, _ = schema_salad.schema.load_and_validate( kwargs["loader"], kwargs["avsc_names"], toolpath_object["run"], True) else: runobj = toolpath_object["run"] self.embedded_tool = makeTool(runobj, **kwargs) except validate.ValidationException as v: raise WorkflowException( "Tool definition %s failed validation:\n%s" % (toolpath_object["run"], validate.indent(str(v)))) for field in ("inputs", "outputs"): for i in toolpath_object[field]: inputid = i["id"] p = shortname(inputid) found = False for a in self.embedded_tool.tool[field]: frag = shortname(a["id"]) if frag == p: i.update(a) found = True if not found: i["type"] = "Any" #raise WorkflowException("Parameter '%s' of %s in workflow step %s does not correspond to parameter in %s" % (p, field, self.id, self.embedded_tool.tool.get("id"))) i["id"] = inputid super(WorkflowStep, self).__init__(toolpath_object, **kwargs) if self.embedded_tool.tool["class"] == "Workflow": (feature, _) = self.get_requirement("SubworkflowFeatureRequirement") if not feature: raise WorkflowException( "Workflow contains embedded workflow but SubworkflowFeatureRequirement not in requirements" ) if "scatter" in self.tool: (feature, _) = self.get_requirement("ScatterFeatureRequirement") if not feature: raise WorkflowException( "Workflow contains scatter but ScatterFeatureRequirement not in requirements" ) inputparms = copy.deepcopy(self.tool["inputs"]) outputparms = copy.deepcopy(self.tool["outputs"]) scatter = aslist(self.tool["scatter"]) method = self.tool.get("scatterMethod") if method is None and len(scatter) != 1: raise WorkflowException( "Must specify scatterMethod when scattering over multiple inputs" ) inp_map = {i["id"]: i for i in inputparms} for s in scatter: if s not in inp_map: raise WorkflowException("Invalid Scatter parameter '%s'" % s) inp_map[s]["type"] = { "type": "array", "items": inp_map[s]["type"] } if self.tool.get("scatterMethod") == "nested_crossproduct": nesting = len(scatter) else: nesting = 1 for r in xrange(0, nesting): for i in outputparms: i["type"] = {"type": "array", "items": i["type"]} self.tool["inputs"] = inputparms self.tool["outputs"] = outputparms
def job(self, joborder, input_basedir, output_callback, **kwargs): builder = self._init_job(joborder, input_basedir, **kwargs) if self.tool["baseCommand"]: for n, b in enumerate(aslist(self.tool["baseCommand"])): builder.bindings.append({"position": [-1000000, n], "valueFrom": b}) if self.tool.get("arguments"): for i, a in enumerate(self.tool["arguments"]): if isinstance(a, dict): a = copy.copy(a) if a.get("position"): a["position"] = [a["position"], i] else: a["position"] = [0, i] a["do_eval"] = a["valueFrom"] a["valueFrom"] = None builder.bindings.append(a) else: builder.bindings.append({"position": [0, i], "valueFrom": a}) builder.bindings.sort(key=lambda a: a["position"]) reffiles = set((f["path"] for f in builder.files)) j = self.makeJobRunner() j.joborder = builder.job j.stdin = None j.stdout = None j.successCodes = self.tool.get("successCodes") j.temporaryFailCodes = self.tool.get("temporaryFailCodes") j.permanentFailCodes = self.tool.get("permanentFailCodes") j.requirements = self.requirements j.hints = self.hints _logger.debug( "[job %s] initializing from %s%s", id(j), self.tool.get("id", ""), " as part of %s" % kwargs["part_of"] if "part_of" in kwargs else "", ) _logger.debug("[job %s] %s", id(j), json.dumps(joborder, indent=4)) builder.pathmapper = None if self.tool.get("stdin"): j.stdin = builder.do_eval(self.tool["stdin"]) if isinstance(j.stdin, dict) and "ref" in j.stdin: j.stdin = builder.job[j.stdin["ref"][1:]]["path"] reffiles.add(j.stdin) if self.tool.get("stdout"): j.stdout = builder.do_eval(self.tool["stdout"]) if os.path.isabs(j.stdout) or ".." in j.stdout: raise validate.ValidationException("stdout must be a relative path") builder.pathmapper = self.makePathMapper(reffiles, input_basedir, **kwargs) builder.requirements = j.requirements for f in builder.files: f["path"] = builder.pathmapper.mapper(f["path"])[1] _logger.debug("[job %s] command line bindings is %s", id(j), json.dumps(builder.bindings, indent=4)) _logger.debug( "[job %s] path mappings is %s", id(j), json.dumps({p: builder.pathmapper.mapper(p) for p in builder.pathmapper.files()}, indent=4), ) dockerReq, _ = self.get_requirement("DockerRequirement") if dockerReq and kwargs.get("use_container"): out_prefix = kwargs.get("tmp_outdir_prefix") j.outdir = kwargs.get("outdir") or tempfile.mkdtemp(prefix=out_prefix) tmpdir_prefix = kwargs.get("tmpdir_prefix") j.tmpdir = kwargs.get("tmpdir") or tempfile.mkdtemp(prefix=tmpdir_prefix) else: j.outdir = builder.outdir j.tmpdir = builder.tmpdir createFiles, _ = self.get_requirement("CreateFileRequirement") j.generatefiles = {} if createFiles: for t in createFiles["fileDef"]: j.generatefiles[builder.do_eval(t["filename"])] = copy.deepcopy(builder.do_eval(t["fileContent"])) j.environment = {} evr, _ = self.get_requirement("EnvVarRequirement") if evr: for t in evr["envDef"]: j.environment[t["envName"]] = builder.do_eval(t["envValue"]) j.command_line = flatten(map(builder.generate_arg, builder.bindings)) j.pathmapper = builder.pathmapper j.collect_outputs = functools.partial(self.collect_output_ports, self.tool["outputs"], builder) j.output_callback = output_callback yield j
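The command line falls out of sorting the bindings by their position keys: baseCommand entries sort to the front at [-1000000, n], while arguments and input bindings follow by their own positions. A self-contained sketch with invented values and a trivial stand-in for builder.generate_arg:

bindings = [
    {"position": [0, 0], "valueFrom": "--threads=4"},        # argument 0
    {"position": [-1000000, 0], "valueFrom": "bwa"},          # baseCommand[0]
    {"position": [-1000000, 1], "valueFrom": "mem"},          # baseCommand[1]
    {"position": [2, "reads"], "valueFrom": "input.fastq"},   # input binding
]

def generate_arg(b):
    # stand-in for builder.generate_arg: one argument string per binding
    return [str(b["valueFrom"])]

bindings.sort(key=lambda b: b["position"])
command_line = [a for b in bindings for a in generate_arg(b)]
print(command_line)  # -> ['bwa', 'mem', '--threads=4', 'input.fastq']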
def collect_output(self, schema, builder, outdir): r = None if "outputBinding" in schema: binding = schema["outputBinding"] globpatterns = [] if "glob" in binding: r = [] for gb in aslist(binding["glob"]): try: gb = builder.do_eval(gb) globpatterns.append(gb) if gb: r.extend([{"path": g, "class": "File"} for g in builder.fs_access.glob(os.path.join(outdir, gb))]) except (OSError, IOError) as e: _logger.warn(str(e)) for files in r: checksum = hashlib.sha1() with builder.fs_access.open(files["path"], "rb") as f: contents = f.read(CONTENT_LIMIT) if binding.get("loadContents"): files["contents"] = contents filesize = 0 while contents != "": checksum.update(contents) filesize += len(contents) contents = f.read(1024*1024) files["checksum"] = "sha1$%s" % checksum.hexdigest() files["size"] = filesize if "format" in schema: files["format"] = builder.do_eval(schema["format"], context=files) optional = False singlefile = False if isinstance(schema["type"], list): if "null" in schema["type"]: optional = True if "File" in schema["type"]: singlefile = True elif schema["type"] == "File": singlefile = True if "outputEval" in binding: r = builder.do_eval(binding["outputEval"], context=r) if singlefile: # Handle single file outputs not wrapped in a list if r is not None and not isinstance(r, (list, tuple)): r = [r] if optional and r is None: pass elif (r is None or len(r) != 1 or not isinstance(r[0], dict) or "path" not in r[0]): raise WorkflowException("Expression must return a file object for %s." % schema["id"]) if singlefile: if not r and not optional: raise WorkflowException("Did not find output file with glob pattern: '{}'".format(globpatterns)) elif not r and optional: pass elif isinstance(r, list): if len(r) > 1: raise WorkflowException("Multiple matches for output item that is a single file.") else: r = r[0] if "secondaryFiles" in schema: for primary in aslist(r): if isinstance(primary, dict): primary["secondaryFiles"] = [] for sf in aslist(schema["secondaryFiles"]): if isinstance(sf, dict) or "$(" in sf or "${" in sf: sfpath = builder.do_eval(sf, context=r) if isinstance(sfpath, basestring): sfpath = {"path": sfpath, "class": "File"} else: sfpath = {"path": substitute(primary["path"], sf), "class": "File"} for sfitem in aslist(sfpath): if builder.fs_access.exists(sfitem["path"]): primary["secondaryFiles"].append(sfitem) if not r and optional: r = None if not r and isinstance(schema["type"], dict) and schema["type"]["type"] == "record": r = {} for f in schema["type"]["fields"]: r[shortname(f["name"])] = self.collect_output(f, builder, outdir) return r
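The checksum/size pass reads each globbed output file in chunks, hashing as it goes and optionally keeping only the first CONTENT_LIMIT bytes as contents. A sketch using plain open() in place of builder.fs_access.open(); the 64 KiB limit here is an assumption, not the value defined elsewhere in the codebase.

import hashlib

def checksum_and_size(path, content_limit=64 * 1024):
    # hash the whole file in chunks, mirroring the loop in collect_output
    checksum = hashlib.sha1()
    size = 0
    with open(path, "rb") as f:
        contents = f.read(content_limit)
        while contents != b"":
            checksum.update(contents)
            size += len(contents)
            contents = f.read(1024 * 1024)
    return "sha1$%s" % checksum.hexdigest(), size

# Example (assumes an 'output.txt' exists in the working directory):
# print(checksum_and_size("output.txt"))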
def bind_input(self, schema, datum, lead_pos=[], tail_pos=[]): bindings = [] binding = None if "inputBinding" in schema and isinstance(schema["inputBinding"], dict): binding = copy.copy(schema["inputBinding"]) if "position" in binding: binding["position"] = aslist(lead_pos) + aslist(binding["position"]) + aslist(tail_pos) else: binding["position"] = aslist(lead_pos) + [0] + aslist(tail_pos) if "valueFrom" in binding: binding["do_eval"] = binding["valueFrom"] binding["valueFrom"] = datum # Handle union types if isinstance(schema["type"], list): for t in schema["type"]: if isinstance(t, basestring) and self.names.has_name(t, ""): avsc = self.names.get_name(t, "") elif isinstance(t, dict) and "name" in t and self.names.has_name(t["name"], ""): avsc = self.names.get_name(t["name"], "") else: avsc = avro.schema.make_avsc_object(t, self.names) if validate.validate(avsc, datum): schema = copy.deepcopy(schema) schema["type"] = t return self.bind_input(schema, datum, lead_pos=lead_pos, tail_pos=tail_pos) raise validate.ValidationException("'%s' is not a valid union %s" % (datum, schema["type"])) elif isinstance(schema["type"], dict): st = copy.deepcopy(schema["type"]) if ( binding and "inputBinding" not in st and "itemSeparator" not in binding and st["type"] in ("array", "map") ): st["inputBinding"] = {} bindings.extend(self.bind_input(st, datum, lead_pos=lead_pos, tail_pos=tail_pos)) else: if schema["type"] in self.schemaDefs: schema = self.schemaDefs[schema["type"]] if schema["type"] == "record": for f in schema["fields"]: if f["name"] in datum: bindings.extend(self.bind_input(f, datum[f["name"]], lead_pos=lead_pos, tail_pos=f["name"])) else: datum[f["name"]] = f.get("default") if schema["type"] == "map": for n, item in datum.items(): b2 = None if binding: b2 = copy.deepcopy(binding) b2["valueFrom"] = [n, item] bindings.extend( self.bind_input( {"type": schema["values"], "inputBinding": b2}, item, lead_pos=n, tail_pos=tail_pos ) ) binding = None if schema["type"] == "array": for n, item in enumerate(datum): b2 = None if binding: b2 = copy.deepcopy(binding) b2["valueFrom"] = item bindings.extend( self.bind_input( {"type": schema["items"], "inputBinding": b2}, item, lead_pos=n, tail_pos=tail_pos ) ) binding = None if schema["type"] == "File": self.files.append(datum) if binding: if binding.get("loadContents"): with self.fs_access.open(datum["path"], "rb") as f: datum["contents"] = f.read(CONTENT_LIMIT) if "secondaryFiles" in binding: if "secondaryFiles" not in datum: datum["secondaryFiles"] = [] for sf in aslist(binding["secondaryFiles"]): if isinstance(sf, dict): sfpath = self.do_eval(sf, context=datum["path"]) else: sfpath = {"path": substitute(datum["path"], sf), "class": "File"} if isinstance(sfpath, list): datum["secondaryFiles"].extend(sfpath) self.files.extend(sfpath) else: datum["secondaryFiles"].append(sfpath) self.files.append(sfpath) # Position to front of the sort key if binding: for bi in bindings: bi["position"] = binding["position"] + bi["position"] bindings.append(binding) return bindings
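The plain-suffix form of secondaryFiles relies on substitute(): the suffix is appended to the primary path, and each leading "^" strips one extension first. This is a re-implementation for illustration, not cwltool's own function.

def substitute(value, replace):
    # each leading "^" removes one extension from the primary path
    if replace.startswith("^"):
        return substitute(value.rsplit(".", 1)[0], replace[1:])
    return value + replace

print(substitute("reads.bam", ".bai"))       # -> reads.bam.bai
print(substitute("reads.bam", "^.bai"))      # -> reads.bai
print(substitute("reads.tar.gz", "^^.txt"))  # -> reads.txt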
def job(self, joborder, input_basedir, output_callback, **kwargs): builder = self._init_job(joborder, input_basedir, **kwargs) if self.tool["baseCommand"]: for n, b in enumerate(aslist(self.tool["baseCommand"])): builder.bindings.append({ "position": [-1000000, n], "valueFrom": b }) if self.tool.get("arguments"): for i, a in enumerate(self.tool["arguments"]): if isinstance(a, dict): a = copy.copy(a) if a.get("position"): a["position"] = [a["position"], i] else: a["position"] = [0, i] a["do_eval"] = a["valueFrom"] a["valueFrom"] = None builder.bindings.append(a) else: builder.bindings.append({ "position": [0, i], "valueFrom": a }) builder.bindings.sort(key=lambda a: a["position"]) reffiles = set((f["path"] for f in builder.files)) j = self.makeJobRunner() j.builder = builder j.joborder = builder.job j.stdin = None j.stdout = None j.successCodes = self.tool.get("successCodes") j.temporaryFailCodes = self.tool.get("temporaryFailCodes") j.permanentFailCodes = self.tool.get("permanentFailCodes") j.requirements = self.requirements j.hints = self.hints j.name = uniquename(kwargs.get("name", str(id(j)))) _logger.debug("[job %s] initializing from %s%s", j.name, self.tool.get("id", ""), " as part of %s" % kwargs["part_of"] if "part_of" in kwargs else "") _logger.debug("[job %s] %s", j.name, json.dumps(joborder, indent=4)) builder.pathmapper = None if self.tool.get("stdin"): j.stdin = builder.do_eval(self.tool["stdin"]) if isinstance(j.stdin, dict) and "ref" in j.stdin: j.stdin = builder.job[j.stdin["ref"][1:]]["path"] reffiles.add(j.stdin) if self.tool.get("stdout"): j.stdout = builder.do_eval(self.tool["stdout"]) if os.path.isabs(j.stdout) or ".." in j.stdout: raise validate.ValidationException("stdout must be a relative path") builder.pathmapper = self.makePathMapper(reffiles, input_basedir, **kwargs) builder.requirements = j.requirements # map files to assigned path inside a container. We need to also explicitly walk over input as implicit reassignment doesn't reach everything in builder.bindings def _check_adjust(f): if not f.get("containerfs"): f["path"] = builder.pathmapper.mapper(f["path"])[1] f["containerfs"] = True return f _logger.debug("[job %s] path mappings is %s", j.name, json.dumps({p: builder.pathmapper.mapper(p) for p in builder.pathmapper.files()}, indent=4)) adjustFileObjs(builder.files, _check_adjust) adjustFileObjs(builder.bindings, _check_adjust) _logger.debug("[job %s] command line bindings is %s", j.name, json.dumps(builder.bindings, indent=4)) dockerReq, _ = self.get_requirement("DockerRequirement") if dockerReq and kwargs.get("use_container"): out_prefix = kwargs.get("tmp_outdir_prefix") j.outdir = kwargs.get("outdir") or tempfile.mkdtemp(prefix=out_prefix) tmpdir_prefix = kwargs.get('tmpdir_prefix') j.tmpdir = kwargs.get("tmpdir") or tempfile.mkdtemp(prefix=tmpdir_prefix) else: j.outdir = builder.outdir j.tmpdir = builder.tmpdir createFiles, _ = self.get_requirement("CreateFileRequirement") j.generatefiles = {} if createFiles: for t in createFiles["fileDef"]: j.generatefiles[builder.do_eval(t["filename"])] = copy.deepcopy(builder.do_eval(t["fileContent"])) j.environment = {} evr, _ = self.get_requirement("EnvVarRequirement") if evr: for t in evr["envDef"]: j.environment[t["envName"]] = builder.do_eval(t["envValue"]) shellcmd, _ = self.get_requirement("ShellCommandRequirement") if shellcmd: cmd = [] for b in builder.bindings: arg = builder.generate_arg(b) if b.get("shellQuote", True): arg = [shellescape.quote(a) for a in aslist(arg)] cmd.extend(aslist(arg)) j.command_line = ["/bin/sh", "-c", " ".join(cmd)] else: j.command_line = flatten(map(builder.generate_arg, builder.bindings)) j.pathmapper = builder.pathmapper j.collect_outputs = functools.partial(self.collect_output_ports, self.tool["outputs"], builder) j.output_callback = output_callback yield j
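Under ShellCommandRequirement, bindings that keep the default shellQuote: true are escaped, while shellQuote: false lets shell syntax through. The sketch below uses the standard library quote in place of the shellescape dependency, with invented argument values:

try:
    from shlex import quote  # Python 3
except ImportError:
    from pipes import quote  # Python 2

bindings = [
    {"args": ["samtools", "view", "-b", "input file.sam"]},  # quoted by default
    {"args": ["|"], "shellQuote": False},                    # raw shell pipe
    {"args": ["samtools", "sort", "-"]},
]

cmd = []
for b in bindings:
    if b.get("shellQuote", True):
        cmd.extend(quote(a) for a in b["args"])
    else:
        cmd.extend(b["args"])

command_line = ["/bin/sh", "-c", " ".join(cmd)]
print(command_line)  # the filename with a space is quoted; the pipe is not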
def try_make_job(self, step, basedir, **kwargs): inputobj = {} if "scatter" in step.tool: if not self.check_feature("ScatterFeature", kwargs): raise WorkflowException( "Must include ScatterFeature in requirements.") inputparms = copy.deepcopy(step.tool["inputs"]) outputparms = copy.deepcopy(step.tool["outputs"]) scatter = aslist(step.tool["scatter"]) inp_map = {i["id"]: i for i in inputparms} for s in aslist(step.tool["scatter"]): if s not in inp_map: raise WorkflowException("Invalid Scatter parameter '%s'" % s) inp_map[s]["type"] = { "type": "array", "items": inp_map[s]["type"] } if step.tool.get("scatterMethod") == "nested_crossproduct": nesting = len(aslist(step.tool["scatter"])) else: nesting = 1 for r in xrange(0, nesting): for i in outputparms: i["type"] = {"type": "array", "items": i["type"]} else: inputparms = step.tool["inputs"] outputparms = step.tool["outputs"] for inp in inputparms: _logger.debug(inp) iid = idk(inp["id"]) if "connect" in inp: connections = inp["connect"] is_array = isinstance(inp["type"], dict) and inp["type"]["type"] == "array" for connection in aslist(connections): src = idk(connection["source"]) if src in self.state and self.state[src] is not None: if self.state[src].parameter["type"] == inp["type"]: # source and input types are the same if is_array and iid in inputobj: # there's already a value in the input object, so extend the existing array inputobj[iid].extend(self.state[src].value) else: # simply assign the value from state to input inputobj[iid] = copy.deepcopy( self.state[src].value) elif is_array and self.state[src].parameter[ "type"] == inp["type"]["items"]: # source type is the item type on the input array # promote single item to array entry if iid in inputobj: inputobj[iid].append(self.state[src].value) else: inputobj[iid] = [self.state[src].value] else: raise WorkflowException( "Type mismatch between '%s' (%s) and '%s' (%s)" % (src, self.state[src].parameter["type"], idk(inp["id"]), inp["type"])) elif src not in self.state: raise WorkflowException( "Connect source '%s' on parameter '%s' does not exist" % (src, inp["id"])) else: return elif "default" in inp: inputobj[iid] = inp["default"] else: raise WorkflowException("Value for %s not specified" % (inp["id"])) _logger.info("Creating job with input: %s", inputobj) callback = functools.partial(self.receive_output, step, outputparms) if step.tool.get("scatter"): method = step.tool.get("scatterMethod") if method is None and len(aslist(step.tool["scatter"])) != 1: raise WorkflowException( "Must specify scatterMethod when scattering over multiple inputs" ) if method == "dotproduct" or method is None: jobs = dotproduct_scatter(step, inputobj, basedir, aslist(step.tool["scatter"]), callback, **kwargs) elif method == "nested_crossproduct": jobs = nested_crossproduct_scatter( step, inputobj, basedir, aslist(step.tool["scatter"]), callback, **kwargs) elif method == "flat_crossproduct": jobs = flat_crossproduct_scatter(step, inputobj, basedir, aslist(step.tool["scatter"]), callback, 0, **kwargs) else: jobs = step.job(inputobj, basedir, callback, **kwargs) for j in jobs: yield j
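The three scatter methods differ only in which input combinations they enumerate. A sketch on plain lists (the real functions build and schedule jobs, and dotproduct requires the scattered inputs to have equal lengths):

import itertools

a = [1, 2]
b = ["x", "y"]

# dotproduct pairs inputs index by index
dot = list(zip(a, b))                        # -> [(1, 'x'), (2, 'y')]

# the crossproduct methods enumerate every combination; nested keeps the
# [len(a)][len(b)] nesting, flat yields a single flattened list
nested = [[(i, j) for j in b] for i in a]
flat = list(itertools.product(a, b))

print(dot)
print(nested)
print(flat)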