def collect_output(self, schema, builder, outdir):
    # Collect the value for a single output port described by `schema`,
    # looking for produced files under `outdir`.
    r = None
    if "outputBinding" in schema:
        binding = schema["outputBinding"]
        if "glob" in binding:
            # The glob pattern may itself be an expression; evaluate first.
            r = []
            bg = expression.do_eval(binding["glob"], builder.job, self.requirements, self.docpath)
            for gb in aslist(bg):
                r.extend([{"path": g, "class": "File"} for g in glob.glob(os.path.join(outdir, gb))])
            # Compute sha1 checksum and size for every matched file, and
            # optionally keep the first CONTENT_LIMIT bytes as "contents".
            for files in r:
                checksum = hashlib.sha1()
                with open(files["path"], "rb") as f:
                    contents = f.read(CONTENT_LIMIT)
                    if binding.get("loadContents"):
                        files["contents"] = contents
                    filesize = 0
                    # NOTE(review): file is opened in binary mode, so on
                    # Python 3 `contents` is bytes and never equals "" —
                    # this loop assumes Python 2 string semantics.
                    while contents != "":
                        checksum.update(contents)
                        filesize += len(contents)
                        contents = f.read(1024*1024)
                files["checksum"] = "sha1$%s" % checksum.hexdigest()
                files["size"] = filesize
        if "outputEval" in binding:
            # Post-process the globbed list with a user expression; the
            # current value is passed as the expression context.
            r = expression.do_eval(binding["outputEval"], builder.job, self.requirements, self.docpath, r)
            if schema["type"] == "File" and (not isinstance(r, dict) or "path" not in r):
                raise WorkflowException("Expression must return a file object.")
        if schema["type"] == "File":
            # NOTE(review): if outputEval above returned a dict, len(r)
            # counts its keys and r[0] would fail — confirm these two
            # branches are mutually exclusive in practice.
            if len(r) != 1:
                raise WorkflowException("Multiple matches for output item that is a single file.")
            r = r[0]
        if schema["type"] == "File" and "secondaryFiles" in binding:
            # Attach secondary files derived from the primary file's path.
            r["secondaryFiles"] = []
            for sf in aslist(binding["secondaryFiles"]):
                if isinstance(sf, dict):
                    # NOTE(review): uses self.job here while the rest of this
                    # method uses builder.job — verify this is intentional.
                    sfpath = expression.do_eval(sf, self.job, self.requirements, self.docpath, r["path"])
                else:
                    # Pattern string: derive the path from the primary file.
                    sfpath = {"path": substitute(r["path"], sf)}
                if isinstance(sfpath, list):
                    r["secondaryFiles"].extend(sfpath)
                else:
                    r["secondaryFiles"].append(sfpath)
    if not r and schema["type"] == "record":
        # Record outputs with no collected value: gather each field
        # recursively.
        r = {}
        for f in schema["fields"]:
            r[f["name"]] = self.collect_output(f, builder, outdir)
    return r
def do_eval(self, ex, context=None, pull_image=True):
    """Evaluate expression `ex` against this object's job, requirements,
    and working directories."""
    eval_args = (ex, self.job, self.requirements, self.outdir, self.tmpdir)
    return expression.do_eval(*eval_args, context=context, pull_image=pull_image)
def valueFromFunc(k, v):
    # Inputs without a "valueFrom" expression pass through unchanged.
    if k not in valueFrom:
        return v
    # Evaluate the step's valueFrom expression with `v` as the context.
    return expression.do_eval(valueFrom[k], vfinputs,
                              self.workflow.requirements,
                              None, None, {}, context=v)
def collect_output(self, schema, builder, outdir):
    # Resolve one output port: glob files from `outdir`, attach checksums,
    # then coerce the glob result to the declared schema type.
    r = None
    if "outputBinding" in schema:
        binding = schema["outputBinding"]
        if "glob" in binding:
            # Here the glob pattern is used verbatim (not evaluated as an
            # expression).
            r = [{ "path": g } for g in glob.glob(os.path.join(outdir, binding["glob"]))]
            # Compute sha1 checksum and size for every matched file, and
            # optionally keep the first CONTENT_LIMIT bytes as "contents".
            for files in r:
                checksum = hashlib.sha1()
                with open(files["path"], "rb") as f:
                    contents = f.read(CONTENT_LIMIT)
                    if binding.get("loadContents"):
                        files["contents"] = contents
                    filesize = 0
                    # NOTE(review): binary mode means `contents` is bytes on
                    # Python 3 and never equals "" — Python 2 era loop.
                    while contents != "":
                        checksum.update(contents)
                        filesize += len(contents)
                        contents = f.read(1024 * 1024)
                files["checksum"] = "sha1$%s" % checksum.hexdigest()
                files["size"] = filesize
            # Shape the glob result to the schema type.
            if schema["type"] == "array" and schema["items"] == "File":
                # File[] output: keep the whole list of file objects.
                pass
            elif schema["type"] == "File":
                # Single File: take the first match, or None when nothing
                # matched.
                r = r[0] if r else None
            elif binding.get("loadContents"):
                # Non-File output with loadContents: surface the captured
                # file contents; unwrap when there is exactly one match.
                r = [v["contents"] for v in r]
                if len(r) == 1:
                    r = r[0]
            else:
                r = None
        if "valueFrom" in binding:
            # Final user expression; receives the value computed so far as
            # its context.
            r = expression.do_eval(binding["valueFrom"], builder.job, self.requirements, self.docpath, r)
    if not r and schema["type"] == "record":
        # Record outputs: collect each field recursively.
        r = {}
        for f in schema["fields"]:
            r[f["name"]] = self.collect_output(f, builder, outdir)
    return r
def generate_arg(self, binding):
    """Turn one command-line binding into a list of argv strings.

    `binding["valueFrom"]` holds the bound value; an optional "do_eval"
    expression transforms it first.  "prefix" supplies the flag and
    "separator" controls how flag and value are joined ("None" or " "
    means separate argv entries).  Returns a (possibly empty) list of
    argument strings.
    """
    value = binding["valueFrom"]
    if "do_eval" in binding:
        value = expression.do_eval(binding["do_eval"], self.job,
                                   self.requirements, self.docpath, value)
    prefix = binding.get("prefix")
    sep = binding.get("separator")

    parts = []
    if isinstance(value, list):
        if binding.get("itemSeparator"):
            # Join list items into a single argument.
            parts = [binding["itemSeparator"].join([str(v) for v in value])]
        elif binding.get("do_eval"):
            # Expression produced a ready-made argument list.
            return ([prefix] if prefix else []) + value
        elif prefix:
            # List without itemSeparator: emit only the flag.
            return [prefix]
        else:
            return []
    elif binding.get("is_file"):
        # File objects contribute their path.
        parts = [value["path"]]
    elif isinstance(value, dict):
        return [prefix] if prefix else []
    elif value is True and prefix:
        # Boolean true: flag only.
        return [prefix]
    elif value is False or value is None:
        return []
    else:
        parts = [value]

    args = []
    for p in parts:
        if sep is None or sep == " ":
            # Prefix (if any) and value are separate argv entries; None
            # prefixes are filtered out below.
            args.extend([prefix, str(p)])
        elif prefix is not None:
            args.append(prefix + sep + str(p))
        else:
            # Bug fix: a "separator" without a "prefix" used to raise
            # TypeError (None + str); emit the bare value instead.
            args.append(str(p))
    return [a for a in args if a is not None]
def generate_arg(self, binding):
    """Turn one command-line binding into a list of argv strings.

    `binding["valueFrom"]` holds the bound value; an optional "do_eval"
    expression transforms it first.  "prefix" supplies the flag and
    "separate" (default True) chooses whether prefix and value are
    separate argv entries or glued into one.  Returns a (possibly empty)
    list of argument strings.
    """
    value = binding["valueFrom"]
    if "do_eval" in binding:
        value = expression.do_eval(binding["do_eval"], self.job,
                                   self.requirements, self.docpath, value)
    prefix = binding.get("prefix")
    sep = binding.get("separate", True)

    parts = []
    if isinstance(value, list):
        if binding.get("itemSeparator"):
            # Join list items into a single argument.
            parts = [binding["itemSeparator"].join([str(v) for v in value])]
        elif binding.get("do_eval"):
            # Expression produced a ready-made argument list.
            return ([prefix] if prefix else []) + value
        elif prefix:
            # List without itemSeparator: emit only the flag.
            return [prefix]
        else:
            return []
    elif binding.get("is_file"):
        # File objects contribute their path.
        parts = [value["path"]]
    elif isinstance(value, dict):
        return [prefix] if prefix else []
    elif value is True and prefix:
        # Boolean true: flag only.
        return [prefix]
    elif value is False or value is None:
        return []
    else:
        parts = [value]

    args = []
    for p in parts:
        if sep:
            # Prefix (if any) and value are separate argv entries; None
            # prefixes are filtered out below.
            args.extend([prefix, str(p)])
        elif prefix is not None:
            args.append(prefix + str(p))
        else:
            # Bug fix: "separate": false with no "prefix" used to raise
            # TypeError (None + str); emit the bare value instead.
            args.append(str(p))
    return [a for a in args if a is not None]
def do_eval(self, ex, context=None, pull_image=True):
    """Delegate evaluation of `ex` to the expression engine, supplying the
    job state held on self."""
    opts = {"context": context, "pull_image": pull_image}
    return expression.do_eval(ex, self.job, self.requirements,
                              self.outdir, self.tmpdir, **opts)
def job(self, joborder, basedir, output_callback, use_container=True, **kwargs):
    """Build and yield a CommandLineJob for this tool invocation.

    joborder: the input object for this run; basedir: directory that
    relative input paths are resolved against; output_callback: handed
    to the job for reporting collected outputs; use_container: honor a
    DockerRequirement when one is present.
    """
    builder = self._init_job(joborder, basedir, **kwargs)

    # baseCommand words sort before everything else (-1000000 position).
    if self.tool["baseCommand"]:
        for n, b in enumerate(aslist(self.tool["baseCommand"])):
            builder.bindings.append({
                "position": [-1000000, n],
                "valueFrom": b
            })

    # "arguments" entries become bindings; dict arguments stash their
    # expression in "do_eval" for later evaluation by generate_arg.
    if self.tool.get("arguments"):
        for i, a in enumerate(self.tool["arguments"]):
            if isinstance(a, dict):
                a = copy.copy(a)  # don't mutate the tool document
                if a.get("position"):
                    a["position"] = [a["position"], i]
                else:
                    a["position"] = [0, i]
                a["do_eval"] = a["valueFrom"]
                a["valueFrom"] = None
                builder.bindings.append(a)
            else:
                builder.bindings.append({
                    "position": [0, i],
                    "valueFrom": a
                })

    # Order everything by its list-valued sort key.
    builder.bindings.sort(key=lambda a: a["position"])

    _logger.debug(pprint.pformat(builder.bindings))
    _logger.debug(pprint.pformat(builder.files))

    reffiles = [f["path"] for f in builder.files]

    j = CommandLineJob()
    j.joborder = builder.job
    j.stdin = None
    j.stdout = None
    builder.pathmapper = None

    # stdin may be a literal path or a {"ref": "#input"} reference to an
    # input file (the leading character of the ref is stripped).
    if self.tool.get("stdin"):
        j.stdin = self.tool["stdin"]
        if isinstance(j.stdin, dict) and "ref" in j.stdin:
            j.stdin = builder.job[j.stdin["ref"][1:]]["path"]
        reffiles.append(j.stdin)

    if self.tool.get("stdout"):
        if isinstance(self.tool["stdout"], dict) and "ref" in self.tool["stdout"]:
            # stdout is captured into a file that one of the declared
            # outputs will glob for; wire that output's binding up here.
            for out in self.tool.get("outputs", []):
                if out["id"] == self.tool["stdout"]["ref"]:
                    filename = self.tool["stdout"]["ref"][1:]
                    j.stdout = filename
                    out["outputBinding"] = out.get("outputBinding", {})
                    out["outputBinding"]["glob"] = filename
            # The ref must have matched one of the outputs above.
            if not j.stdout:
                raise Exception("stdout refers to invalid output")
        else:
            j.stdout = self.tool["stdout"]
            if os.path.isabs(j.stdout):
                raise Exception("stdout must be a relative path")

    j.requirements = self.requirements
    j.hints = self.hints

    # Use the Docker-aware path mapper when a DockerRequirement appears
    # in either requirements or hints and containers are allowed.
    for r in (j.requirements + j.hints):
        if r["class"] == "DockerRequirement" and use_container:
            builder.pathmapper = DockerPathMapper(reffiles, basedir)

    if builder.pathmapper is None:
        builder.pathmapper = PathMapper(reffiles, basedir)

    # Rewrite input file paths to their mapped (possibly in-container)
    # locations.
    for f in builder.files:
        f["path"] = builder.pathmapper.mapper(f["path"])

    builder.requirements = j.requirements

    # Literal files to create in the job's working directory.
    j.generatefiles = {}
    for t in self.tool.get("fileDefs", []):
        j.generatefiles[t["filename"]] = expression.do_eval(
            t["value"], builder.job, j.requirements, self.docpath)

    # Environment variables for the job.
    j.environment = {}
    for t in self.tool.get("environmentDefs", []):
        j.environment[t["env"]] = expression.do_eval(
            t["value"], builder.job, j.requirements, self.docpath)

    j.command_line = flatten(map(builder.generate_arg, builder.bindings))

    # Resolve a relative stdin path against basedir.
    if j.stdin:
        j.stdin = j.stdin if os.path.isabs(j.stdin) else os.path.join(
            basedir, j.stdin)

    j.pathmapper = builder.pathmapper
    # Outputs are collected later by the job through this partial.
    j.collect_outputs = functools.partial(self.collect_output_ports,
                                          self.tool["outputs"], builder)
    j.output_callback = output_callback
    yield j
def run(self, outdir=None, **kwargs):
    """Evaluate this expression tool's script and hand the result to the
    output callback."""
    builder = self.builder
    result = expression.do_eval(self.script, builder.job,
                                self.requirements, builder.docpath)
    self.output_callback(result)
def bind_input(self, schema, datum, lead_pos=[], tail_pos=[]):
    """Recursively build command-line bindings for input `datum`.

    lead_pos/tail_pos accumulate position components from enclosing
    arrays/maps/records so the final sort key reflects nesting depth.
    Returns a list of binding dicts.

    NOTE(review): mutable default arguments ([]) are shared across calls;
    safe only if never mutated in place — confirm.
    """
    bindings = []
    binding = None
    if "inputBinding" in schema and isinstance(schema["inputBinding"], dict):
        binding = copy.copy(schema["inputBinding"])
        # Splice this binding's own position between the inherited
        # lead/tail components.
        if "position" in binding:
            binding["position"] = aslist(lead_pos) + aslist(binding["position"]) + aslist(tail_pos)
        else:
            binding["position"] = aslist(lead_pos) + [0] + aslist(tail_pos)
        # Preserve a user "valueFrom" expression as "do_eval"; the actual
        # bound value always goes in "valueFrom".
        if "valueFrom" in binding:
            binding["do_eval"] = binding["valueFrom"]
        binding["valueFrom"] = datum
        if schema["type"] == "File":
            binding["is_file"] = True

    # Handle union types
    if isinstance(schema["type"], list):
        # Try each member type; recurse with the first one that validates.
        for t in schema["type"]:
            if isinstance(t, basestring) and self.names.has_name(t, ""):
                avsc = self.names.get_name(t, "")
            else:
                avsc = avro.schema.make_avsc_object(t, self.names)
            if validate.validate(avsc, datum):
                schema = copy.deepcopy(schema)
                schema["type"] = t
                return self.bind_input(schema, datum, lead_pos=lead_pos, tail_pos=tail_pos)
        raise ValidationException("'%s' is not a valid union %s" % (datum, schema["type"]))
    elif isinstance(schema["type"], dict):
        # Inline (nested) type definition.
        st = copy.deepcopy(schema["type"])
        if binding and "inputBinding" not in st and "itemSeparator" not in binding and st["type"] in ("array", "map"):
            # Without an itemSeparator, each item binds individually.
            st["inputBinding"] = {}
        bindings.extend(self.bind_input(st, datum, lead_pos=lead_pos, tail_pos=tail_pos))
    else:
        if schema["type"] in self.schemaDefs:
            schema = self.schemaDefs[schema["type"]]
        if schema["type"] == "record":
            for f in schema["fields"]:
                if f["name"] in datum:
                    bindings.extend(self.bind_input(f, datum[f["name"]], lead_pos=lead_pos, tail_pos=f["name"]))
        if schema["type"] == "map":
            for n, item in datum.items():
                b2 = None
                if binding:
                    b2 = copy.deepcopy(binding)
                    b2["valueFrom"] = [n, item]
                bindings.extend(self.bind_input({"type": schema["values"], "inputBinding": b2}, item, lead_pos=n, tail_pos=tail_pos))
            # Binding consumed per-item; don't also append it at the end.
            binding = None
        if schema["type"] == "array":
            for n, item in enumerate(datum):
                b2 = None
                if binding:
                    b2 = copy.deepcopy(binding)
                    b2["valueFrom"] = item
                bindings.extend(self.bind_input({"type": schema["items"], "inputBinding": b2}, item, lead_pos=n, tail_pos=tail_pos))
            # Binding consumed per-item; don't also append it at the end.
            binding = None
        if schema["type"] == "File":
            self.files.append(datum)
            if binding:
                if binding.get("loadContents"):
                    # Capture up to CONTENT_LIMIT bytes of the file.
                    with open(os.path.join(self.input_basedir, datum["path"]), "rb") as f:
                        datum["contents"] = f.read(CONTENT_LIMIT)
                if "secondaryFiles" in binding:
                    if "secondaryFiles" not in datum:
                        datum["secondaryFiles"] = []
                    # NOTE(review): the presence check is on `binding` but
                    # the patterns are read from `schema` — confirm which
                    # one is meant to carry secondaryFiles.
                    for sf in aslist(schema["secondaryFiles"]):
                        if isinstance(sf, dict):
                            sfpath = expression.do_eval(sf, self.job, self.requirements, self.docpath, datum["path"])
                        else:
                            # Pattern string: derive path from the primary
                            # file's path.
                            sfpath = {"path": substitute(datum["path"], sf)}
                        if isinstance(sfpath, list):
                            datum["secondaryFiles"].extend(sfpath)
                        else:
                            datum["secondaryFiles"].append(sfpath)
                        self.files.append(sfpath)

    # Position to front of the sort key
    if binding:
        for bi in bindings:
            bi["position"] = binding["position"] + bi["position"]
        bindings.append(binding)

    return bindings
def job(self, joborder, input_basedir, output_callback, use_container=True, **kwargs):
    """Build and yield a CommandLineJob for this tool invocation.

    joborder: the input object for this run; input_basedir: directory
    that relative input paths are resolved against; output_callback:
    handed to the job for reporting collected outputs; use_container:
    honor a DockerRequirement when one is present.
    """
    builder = self._init_job(joborder, input_basedir, **kwargs)

    # baseCommand words sort before everything else (-1000000 position).
    if self.tool["baseCommand"]:
        for n, b in enumerate(aslist(self.tool["baseCommand"])):
            builder.bindings.append({
                "position": [-1000000, n],
                "valueFrom": b
            })

    # "arguments" entries become bindings; dict arguments stash their
    # expression in "do_eval" for later evaluation by generate_arg.
    if self.tool.get("arguments"):
        for i, a in enumerate(self.tool["arguments"]):
            if isinstance(a, dict):
                a = copy.copy(a)  # don't mutate the tool document
                if a.get("position"):
                    a["position"] = [a["position"], i]
                else:
                    a["position"] = [0, i]
                a["do_eval"] = a["valueFrom"]
                a["valueFrom"] = None
                builder.bindings.append(a)
            else:
                builder.bindings.append({
                    "position": [0, i],
                    "valueFrom": a
                })

    # Order everything by its list-valued sort key.
    builder.bindings.sort(key=lambda a: a["position"])

    reffiles = [f["path"] for f in builder.files]

    j = CommandLineJob()
    j.joborder = builder.job
    j.stdin = None
    j.stdout = None
    # Exit-code classification: success / temporary / permanent failure.
    j.successCodes = self.tool.get("successCodes")
    j.temporaryFailCodes = self.tool.get("temporaryFailCodes")
    j.permanentFailCodes = self.tool.get("permanentFailCodes")
    j.requirements = self.requirements
    j.hints = self.hints
    builder.pathmapper = None

    # stdin may be an expression; it can also resolve to a
    # {"ref": "#input"} reference to an input file.
    if self.tool.get("stdin"):
        j.stdin = expression.do_eval(self.tool["stdin"], builder.job,
                                     self.requirements, self.docpath, j.stdin)
        if isinstance(j.stdin, dict) and "ref" in j.stdin:
            j.stdin = builder.job[j.stdin["ref"][1:]]["path"]
        reffiles.append(j.stdin)

    if self.tool.get("stdout"):
        j.stdout = expression.do_eval(self.tool["stdout"], builder.job,
                                      j.requirements, self.docpath)
        if os.path.isabs(j.stdout):
            raise validate.ValidationException("stdout must be a relative path")

    # Use the Docker-aware path mapper when a DockerRequirement appears
    # in either requirements or hints and containers are allowed.
    dockerReq, _ = get_feature("DockerRequirement", requirements=self.requirements, hints=self.hints)
    if dockerReq and use_container:
        builder.pathmapper = DockerPathMapper(reffiles, input_basedir)

    if builder.pathmapper is None:
        builder.pathmapper = PathMapper(reffiles, input_basedir)

    # Rewrite input file paths to their mapped (possibly in-container)
    # locations; index [1] selects the mapped target path.
    for f in builder.files:
        f["path"] = builder.pathmapper.mapper(f["path"])[1]

    _logger.debug("Bindings is %s", pprint.pformat(builder.bindings))
    _logger.debug("Files is %s", pprint.pformat({p: builder.pathmapper.mapper(p) for p in builder.pathmapper.files()}))

    builder.requirements = j.requirements

    # CreateFileRequirement: literal files to materialize in the job dir.
    j.generatefiles = {}
    createFiles, _ = get_feature("CreateFileRequirement", requirements=self.requirements, hints=self.hints)
    if createFiles:
        for t in createFiles["fileDef"]:
            j.generatefiles[t["filename"]] = expression.do_eval(
                t["fileContent"], builder.job, j.requirements, self.docpath)

    # EnvVarRequirement: environment variables for the job.
    j.environment = {}
    evr, _ = get_feature("EnvVarRequirement", requirements=self.requirements, hints=self.hints)
    if evr:
        for t in evr["envDef"]:
            j.environment[t["envName"]] = expression.do_eval(
                t["envValue"], builder.job, j.requirements, self.docpath)

    j.command_line = flatten(map(builder.generate_arg, builder.bindings))

    # Resolve a relative stdin path against input_basedir.
    if j.stdin:
        j.stdin = j.stdin if os.path.isabs(j.stdin) else os.path.join(input_basedir, j.stdin)

    j.pathmapper = builder.pathmapper
    # Outputs are collected later by the job through this partial.
    j.collect_outputs = functools.partial(self.collect_output_ports,
                                          self.tool["outputs"], builder)
    j.output_callback = output_callback
    yield j
def run(self, outdir=None, **kwargs):
    """Evaluate the script and report the result with a "success" status;
    any failure reports an empty output with "permanentFail" instead."""
    emit = self.output_callback
    try:
        emit(expression.do_eval(self.script, self.builder.job,
                                self.requirements, self.builder.docpath),
             "success")
    except Exception:
        # Any evaluation failure marks the job permanently failed.
        emit({}, "permanentFail")
def bind_input(self, schema, datum):
    """Recursively build command-line bindings for input `datum`.

    Position keys are built as lists: container recursion inserts map
    keys / array indexes / record field names, and this level's
    commandLineBinding position is prepended last.  Returns a list of
    binding dicts.
    """
    bindings = []

    # Handle union types
    if isinstance(schema["type"], list):
        # Try each member type; recurse with the first one that validates.
        success = False
        for t in schema["type"]:
            if t in self.schemaDefs:
                t = self.schemaDefs[t]
            avsc = avro.schema.make_avsc_object(t, None)
            if validate.validate(avsc, datum):
                if isinstance(t, basestring):
                    t = {"type": t}
                bindings.extend(self.bind_input(t, datum))
                success = True
                break
        if not success:
            raise ValidationException("'%s' is not a valid union %s" % (datum, schema["type"]))
    elif isinstance(schema["type"], dict):
        # Inline (nested) type definition.
        bindings.extend(self.bind_input(schema["type"], datum))
    else:
        if schema["type"] in self.schemaDefs:
            schema = self.schemaDefs[schema["type"]]
        if schema["type"] == "record":
            for f in schema["fields"]:
                if f["name"] in datum:
                    b = self.bind_input(f, datum[f["name"]])
                    for bi in b:
                        bi["position"].append(f["name"])
                    bindings.extend(b)
        if schema["type"] == "map":
            for v in datum:
                b = self.bind_input(schema["values"], datum[v])
                for bi in b:
                    bi["position"].insert(0, v)
                bindings.extend(b)
        if schema["type"] == "array":
            for n, item in enumerate(datum):
                # Items inherit this level's commandLineBinding.
                b = self.bind_input({"type": schema["items"], "commandLineBinding": schema.get("commandLineBinding")}, item)
                for bi in b:
                    bi["position"].insert(0, n)
                bindings.extend(b)
        if schema["type"] == "File":
            if schema.get("loadContents"):
                # Capture up to CONTENT_LIMIT bytes of the file.
                with open(os.path.join(self.basedir, datum["path"]), "rb") as f:
                    datum["contents"] = f.read(CONTENT_LIMIT)
            self.files.append(datum)
            if "secondaryFiles" in schema:
                # Attach secondary files derived from the primary file.
                if "secondaryFiles" not in datum:
                    datum["secondaryFiles"] = []
                for sf in aslist(schema["secondaryFiles"]):
                    if isinstance(sf, dict):
                        sfpath = expression.do_eval(sf, self.job, self.requirements, self.docpath, datum["path"])
                    else:
                        # Pattern string: derive path from the primary
                        # file's path.
                        sfpath = {"path": substitute(datum["path"], sf)}
                    if isinstance(sfpath, list):
                        datum["secondaryFiles"].extend(sfpath)
                    else:
                        datum["secondaryFiles"].append(sfpath)
                    self.files.append(sfpath)

    b = None
    if "commandLineBinding" in schema and isinstance(schema["commandLineBinding"], dict):
        b = copy.copy(schema["commandLineBinding"])
        if b.get("position"):
            b["position"] = [b["position"]]
        else:
            b["position"] = [0]
        # Position to front of the sort key
        for bi in bindings:
            bi["position"] = b["position"] + bi["position"]
        # Preserve a user "valueFrom" expression as "do_eval"; the actual
        # bound value always goes in "valueFrom".
        if "valueFrom" in b:
            b["do_eval"] = b["valueFrom"]
        b["valueFrom"] = datum
        if schema["type"] == "File":
            b["is_file"] = True
        bindings.append(b)
    return bindings
def run(self, outdir=None, **kwargs):
    """Evaluate self.script against the builder's job and emit the value
    through the output callback."""
    script_value = expression.do_eval(
        self.script,
        self.builder.job,
        self.requirements,
        self.builder.docpath,
    )
    self.output_callback(script_value)