def collect_output(self, schema, builder, outdir):
        """Collect the value of a single output parameter.

        When ``schema`` has an ``outputBinding``:

        * ``glob`` is evaluated as an expression; every resulting pattern is
          globbed relative to ``outdir`` and each match becomes a
          ``{"path": ..., "class": "File"}`` entry with ``checksum`` (sha1),
          ``size`` and, if ``loadContents`` is set, ``contents`` (the first
          ``CONTENT_LIMIT`` bytes/characters read).
        * ``outputEval`` is evaluated with the collected entries passed as
          the final argument, and its result replaces them.
        * For ``File`` outputs the result is reduced to a single file object
          and ``secondaryFiles`` are attached to it.

        A ``record`` schema that produced no value of its own is filled in by
        collecting each of its fields recursively.

        Raises:
            WorkflowException: if ``outputEval`` does not return a file
                object for a ``File`` output, or if a ``File`` output does
                not match exactly one file.
        """
        r = None
        if "outputBinding" in schema:
            binding = schema["outputBinding"]
            if "glob" in binding:
                r = []
                bg = expression.do_eval(binding["glob"], builder.job, self.requirements, self.docpath)
                for gb in aslist(bg):
                    r.extend([{"path": g, "class": "File"} for g in glob.glob(os.path.join(outdir, gb))])
                for files in r:
                    checksum = hashlib.sha1()
                    with open(files["path"], "rb") as f:
                        contents = f.read(CONTENT_LIMIT)
                        if binding.get("loadContents"):
                            files["contents"] = contents
                        filesize = 0
                        # Truthiness ends the loop at EOF whether read()
                        # returns str or bytes; comparing against "" never
                        # matches an empty bytes object.
                        while contents:
                            checksum.update(contents)
                            filesize += len(contents)
                            contents = f.read(1024*1024)
                    files["checksum"] = "sha1$%s" % checksum.hexdigest()
                    files["size"] = filesize

            if "outputEval" in binding:
                r = expression.do_eval(binding["outputEval"], builder.job, self.requirements, self.docpath, r)
                if schema["type"] == "File" and (not isinstance(r, dict) or "path" not in r):
                    raise WorkflowException("Expression must return a file object.")

            # A dict here can only be the file object validated above
            # (outputEval result); only glob results still need reducing
            # from a list of matches to the single file.
            if schema["type"] == "File" and not isinstance(r, dict):
                if not r:
                    raise WorkflowException("No matches for output item that is a single file.")
                if len(r) != 1:
                    raise WorkflowException("Multiple matches for output item that is a single file.")
                r = r[0]

            if schema["type"] == "File" and "secondaryFiles" in binding:
                r["secondaryFiles"] = []
                for sf in aslist(binding["secondaryFiles"]):
                    if isinstance(sf, dict):
                        # Evaluate against the job order object, like the
                        # glob and outputEval expressions above.
                        sfpath = expression.do_eval(sf, builder.job, self.requirements, self.docpath, r["path"])
                    else:
                        sfpath = {"path": substitute(r["path"], sf)}
                    if isinstance(sfpath, list):
                        r["secondaryFiles"].extend(sfpath)
                    else:
                        r["secondaryFiles"].append(sfpath)

        if not r and schema["type"] == "record":
            r = {}
            for f in schema["fields"]:
                r[f["name"]] = self.collect_output(f, builder, outdir)

        return r
 def do_eval(self, ex, context=None, pull_image=True):
     """Evaluate ``ex`` through ``expression.do_eval`` using this object's state.

     The job, requirements, output directory and temporary directory stored
     on ``self`` are passed positionally; ``context`` and ``pull_image`` are
     forwarded as keyword arguments. The result is returned unchanged.
     """
     options = {"context": context, "pull_image": pull_image}
     return expression.do_eval(
         ex, self.job, self.requirements, self.outdir, self.tmpdir, **options)
Exemple #3
0
 def valueFromFunc(k, v):
     """Return ``v``, transformed by the ``valueFrom`` expression for ``k`` if any.

     Keys without an entry in ``valueFrom`` pass their value through
     untouched; otherwise the expression is evaluated against ``vfinputs``
     with ``v`` supplied as ``context``.
     """
     if k not in valueFrom:
         return v
     return expression.do_eval(
         valueFrom[k], vfinputs, self.workflow.requirements,
         None, None, {}, context=v)
Exemple #4
0
    def collect_output(self, schema, builder, outdir):
        """Collect the value of a single output parameter.

        When ``schema["outputBinding"]`` has a ``glob``, the pattern is
        matched relative to ``outdir``. Every match becomes a dict with
        ``path``, ``checksum`` (sha1), ``size`` and, when ``loadContents`` is
        set, ``contents`` (the first ``CONTENT_LIMIT`` bytes/characters
        read). The matches are then shaped by the schema type:

        * array of ``File``: the list of file dicts;
        * ``File``: the first match, or ``None`` when nothing matched;
        * anything else with ``loadContents``: the loaded contents (a single
          value when exactly one file matched, otherwise a list);
        * otherwise ``None``.

        A ``valueFrom`` expression in the binding, if present, is evaluated
        with that value passed as the final argument and replaces it. A
        ``record`` schema that produced no value is filled in by collecting
        each of its fields recursively.
        """
        r = None
        if "outputBinding" in schema:
            binding = schema["outputBinding"]
            if "glob" in binding:
                r = [{
                    "path": g
                } for g in glob.glob(os.path.join(outdir, binding["glob"]))]
                for files in r:
                    checksum = hashlib.sha1()
                    with open(files["path"], "rb") as f:
                        contents = f.read(CONTENT_LIMIT)
                        if binding.get("loadContents"):
                            files["contents"] = contents
                        filesize = 0
                        # Truthiness ends the loop at EOF whether read()
                        # returns str or bytes; comparing against "" never
                        # matches an empty bytes object.
                        while contents:
                            checksum.update(contents)
                            filesize += len(contents)
                            contents = f.read(1024 * 1024)
                    files["checksum"] = "sha1$%s" % checksum.hexdigest()
                    files["size"] = filesize

                if schema["type"] == "array" and schema["items"] == "File":
                    # Keep the full list of file objects.
                    pass
                elif schema["type"] == "File":
                    r = r[0] if r else None
                elif binding.get("loadContents"):
                    r = [v["contents"] for v in r]
                    if len(r) == 1:
                        r = r[0]
                else:
                    r = None

            if "valueFrom" in binding:
                r = expression.do_eval(binding["valueFrom"], builder.job,
                                       self.requirements, self.docpath, r)

        if not r and schema["type"] == "record":
            r = {}
            for f in schema["fields"]:
                r[f["name"]] = self.collect_output(f, builder, outdir)

        return r
Exemple #5
0
    def generate_arg(self, binding):
        """Turn one binding into a list of command line arguments.

        ``binding["valueFrom"]`` holds the value. When the binding has a
        ``do_eval`` entry, that expression is evaluated (with the value
        passed as the final argument) and its result replaces the value.

        Formatting rules:

        * list with ``itemSeparator``: items are stringified and joined into
          one argument;
        * list produced by ``do_eval``: returned as-is after the prefix;
        * any other list, or a non-file dict: only the prefix (if any);
        * ``is_file`` bindings: the value's ``path``;
        * ``True`` with a prefix: just the prefix; ``False``/``None``: nothing;
        * everything else: the stringified value.

        With no ``separator`` (or a single space) prefix and value are
        separate arguments; otherwise they are joined by the separator into
        one argument. A value without a prefix is emitted on its own.

        Returns:
            A list of arguments, possibly empty.
        """
        value = binding["valueFrom"]
        if "do_eval" in binding:
            value = expression.do_eval(binding["do_eval"], self.job,
                                       self.requirements, self.docpath, value)

        prefix = binding.get("prefix")
        sep = binding.get("separator")

        items = []
        if isinstance(value, list):
            if binding.get("itemSeparator"):
                items = [binding["itemSeparator"].join([str(v) for v in value])]
            elif binding.get("do_eval"):
                return ([prefix] if prefix else []) + value
            elif prefix:
                return [prefix]
            else:
                return []
        elif binding.get("is_file"):
            items = [value["path"]]
        elif isinstance(value, dict):
            return [prefix] if prefix else []
        elif value is True and prefix:
            return [prefix]
        elif value is False or value is None:
            return []
        else:
            items = [value]

        args = []
        for item in items:
            if sep is None or sep == " ":
                args.extend([prefix, str(item)])
            elif prefix is None:
                # Nothing to join the separator to: emit the bare value
                # instead of failing on None + str.
                args.append(str(item))
            else:
                args.append(prefix + sep + str(item))

        # A missing prefix shows up as None in the separate-argument form.
        return [a for a in args if a is not None]
    def generate_arg(self, binding):
        """Turn one binding into a list of command line arguments.

        ``binding["valueFrom"]`` holds the value. When the binding has a
        ``do_eval`` entry, that expression is evaluated (with the value
        passed as the final argument) and its result replaces the value.

        Formatting rules:

        * list with ``itemSeparator``: items are stringified and joined into
          one argument;
        * list produced by ``do_eval``: returned as-is after the prefix;
        * any other list, or a non-file dict: only the prefix (if any);
        * ``is_file`` bindings: the value's ``path``;
        * ``True`` with a prefix: just the prefix; ``False``/``None``: nothing;
        * everything else: the stringified value.

        When ``separate`` is true (the default) prefix and value are separate
        arguments; otherwise they are concatenated into one. A value without
        a prefix is emitted on its own in both modes.

        Returns:
            A list of arguments, possibly empty.
        """
        value = binding["valueFrom"]
        if "do_eval" in binding:
            value = expression.do_eval(binding["do_eval"], self.job, self.requirements, self.docpath, value)

        prefix = binding.get("prefix")
        sep = binding.get("separate", True)

        items = []
        if isinstance(value, list):
            if binding.get("itemSeparator"):
                items = [binding["itemSeparator"].join([str(v) for v in value])]
            elif binding.get("do_eval"):
                return ([prefix] if prefix else []) + value
            elif prefix:
                return [prefix]
            else:
                return []
        elif binding.get("is_file"):
            items = [value["path"]]
        elif isinstance(value, dict):
            return [prefix] if prefix else []
        elif value is True and prefix:
            return [prefix]
        elif value is False or value is None:
            return []
        else:
            items = [value]

        args = []
        for item in items:
            if sep:
                args.extend([prefix, str(item)])
            elif prefix is None:
                # Nothing to concatenate with: emit the bare value instead
                # of failing on None + str.
                args.append(str(item))
            else:
                args.append(prefix + str(item))

        # A missing prefix shows up as None in the separate-argument form.
        return [a for a in args if a is not None]
 def do_eval(self, ex, context=None, pull_image=True):
     """Evaluate ``ex`` via ``expression.do_eval`` with this object's job settings.

     ``self.job``, ``self.requirements``, ``self.outdir`` and ``self.tmpdir``
     are passed positionally; ``context`` and ``pull_image`` are forwarded by
     keyword. The evaluation result is returned as-is.
     """
     positional = (ex, self.job, self.requirements, self.outdir, self.tmpdir)
     return expression.do_eval(*positional, context=context, pull_image=pull_image)
Exemple #8
0
 def valueFromFunc(k, v):
     """Apply the ``valueFrom`` expression registered for ``k`` to ``v``.

     Returns ``v`` unchanged when ``k`` has no ``valueFrom`` entry; otherwise
     returns the expression evaluated against ``vfinputs`` with ``v`` as
     ``context``.
     """
     if k not in valueFrom:
         return v
     expr = valueFrom[k]
     return expression.do_eval(expr, vfinputs, self.workflow.requirements,
                               None, None, {}, context=v)
Exemple #9
0
    def job(self,
            joborder,
            basedir,
            output_callback,
            use_container=True,
            **kwargs):
        """Build a ``CommandLineJob`` for this tool and yield it.

        This is a generator that yields exactly one job object.

        Args:
            joborder: input object, handed to ``self._init_job``.
            basedir: base directory handed to ``self._init_job`` and the path
                mapper; a relative stdin path is joined onto it.
            output_callback: stored on the job as ``output_callback``.
            use_container: when true and a ``DockerRequirement`` is present
                among the requirements or hints, a ``DockerPathMapper`` is
                used instead of a plain ``PathMapper``.
            **kwargs: forwarded to ``self._init_job``.

        Raises:
            Exception: if ``stdout`` references an output that does not
                exist, or resolves to an absolute path.
        """
        builder = self._init_job(joborder, basedir, **kwargs)

        # Base command words get a very low first position component so the
        # sort below places them ahead of bindings with ordinary positions.
        if self.tool["baseCommand"]:
            for n, b in enumerate(aslist(self.tool["baseCommand"])):
                builder.bindings.append({
                    "position": [-1000000, n],
                    "valueFrom": b
                })

        # Extra arguments: dict entries are copied and their "valueFrom" is
        # moved to "do_eval"; other entries are used as literal values. The
        # index i is the secondary sort key.
        if self.tool.get("arguments"):
            for i, a in enumerate(self.tool["arguments"]):
                if isinstance(a, dict):
                    a = copy.copy(a)
                    if a.get("position"):
                        a["position"] = [a["position"], i]
                    else:
                        a["position"] = [0, i]
                    a["do_eval"] = a["valueFrom"]
                    a["valueFrom"] = None
                    builder.bindings.append(a)
                else:
                    builder.bindings.append({
                        "position": [0, i],
                        "valueFrom": a
                    })

        builder.bindings.sort(key=lambda a: a["position"])

        _logger.debug(pprint.pformat(builder.bindings))
        _logger.debug(pprint.pformat(builder.files))

        # Paths that the path mapper has to know about.
        reffiles = [f["path"] for f in builder.files]

        j = CommandLineJob()
        j.joborder = builder.job
        j.stdin = None
        j.stdout = None
        builder.pathmapper = None

        if self.tool.get("stdin"):
            j.stdin = self.tool["stdin"]
            # {"ref": "#name"}-style reference: drop the first character and
            # look the input up in the job order to get its path.
            if isinstance(j.stdin, dict) and "ref" in j.stdin:
                j.stdin = builder.job[j.stdin["ref"][1:]]["path"]
            reffiles.append(j.stdin)

        if self.tool.get("stdout"):
            if isinstance(self.tool["stdout"],
                          dict) and "ref" in self.tool["stdout"]:
                # stdout refers to an output parameter: capture into a file
                # named after the reference and set that output's glob to it.
                # NOTE: this mutates the matching entry of self.tool["outputs"].
                for out in self.tool.get("outputs", []):
                    if out["id"] == self.tool["stdout"]["ref"]:
                        filename = self.tool["stdout"]["ref"][1:]
                        j.stdout = filename
                        out["outputBinding"] = out.get("outputBinding", {})
                        out["outputBinding"]["glob"] = filename
                if not j.stdout:
                    raise Exception("stdout refers to invalid output")
            else:
                j.stdout = self.tool["stdout"]
            if os.path.isabs(j.stdout):
                raise Exception("stdout must be a relative path")

        j.requirements = self.requirements
        j.hints = self.hints

        for r in (j.requirements + j.hints):
            if r["class"] == "DockerRequirement" and use_container:
                builder.pathmapper = DockerPathMapper(reffiles, basedir)

        if builder.pathmapper is None:
            builder.pathmapper = PathMapper(reffiles, basedir)

        # Rewrite every input file path through the chosen path mapper.
        for f in builder.files:
            f["path"] = builder.pathmapper.mapper(f["path"])

        builder.requirements = j.requirements

        # Files to generate, keyed by file name, with evaluated values.
        j.generatefiles = {}
        for t in self.tool.get("fileDefs", []):
            j.generatefiles[t["filename"]] = expression.do_eval(
                t["value"], builder.job, j.requirements, self.docpath)

        # Environment variables, with evaluated values.
        j.environment = {}
        for t in self.tool.get("environmentDefs", []):
            j.environment[t["env"]] = expression.do_eval(
                t["value"], builder.job, j.requirements, self.docpath)

        j.command_line = flatten(map(builder.generate_arg, builder.bindings))

        # A relative stdin path is resolved against basedir.
        if j.stdin:
            j.stdin = j.stdin if os.path.isabs(j.stdin) else os.path.join(
                basedir, j.stdin)

        j.pathmapper = builder.pathmapper
        j.collect_outputs = functools.partial(self.collect_output_ports,
                                              self.tool["outputs"], builder)
        j.output_callback = output_callback

        yield j
Exemple #10
0
 def run(self, outdir=None, **kwargs):
     """Evaluate ``self.script`` and pass the result to ``self.output_callback``.

     ``outdir`` and ``kwargs`` are accepted but not used here.
     """
     callback = self.output_callback
     result = expression.do_eval(
         self.script, self.builder.job, self.requirements, self.builder.docpath)
     callback(result)
    def bind_input(self, schema, datum, lead_pos=None, tail_pos=None):
        """Build the command line bindings for ``datum`` as described by ``schema``.

        The schema's ``inputBinding`` (if it is a dict) is copied and given a
        ``position`` sort key of ``lead_pos + position + tail_pos``, with the
        datum stored under ``valueFrom`` (an original ``valueFrom``
        expression is moved to ``do_eval``). Unions, nested type
        declarations, records, maps and arrays are handled recursively.
        ``File`` values are registered in ``self.files`` together with any
        secondary files requested by the binding.

        Args:
            schema: dict with a ``type`` and optionally an ``inputBinding``.
            datum: the input value matching ``schema``.
            lead_pos: sort key component(s) placed before the binding's own
                position; defaults to an empty list.
            tail_pos: sort key component(s) placed after the binding's own
                position; defaults to an empty list.

        Returns:
            A list of binding dicts.

        Raises:
            ValidationException: if ``datum`` matches no member of a union.
        """
        # None replaces the former shared mutable list defaults.
        if lead_pos is None:
            lead_pos = []
        if tail_pos is None:
            tail_pos = []

        bindings = []
        binding = None
        if "inputBinding" in schema and isinstance(schema["inputBinding"], dict):
            binding = copy.copy(schema["inputBinding"])

            if "position" in binding:
                binding["position"] = aslist(lead_pos) + aslist(binding["position"]) + aslist(tail_pos)
            else:
                binding["position"] = aslist(lead_pos) + [0] + aslist(tail_pos)

            if "valueFrom" in binding:
                binding["do_eval"] = binding["valueFrom"]
            binding["valueFrom"] = datum

            if schema["type"] == "File":
                binding["is_file"] = True

        # Handle union types
        if isinstance(schema["type"], list):
            for t in schema["type"]:
                if isinstance(t, basestring) and self.names.has_name(t, ""):
                    avsc = self.names.get_name(t, "")
                else:
                    avsc = avro.schema.make_avsc_object(t, self.names)
                if validate.validate(avsc, datum):
                    # Re-run with the first union member that validates.
                    schema = copy.deepcopy(schema)
                    schema["type"] = t
                    return self.bind_input(schema, datum, lead_pos=lead_pos, tail_pos=tail_pos)
            raise ValidationException("'%s' is not a valid union %s" % (datum, schema["type"]))
        elif isinstance(schema["type"], dict):
            st = copy.deepcopy(schema["type"])
            # Give a nested array/map an empty binding when the outer schema
            # has one (and no itemSeparator), so its items are bound too.
            if binding and "inputBinding" not in st and "itemSeparator" not in binding and st["type"] in ("array", "map"):
                st["inputBinding"] = {}
            bindings.extend(self.bind_input(st, datum, lead_pos=lead_pos, tail_pos=tail_pos))
        else:
            if schema["type"] in self.schemaDefs:
                schema = self.schemaDefs[schema["type"]]

            if schema["type"] == "record":
                for f in schema["fields"]:
                    if f["name"] in datum:
                        bindings.extend(self.bind_input(f, datum[f["name"]], lead_pos=lead_pos, tail_pos=f["name"]))

            if schema["type"] == "map":
                for n, item in datum.items():
                    b2 = None
                    if binding:
                        b2 = copy.deepcopy(binding)
                        b2["valueFrom"] = [n, item]
                    bindings.extend(self.bind_input({"type": schema["values"], "inputBinding": b2},
                                                    item, lead_pos=n, tail_pos=tail_pos))
                # The per-entry bindings replace the map's own binding.
                binding = None

            if schema["type"] == "array":
                for n, item in enumerate(datum):
                    b2 = None
                    if binding:
                        b2 = copy.deepcopy(binding)
                        b2["valueFrom"] = item
                    bindings.extend(self.bind_input({"type": schema["items"], "inputBinding": b2},
                                                    item, lead_pos=n, tail_pos=tail_pos))
                # The per-item bindings replace the array's own binding.
                binding = None

            if schema["type"] == "File":
                self.files.append(datum)
                if binding:
                    if binding.get("loadContents"):
                        with open(os.path.join(self.input_basedir, datum["path"]), "rb") as f:
                            datum["contents"] = f.read(CONTENT_LIMIT)

                    if "secondaryFiles" in binding:
                        if "secondaryFiles" not in datum:
                            datum["secondaryFiles"] = []
                        # Read the patterns from the same dict that was just
                        # tested for the key.
                        for sf in aslist(binding["secondaryFiles"]):
                            if isinstance(sf, dict):
                                sfpath = expression.do_eval(sf, self.job, self.requirements, self.docpath, datum["path"])
                            else:
                                sfpath = {"path": substitute(datum["path"], sf)}
                            # Keep self.files a flat list of file objects: an
                            # expression may yield several secondary files.
                            if isinstance(sfpath, list):
                                datum["secondaryFiles"].extend(sfpath)
                                self.files.extend(sfpath)
                            else:
                                datum["secondaryFiles"].append(sfpath)
                                self.files.append(sfpath)

        # Position to front of the sort key
        if binding:
            for bi in bindings:
                bi["position"] = binding["position"] + bi["position"]
            bindings.append(binding)

        return bindings
    def job(self, joborder, input_basedir, output_callback, use_container=True, **kwargs):
        """Build a ``CommandLineJob`` for this tool and yield it.

        This is a generator that yields exactly one job object.

        Args:
            joborder: input object, handed to ``self._init_job``.
            input_basedir: base directory handed to ``self._init_job`` and
                the path mapper; a relative stdin path is joined onto it.
            output_callback: stored on the job as ``output_callback``.
            use_container: when true and a ``DockerRequirement`` feature is
                found, a ``DockerPathMapper`` is used instead of a plain
                ``PathMapper``.
            **kwargs: forwarded to ``self._init_job``.

        Raises:
            validate.ValidationException: if the evaluated ``stdout`` is an
                absolute path.
        """
        builder = self._init_job(joborder, input_basedir, **kwargs)

        # Base command words get a very low first position component so the
        # sort below places them ahead of bindings with ordinary positions.
        if self.tool["baseCommand"]:
            for n, b in enumerate(aslist(self.tool["baseCommand"])):
                builder.bindings.append({
                    "position": [-1000000, n],
                    "valueFrom": b
                })

        # Extra arguments: dict entries are copied and their "valueFrom" is
        # moved to "do_eval"; other entries are used as literal values. The
        # index i is the secondary sort key.
        if self.tool.get("arguments"):
            for i, a in enumerate(self.tool["arguments"]):
                if isinstance(a, dict):
                    a = copy.copy(a)
                    if a.get("position"):
                        a["position"] = [a["position"], i]
                    else:
                        a["position"] = [0, i]
                    a["do_eval"] = a["valueFrom"]
                    a["valueFrom"] = None
                    builder.bindings.append(a)
                else:
                    builder.bindings.append({
                        "position": [0, i],
                        "valueFrom": a
                    })

        builder.bindings.sort(key=lambda a: a["position"])

        # Paths that the path mapper has to know about.
        reffiles = [f["path"] for f in builder.files]

        j = CommandLineJob()
        j.joborder = builder.job
        j.stdin = None
        j.stdout = None
        j.successCodes = self.tool.get("successCodes")
        j.temporaryFailCodes = self.tool.get("temporaryFailCodes")
        j.permanentFailCodes = self.tool.get("permanentFailCodes")
        j.requirements = self.requirements
        j.hints = self.hints

        builder.pathmapper = None

        if self.tool.get("stdin"):
            # j.stdin is still None here; it is passed as the last argument.
            j.stdin = expression.do_eval(self.tool["stdin"], builder.job, self.requirements, self.docpath, j.stdin)
            # {"ref": "#name"}-style reference: drop the first character and
            # look the input up in the job order to get its path.
            if isinstance(j.stdin, dict) and "ref" in j.stdin:
                j.stdin = builder.job[j.stdin["ref"][1:]]["path"]
            reffiles.append(j.stdin)

        if self.tool.get("stdout"):
            j.stdout = expression.do_eval(self.tool["stdout"], builder.job, j.requirements, self.docpath)
            if os.path.isabs(j.stdout):
                raise validate.ValidationException("stdout must be a relative path")

        dockerReq, _ = get_feature("DockerRequirement", requirements=self.requirements, hints=self.hints)
        if dockerReq and use_container:
                builder.pathmapper = DockerPathMapper(reffiles, input_basedir)

        if builder.pathmapper is None:
            builder.pathmapper = PathMapper(reffiles, input_basedir)

        # Rewrite every input file path to element [1] of the mapper result.
        for f in builder.files:
            f["path"] = builder.pathmapper.mapper(f["path"])[1]

        _logger.debug("Bindings is %s", pprint.pformat(builder.bindings))
        _logger.debug("Files is %s", pprint.pformat({p: builder.pathmapper.mapper(p) for p in builder.pathmapper.files()}))

        builder.requirements = j.requirements

        # Files to generate, keyed by file name, with evaluated contents.
        j.generatefiles = {}
        createFiles, _ = get_feature("CreateFileRequirement", requirements=self.requirements, hints=self.hints)
        if createFiles:
            for t in createFiles["fileDef"]:
                j.generatefiles[t["filename"]] = expression.do_eval(t["fileContent"], builder.job, j.requirements, self.docpath)

        # Environment variables, with evaluated values.
        j.environment = {}
        evr, _ = get_feature("EnvVarRequirement", requirements=self.requirements, hints=self.hints)
        if evr:
            for t in evr["envDef"]:
                j.environment[t["envName"]] = expression.do_eval(t["envValue"], builder.job, j.requirements, self.docpath)

        j.command_line = flatten(map(builder.generate_arg, builder.bindings))

        # A relative stdin path is resolved against input_basedir.
        if j.stdin:
            j.stdin = j.stdin if os.path.isabs(j.stdin) else os.path.join(input_basedir, j.stdin)

        j.pathmapper = builder.pathmapper
        j.collect_outputs = functools.partial(self.collect_output_ports, self.tool["outputs"], builder)
        j.output_callback = output_callback

        yield j
 def run(self, outdir=None, **kwargs):
     """Evaluate ``self.script`` and report the outcome via ``self.output_callback``.

     On success the callback receives the evaluation result and
     ``"success"``. If anything in that path raises an ``Exception``, the
     callback is invoked with an empty dict and ``"permanentFail"``.
     ``outdir`` and ``kwargs`` are accepted but not used here.
     """
     try:
         callback = self.output_callback
         result = expression.do_eval(
             self.script, self.builder.job, self.requirements, self.builder.docpath)
         callback(result, "success")
     except Exception:
         self.output_callback({}, "permanentFail")
    def bind_input(self, schema, datum):
        """Build the command line bindings for ``datum`` as described by ``schema``.

        Unions, nested type declarations, records, maps and arrays are
        handled recursively; each level pushes its own component (field
        name, map key or array index) onto the ``position`` sort key of the
        bindings produced below it. ``File`` values are registered in
        ``self.files`` together with any secondary files named by the
        schema. If the schema has a ``commandLineBinding`` dict, a copy of it
        is appended as the binding for ``datum`` itself, with the datum under
        ``valueFrom`` (an original ``valueFrom`` expression is moved to
        ``do_eval``).

        Args:
            schema: dict with a ``type`` and optionally a
                ``commandLineBinding``.
            datum: the input value matching ``schema``.

        Returns:
            A list of binding dicts.

        Raises:
            ValidationException: if ``datum`` matches no member of a union.
        """
        bindings = []

        # Handle union types
        if isinstance(schema["type"], list):
            success = False
            for t in schema["type"]:
                # Only names can be looked up; union members may also be
                # dicts, which are unhashable and cannot be tested with `in`.
                if isinstance(t, basestring) and t in self.schemaDefs:
                    t = self.schemaDefs[t]
                avsc = avro.schema.make_avsc_object(t, None)
                if validate.validate(avsc, datum):
                    if isinstance(t, basestring):
                        t = {"type": t}
                    bindings.extend(self.bind_input(t, datum))
                    success = True
                    break
            if not success:
                raise ValidationException("'%s' is not a valid union %s" % (datum, schema["type"]))
        elif isinstance(schema["type"], dict):
            bindings.extend(self.bind_input(schema["type"], datum))
        else:
            if schema["type"] in self.schemaDefs:
                schema = self.schemaDefs[schema["type"]]

            if schema["type"] == "record":
                for f in schema["fields"]:
                    if f["name"] in datum:
                        b = self.bind_input(f, datum[f["name"]])
                        for bi in b:
                            bi["position"].append(f["name"])
                        bindings.extend(b)

            if schema["type"] == "map":
                for v in datum:
                    # Wrap the value type like the array branch does, so a
                    # plain type name such as "string" is a valid schema.
                    b = self.bind_input({"type": schema["values"]}, datum[v])
                    for bi in b:
                        bi["position"].insert(0, v)
                    bindings.extend(b)

            if schema["type"] == "array":
                for n, item in enumerate(datum):
                    b = self.bind_input({"type": schema["items"], "commandLineBinding": schema.get("commandLineBinding")}, item)
                    for bi in b:
                        bi["position"].insert(0, n)
                    bindings.extend(b)

            if schema["type"] == "File":
                if schema.get("loadContents"):
                    with open(os.path.join(self.basedir, datum["path"]), "rb") as f:
                        datum["contents"] = f.read(CONTENT_LIMIT)
                self.files.append(datum)
                if "secondaryFiles" in schema:
                    if "secondaryFiles" not in datum:
                        datum["secondaryFiles"] = []
                    for sf in aslist(schema["secondaryFiles"]):
                        if isinstance(sf, dict):
                            sfpath = expression.do_eval(sf, self.job, self.requirements, self.docpath, datum["path"])
                        else:
                            sfpath = {"path": substitute(datum["path"], sf)}
                        # Keep self.files a flat list of file objects: an
                        # expression may yield several secondary files.
                        if isinstance(sfpath, list):
                            datum["secondaryFiles"].extend(sfpath)
                            self.files.extend(sfpath)
                        else:
                            datum["secondaryFiles"].append(sfpath)
                            self.files.append(sfpath)

        b = None
        if "commandLineBinding" in schema and isinstance(schema["commandLineBinding"], dict):
            b = copy.copy(schema["commandLineBinding"])

            if b.get("position"):
                b["position"] = [b["position"]]
            else:
                b["position"] = [0]

            # Position to front of the sort key
            for bi in bindings:
                bi["position"] = b["position"] + bi["position"]

            if "valueFrom" in b:
                b["do_eval"] = b["valueFrom"]
            b["valueFrom"] = datum

            if schema["type"] == "File":
                b["is_file"] = True
            bindings.append(b)

        return bindings
 def run(self, outdir=None, **kwargs):
     """Evaluate ``self.script`` and pass the result to ``self.output_callback``.

     ``outdir`` and ``kwargs`` are accepted but not used here.
     """
     callback = self.output_callback
     builder = self.builder
     result = expression.do_eval(self.script, builder.job, self.requirements, builder.docpath)
     callback(result)