Example #1
0
def object_from_state(state, parms, frag_only, supportsMultipleInput):
    inputobj = {}
    for inp in parms:
        iid = inp["id"]
        if frag_only:
            iid = shortname(iid)
        if "source" in inp:
            if isinstance(inp["source"], list) and not supportsMultipleInput:
                raise WorkflowException("Workflow contains multiple inbound links to a single parameter but MultipleInputFeatureRequirement is not declared.")
            connections = aslist(inp["source"])
            for src in connections:
                if src in state and state[src] is not None:
                    if not match_types(inp["type"], state[src], iid, inputobj,
                                       inp.get("linkMerge", ("merge_nested" if len(connections) > 1 else None)),
                                       valueFrom=inp.get("valueFrom")):
                        raise WorkflowException("Type mismatch between source '%s' (%s) and sink '%s' (%s)" % (src, state[src].parameter["type"], inp["id"], inp["type"]))
                elif src not in state:
                    raise WorkflowException("Connect source '%s' on parameter '%s' does not exist" % (src, inp["id"]))
                else:
                    return None
        elif "default" in inp:
            inputobj[iid] = inp["default"]
        elif "valueFrom" in inp:
            inputobj[iid] = None
        else:
            raise WorkflowException("Value for %s not specified" % (inp["id"]))
    return inputobj
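The function above leans on a few helpers defined elsewhere in the package (shortname, aslist, WorkflowException). A minimal sketch of what the two small ones are assumed to do, not the package's actual implementation:

# Sketch only: approximate behaviour of helpers assumed by object_from_state.
import urlparse

def aslist(l):
    # Wrap a scalar in a list so callers can always iterate.
    return l if isinstance(l, list) else [l]

def shortname(inputid):
    # Reduce a fully qualified id to its last fragment segment,
    # e.g. "file:///wf.cwl#step1/out" -> "out".
    d = urlparse.urlsplit(inputid)
    if d.fragment:
        return d.fragment.split(u"/")[-1]
    return d.path.split(u"/")[-1]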
Example #2
0
 def __init__(self, step):
     self.step = step
     self.tool = step.tool
     self.id = step.id
     self.submitted = False
     self.completed = False
     self.name = uniquename("step %s" % shortname(self.id))
Example #3
0
def generate_parser(toolparser, tool, namemap):
    toolparser.add_argument("job_order", nargs="?", help="Job input json file")
    namemap["job_order"] = "job_order"

    for inp in tool.tool["inputs"]:
        name = shortname(inp["id"])
        if len(name) == 1:
            flag = "-"
        else:
            flag = "--"

        namemap[name.replace("-", "_")] = name

        inptype = inp["type"]

        required = True
        if isinstance(inptype, list):
            if inptype[0] == "null":
                required = False
                if len(inptype) == 2:
                    inptype = inptype[1]
                else:
                    _logger.debug("Can't make command line argument from %s",
                                  inptype)
                    return None

        help = inp.get("description", "").replace("%", "%%")
        kwargs = {}

        if inptype == "File":
            kwargs["action"] = FileAction
        elif isinstance(inptype, dict) and inptype["type"] == "array":
            if inptype["items"] == "File":
                kwargs["action"] = FileAppendAction
            else:
                kwargs["action"] = "append"

        if inptype == "string":
            kwargs["type"] = str
        elif inptype == "int":
            kwargs["type"] = int
        elif inptype == "float":
            kwargs["type"] = float
        elif inptype == "boolean":
            kwargs["action"] = "store_true"

        if "default" in inp:
            kwargs["default"] = inp["default"]
            required = False

        if "type" not in kwargs and "action" not in kwargs:
            _logger.debug("Can't make command line argument from %s", inptype)
            return None

        toolparser.add_argument(flag + name,
                                required=required,
                                help=help,
                                **kwargs)

    return toolparser
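A hedged usage sketch for generate_parser; the tool object, the --message flag, and the file names below are invented for illustration:

# Hypothetical usage; assumes `tool` is an already-loaded tool object whose
# inputs include an optional string parameter named "message".
import argparse

namemap = {}
toolparser = generate_parser(argparse.ArgumentParser(prog="example-tool"), tool, namemap)
if toolparser:
    cmd_line = vars(toolparser.parse_args(["--message", "hello", "job.json"]))
    # Map dashed option names back to the original input names.
    job_order = {namemap[k]: v for k, v in cmd_line.items()}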
Example #4
0
    def collect_output_ports(self, ports, builder, outdir):
        try:
            ret = {}
            custom_output = os.path.join(outdir, "cwl.output.json")
            if builder.fs_access.exists(custom_output):
                with builder.fs_access.open(custom_output, "r") as f:
                    ret = yaml.load(f)
                _logger.debug("Raw output from %s: %s", custom_output, json.dumps(ret, indent=4))
                adjustFileObjs(ret, remove_hostfs)
                adjustFileObjs(ret, functools.partial(revmap_file, builder, outdir))
                adjustFileObjs(ret, remove_hostfs)
                validate.validate_ex(self.names.get_name("outputs_record_schema", ""), ret)
                return ret

            for port in ports:
                fragment = shortname(port["id"])
                try:
                    ret[fragment] = self.collect_output(port, builder, outdir)
                except Exception as e:
                    raise WorkflowException("Error collecting output for parameter '%s': %s" % (shortname(port["id"]), e))
            if ret:
                adjustFileObjs(ret, remove_hostfs)
            validate.validate_ex(self.names.get_name("outputs_record_schema", ""), ret)
            return ret if ret is not None else {}
        except validate.ValidationException as e:
            raise WorkflowException("Error validating output record, " + str(e) + "\n in " + json.dumps(ret, indent=4))
Example #6
0
 def __init__(self, step):
     self.step = step
     self.tool = step.tool
     self.id = step.id
     self.submitted = False
     self.completed = False
     self.name = uniquename("step %s" % shortname(self.id))
def generate_parser(toolparser, tool, namemap):
    toolparser.add_argument("job_order", nargs="?", help="Job input json file")
    namemap["job_order"] = "job_order"

    for inp in tool.tool["inputs"]:
        name = shortname(inp["id"])
        if len(name) == 1:
            flag = "-"
        else:
            flag = "--"

        namemap[name.replace("-", "_")] = name

        inptype = inp["type"]

        required = True
        if isinstance(inptype, list):
            if inptype[0] == "null":
                required = False
                if len(inptype) == 2:
                    inptype = inptype[1]
                else:
                    _logger.debug("Can't make command line argument from %s", inptype)
                    return None

        help = inp.get("description", "").replace("%", "%%")
        kwargs = {}

        if inptype == "File":
            kwargs["action"] = FileAction
        elif isinstance(inptype, dict) and inptype["type"] == "array":
            if inptype["items"] == "File":
                kwargs["action"] = FileAppendAction
            else:
                kwargs["action"] = "append"

        if inptype == "string":
            kwargs["type"] = str
        elif inptype == "int":
            kwargs["type"] = int
        elif inptype == "float":
            kwargs["type"] = float
        elif inptype == "boolean":
            kwargs["action"] = "store_true"

        if "default" in inp:
            kwargs["default"] = inp["default"]
            required = False

        if "type" not in kwargs and "action" not in kwargs:
            _logger.debug("Can't make command line argument from %s", inptype)
            return None

        toolparser.add_argument(flag + name, required=required, help=help, **kwargs)

    return toolparser
Example #8
0
 def receive_output(self, output_callback, jobout, processStatus):
     #_logger.debug("WorkflowStep output from run is %s", jobout)
     output = {}
     for i in self.tool["outputs"]:
         field = shortname(i["id"])
         if field in jobout:
             output[i["id"]] = jobout[field]
         else:
             processStatus = "permanentFail"
     output_callback(output, processStatus)
Example #9
0
 def receive_output(self, output_callback, jobout, processStatus):
     #_logger.debug("WorkflowStep output from run is %s", jobout)
     output = {}
     for i in self.tool["outputs"]:
         field = shortname(i["id"])
         if field in jobout:
             output[i["id"]] = jobout[field]
         else:
             processStatus = "permanentFail"
     output_callback(output, processStatus)
    def job(self, joborder, basedir, output_callback, **kwargs):
        for i in self.tool["inputs"]:
            p = i["id"]
            field = shortname(p)
            joborder[field] = joborder[i["id"]]
            del joborder[i["id"]]

        kwargs["requirements"] = kwargs.get("requirements", []) + self.tool.get("requirements", [])
        kwargs["hints"] = kwargs.get("hints", []) + self.tool.get("hints", [])

        for t in self.embedded_tool.job(joborder, basedir, functools.partial(self.receive_output, output_callback), **kwargs):
            yield t
Example #11
0
    def __init__(self, workflow, **kwargs):
        self.workflow = workflow
        self.tool = workflow.tool
        self.steps = [WorkflowJobStep(s) for s in workflow.steps]
        self.id = workflow.tool["id"]
        if "outdir" in kwargs:
            self.outdir = kwargs["outdir"]
        elif "tmp_outdir_prefix" in kwargs:
            self.outdir = tempfile.mkdtemp(prefix=kwargs["tmp_outdir_prefix"])
        else:
            # tmp_outdir_prefix defaults to tmp, so this is unlikely to be used
            self.outdir = tempfile.mkdtemp()

        self.name = uniquename(kwargs.get("name", shortname(self.workflow.tool["id"])))

        _logger.debug("[workflow %s] initialized from %s", self.name, self.tool["id"])
    def collect_output_ports(self, ports, builder, outdir):
        try:
            ret = {}
            custom_output = os.path.join(outdir, "cwl.output.json")
            if builder.fs_access.exists(custom_output):
                # Read the file contents; passing the bare path to yaml.load
                # would only parse the filename string itself.
                with builder.fs_access.open(custom_output, "r") as f:
                    outputdoc = yaml.load(f)
                validate.validate_ex(self.names.get_name("outputs_record_schema", ""), outputdoc)
                return outputdoc

            for port in ports:
                fragment = shortname(port["id"])
                ret[fragment] = self.collect_output(port, builder, outdir)
            validate.validate_ex(self.names.get_name("outputs_record_schema", ""), ret)
            return ret if ret is not None else {}
        except validate.ValidationException as e:
            raise WorkflowException("Error validating output record, " + str(e) + "\n in " + json.dumps(ret, indent=4))
Example #13
0
    def job(self, joborder, basedir, output_callback, **kwargs):
        for i in self.tool["inputs"]:
            p = i["id"]
            field = shortname(p)
            joborder[field] = joborder[i["id"]]
            del joborder[i["id"]]

        kwargs["requirements"] = kwargs.get("requirements", []) + self.tool.get("requirements", [])
        kwargs["hints"] = kwargs.get("hints", []) + self.tool.get("hints", [])

        try:
            for t in self.embedded_tool.job(joborder, basedir,
                                            functools.partial(self.receive_output, output_callback),
                                            **kwargs):
                yield t
        except WorkflowException:
            raise
        except Exception as e:
            _logger.exception("Unexpected exception")
            raise WorkflowException(str(e))
Example #14
0
    def job(self, joborder, basedir, output_callback, **kwargs):
        for i in self.tool["inputs"]:
            p = i["id"]
            field = shortname(p)
            joborder[field] = joborder[i["id"]]
            del joborder[i["id"]]

        kwargs["requirements"] = kwargs.get("requirements", []) + self.tool.get("requirements", [])
        kwargs["hints"] = kwargs.get("hints", []) + self.tool.get("hints", [])

        try:
            for t in self.embedded_tool.job(joborder, basedir,
                                            functools.partial(self.receive_output, output_callback),
                                            **kwargs):
                yield t
        except WorkflowException:
            _logger.error("Exception on step '%s'", kwargs.get("name"))
            raise
        except Exception as e:
            _logger.exception("Unexpected exception")
            raise WorkflowException(str(e))
Example #15
0
    def collect_output_ports(self, ports, builder, outdir):
        try:
            ret = {}
            custom_output = os.path.join(outdir, "cwl.output.json")
            if builder.fs_access.exists(custom_output):
                # Read the file contents; passing the bare path to yaml.load
                # would only parse the filename string itself.
                with builder.fs_access.open(custom_output, "r") as f:
                    outputdoc = yaml.load(f)
                validate.validate_ex(
                    self.names.get_name("outputs_record_schema", ""),
                    outputdoc)
                return outputdoc

            for port in ports:
                fragment = shortname(port["id"])
                ret[fragment] = self.collect_output(port, builder, outdir)
            validate.validate_ex(
                self.names.get_name("outputs_record_schema", ""), ret)
            return ret if ret is not None else {}
        except validate.ValidationException as e:
            raise WorkflowException("Error validating output record, " +
                                    str(e) + "\n in " +
                                    json.dumps(ret, indent=4))
Example #16
0
    def __init__(self, toolpath_object, pos, **kwargs):
        if "id" in toolpath_object:
            self.id = toolpath_object["id"]
        else:
            self.id = "#step" + str(pos)

        try:
            makeTool = kwargs.get("makeTool")
            runobj = None
            if isinstance(toolpath_object["run"], basestring):
                runobj, _ = schema_salad.schema.load_and_validate(kwargs["loader"],
                                                                  kwargs["avsc_names"],
                                                                  toolpath_object["run"],
                                                                  True)
            else:
                runobj = toolpath_object["run"]
            self.embedded_tool = makeTool(runobj, **kwargs)
        except validate.ValidationException as v:
            raise WorkflowException("Tool definition %s failed validation:\n%s" % (toolpath_object["run"], validate.indent(str(v))))

        for field in ("inputs", "outputs"):
            for i in toolpath_object[field]:
                inputid = i["id"]
                p = shortname(inputid)
                found = False
                for a in self.embedded_tool.tool[field]:
                    frag = shortname(a["id"])
                    if frag == p:
                        i.update(a)
                        found = True
                if not found:
                    i["type"] = "Any"
                    #raise WorkflowException("Parameter '%s' of %s in workflow step %s does not correspond to parameter in %s" % (p, field, self.id, self.embedded_tool.tool.get("id")))
                i["id"] = inputid

        super(WorkflowStep, self).__init__(toolpath_object, **kwargs)

        if self.embedded_tool.tool["class"] == "Workflow":
            (feature, _) = self.get_requirement("SubworkflowFeatureRequirement")
            if not feature:
                raise WorkflowException("Workflow contains embedded workflow but SubworkflowFeatureRequirement not in requirements")

        if "scatter" in self.tool:
            (feature, _) = self.get_requirement("ScatterFeatureRequirement")
            if not feature:
                raise WorkflowException("Workflow contains scatter but ScatterFeatureRequirement not in requirements")

            inputparms = copy.deepcopy(self.tool["inputs"])
            outputparms = copy.deepcopy(self.tool["outputs"])
            scatter = aslist(self.tool["scatter"])

            method = self.tool.get("scatterMethod")
            if method is None and len(scatter) != 1:
                raise WorkflowException("Must specify scatterMethod when scattering over multiple inputs")

            inp_map = {i["id"]: i for i in inputparms}
            for s in scatter:
                if s not in inp_map:
                    raise WorkflowException("Invalid Scatter parameter '%s'" % s)

                inp_map[s]["type"] = {"type": "array", "items": inp_map[s]["type"]}

            if self.tool.get("scatterMethod") == "nested_crossproduct":
                nesting = len(scatter)
            else:
                nesting = 1

            for r in xrange(0, nesting):
                for i in outputparms:
                    i["type"] = {"type": "array", "items": i["type"]}
            self.tool["inputs"] = inputparms
            self.tool["outputs"] = outputparms
Example #17
0
    def __init__(self, toolpath_object, pos, **kwargs):
        if "id" in toolpath_object:
            self.id = toolpath_object["id"]
        else:
            self.id = "#step" + str(pos)

        try:
            makeTool = kwargs.get("makeTool")
            runobj = None
            if isinstance(toolpath_object["run"], basestring):
                runobj, _ = schema_salad.schema.load_and_validate(
                    kwargs["loader"], kwargs["avsc_names"],
                    toolpath_object["run"], True)
            else:
                runobj = toolpath_object["run"]
            self.embedded_tool = makeTool(runobj, **kwargs)
        except validate.ValidationException as v:
            raise WorkflowException(
                "Tool definition %s failed validation:\n%s" %
                (toolpath_object["run"], validate.indent(str(v))))

        for field in ("inputs", "outputs"):
            for i in toolpath_object[field]:
                inputid = i["id"]
                p = shortname(inputid)
                found = False
                for a in self.embedded_tool.tool[field]:
                    frag = shortname(a["id"])
                    if frag == p:
                        i.update(a)
                        found = True
                if not found:
                    i["type"] = "Any"
                    #raise WorkflowException("Parameter '%s' of %s in workflow step %s does not correspond to parameter in %s" % (p, field, self.id, self.embedded_tool.tool.get("id")))
                i["id"] = inputid

        super(WorkflowStep, self).__init__(toolpath_object, **kwargs)

        if self.embedded_tool.tool["class"] == "Workflow":
            (feature,
             _) = self.get_requirement("SubworkflowFeatureRequirement")
            if not feature:
                raise WorkflowException(
                    "Workflow contains embedded workflow but SubworkflowFeatureRequirement not in requirements"
                )

        if "scatter" in self.tool:
            (feature, _) = self.get_requirement("ScatterFeatureRequirement")
            if not feature:
                raise WorkflowException(
                    "Workflow contains scatter but ScatterFeatureRequirement not in requirements"
                )

            inputparms = copy.deepcopy(self.tool["inputs"])
            outputparms = copy.deepcopy(self.tool["outputs"])
            scatter = aslist(self.tool["scatter"])

            method = self.tool.get("scatterMethod")
            if method is None and len(scatter) != 1:
                raise WorkflowException(
                    "Must specify scatterMethod when scattering over multiple inputs"
                )

            inp_map = {i["id"]: i for i in inputparms}
            for s in scatter:
                if s not in inp_map:
                    raise WorkflowException("Invalid Scatter parameter '%s'" %
                                            s)

                inp_map[s]["type"] = {
                    "type": "array",
                    "items": inp_map[s]["type"]
                }

            if self.tool.get("scatterMethod") == "nested_crossproduct":
                nesting = len(scatter)
            else:
                nesting = 1

            for r in xrange(0, nesting):
                for i in outputparms:
                    i["type"] = {"type": "array", "items": i["type"]}
            self.tool["inputs"] = inputparms
            self.tool["outputs"] = outputparms
Example #18
0
    def collect_output(self, schema, builder, outdir):
        r = None
        if "outputBinding" in schema:
            binding = schema["outputBinding"]
            globpatterns = []

            revmap = functools.partial(revmap_file, builder, outdir)

            if "glob" in binding:
                r = []
                for gb in aslist(binding["glob"]):
                    gb = builder.do_eval(gb)
                    if gb:
                        globpatterns.extend(aslist(gb))

                for gb in globpatterns:
                    if gb.startswith("/"):
                        raise WorkflowException("glob patterns must not start with '/'")
                    try:
                        r.extend([{"path": g, "class": "File", "hostfs": True}
                                  for g in builder.fs_access.glob(os.path.join(outdir, gb))])
                    except (OSError, IOError) as e:
                        _logger.warn(str(e))

                for files in r:
                    checksum = hashlib.sha1()
                    with builder.fs_access.open(files["path"], "rb") as f:
                        contents = f.read(CONTENT_LIMIT)
                        if binding.get("loadContents"):
                            files["contents"] = contents
                        filesize = 0
                        while contents != "":
                            checksum.update(contents)
                            filesize += len(contents)
                            contents = f.read(1024*1024)
                    files["checksum"] = "sha1$%s" % checksum.hexdigest()
                    files["size"] = filesize
                    if "format" in schema:
                        files["format"] = builder.do_eval(schema["format"], context=files)

            optional = False
            singlefile = False
            if isinstance(schema["type"], list):
                if "null" in schema["type"]:
                    optional = True
                if "File" in schema["type"]:
                    singlefile = True
            elif schema["type"] == "File":
                singlefile = True

            if "outputEval" in binding:
                r = builder.do_eval(binding["outputEval"], context=r)
                if singlefile:
                    # Handle single file outputs not wrapped in a list
                    if r is not None and not isinstance(r, (list, tuple)):
                        r = [r]
                    if optional and r is None:
                        pass
                    elif (r is None or len(r) != 1 or not isinstance(r[0], dict) or "path" not in r[0]):
                        raise WorkflowException("Expression must return a file object for %s." % schema["id"])

            if singlefile:
                if not r and not optional:
                    raise WorkflowException("Did not find output file with glob pattern: '{}'".format(globpatterns))
                elif not r and optional:
                    pass
                elif isinstance(r, list):
                    if len(r) > 1:
                        raise WorkflowException("Multiple matches for output item that is a single file.")
                    else:
                        r = r[0]

            # Ensure files point to local references outside of the run environment
            adjustFileObjs(r, revmap)

            if "secondaryFiles" in schema:
                for primary in aslist(r):
                    if isinstance(primary, dict):
                        primary["secondaryFiles"] = []
                        for sf in aslist(schema["secondaryFiles"]):
                            if isinstance(sf, dict) or "$(" in sf or "${" in sf:
                                sfpath = builder.do_eval(sf, context=r)
                                if isinstance(sfpath, basestring):
                                    sfpath = revmap({"path": sfpath, "class": "File"})
                            else:
                                sfpath = {"path": substitute(primary["path"], sf), "class": "File", "hostfs": True}

                            for sfitem in aslist(sfpath):
                                if builder.fs_access.exists(sfitem["path"]):
                                    primary["secondaryFiles"].append(sfitem)

            if not r and optional:
                r = None

        if not r and isinstance(schema["type"], dict) and schema["type"]["type"] == "record":
            r = {}
            for f in schema["type"]["fields"]:
                r[shortname(f["name"])] = self.collect_output(f, builder, outdir)

        return r
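collect_output assumes a substitute helper when expanding secondaryFiles suffix patterns. A sketch of the expected semantics, where each leading '^' strips one extension from the primary path before the remainder is appended (an assumption, not the package's code):

# Sketch of the assumed substitute() semantics for secondaryFiles patterns.
def substitute(value, replace):
    if replace and replace[0] == "^":
        # e.g. substitute("reads.sorted.bam", "^.bai") -> "reads.sorted.bai"
        return substitute(value[0:value.rindex(".")], replace[1:])
    return value + replace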
Example #19
0
    def try_make_job(self, step, basedir, **kwargs):
        inputparms = step.tool["inputs"]
        outputparms = step.tool["outputs"]

        supportsMultipleInput = bool(
            self.workflow.get_requirement("MultipleInputFeatureRequirement")
            [0])

        try:
            inputobj = object_from_state(self.state, inputparms, False,
                                         supportsMultipleInput)
            if inputobj is None:
                _logger.debug("[workflow %s] job step %s not ready", self.name,
                              step.id)
                return

            _logger.debug("[step %s] starting job step %s of workflow %s",
                          id(step), step.id, id(self))

            if step.submitted:
                return

            callback = functools.partial(self.receive_output, step,
                                         outputparms)

            valueFrom = {
                i["id"]: i["valueFrom"]
                for i in step.tool["inputs"] if "valueFrom" in i
            }

            if len(valueFrom) > 0 and not bool(
                    self.workflow.get_requirement(
                        "StepInputExpressionRequirement")[0]):
                raise WorkflowException(
                    "Workflow step contains valueFrom but StepInputExpressionRequirement not in requirements"
                )

            vfinputs = {shortname(k): v for k, v in inputobj.iteritems()}

            def valueFromFunc(k, v):
                if k in valueFrom:
                    return expression.do_eval(valueFrom[k],
                                              vfinputs,
                                              self.workflow.requirements,
                                              None,
                                              None, {},
                                              context=v)
                else:
                    return v

            if "scatter" in step.tool:
                scatter = aslist(step.tool["scatter"])
                method = step.tool.get("scatterMethod")
                if method is None and len(scatter) != 1:
                    raise WorkflowException(
                        "Must specify scatterMethod when scattering over multiple inputs"
                    )
                if "valueFrom" not in kwargs:
                    kwargs["valueFrom"] = valueFromFunc
                if method == "dotproduct" or method is None:
                    jobs = dotproduct_scatter(step, inputobj, basedir, scatter,
                                              callback, **kwargs)
                elif method == "nested_crossproduct":
                    jobs = nested_crossproduct_scatter(step, inputobj, basedir,
                                                       scatter, callback,
                                                       **kwargs)
                elif method == "flat_crossproduct":
                    jobs = flat_crossproduct_scatter(step, inputobj, basedir,
                                                     scatter, callback, 0,
                                                     **kwargs)
            else:
                _logger.debug("[workflow %s] Job is input %s", self.name,
                              json.dumps(inputobj, indent=4))
                inputobj = {
                    k: valueFromFunc(k, v)
                    for k, v in inputobj.items()
                }
                _logger.debug("[workflow %s] Evaluated job input to %s",
                              self.name, json.dumps(inputobj, indent=4))
                jobs = step.job(inputobj, basedir, callback, **kwargs)

            step.submitted = True

            for j in jobs:
                yield j
        except WorkflowException:
            raise
        except Exception as e:
            _logger.exception("Unhandled exception")
            self.processStatus = "permanentFail"
            step.completed = True
Example #20
0
    def job(self,
            joborder,
            basedir,
            output_callback,
            move_outputs=True,
            **kwargs):
        self.state = {}
        self.processStatus = "success"

        if "outdir" in kwargs:
            del kwargs["outdir"]

        for i in self.tool["inputs"]:
            iid = shortname(i["id"])
            if iid in joborder:
                self.state[i["id"]] = WorkflowStateItem(
                    i, copy.deepcopy(joborder[iid]))
            elif "default" in i:
                self.state[i["id"]] = WorkflowStateItem(
                    i, copy.deepcopy(i["default"]))
            else:
                raise WorkflowException(
                    "Input '%s' not in input object and does not have a default value."
                    % (i["id"]))

        for s in self.steps:
            for out in s.tool["outputs"]:
                self.state[out["id"]] = None

        output_dirs = set()

        completed = 0
        while completed < len(self.steps) and self.processStatus == "success":
            made_progress = False
            completed = 0
            for step in self.steps:
                if step.completed:
                    completed += 1
                else:
                    for newjob in self.try_make_job(step, basedir, **kwargs):
                        if newjob:
                            made_progress = True
                            if newjob.outdir:
                                output_dirs.add(newjob.outdir)
                        yield newjob
            if not made_progress and completed < len(self.steps):
                yield None

        supportsMultipleInput = bool(
            self.workflow.get_requirement("MultipleInputFeatureRequirement")
            [0])

        wo = object_from_state(self.state, self.tool["outputs"], True,
                               supportsMultipleInput)

        if wo is None:
            raise WorkflowException("Output for workflow not available")

        if move_outputs:
            targets = set()
            conflicts = set()

            outfiles = findfiles(wo)

            for f in outfiles:
                for a in output_dirs:
                    if f["path"].startswith(a):
                        src = f["path"]
                        dst = os.path.join(self.outdir, src[len(a) + 1:])
                        if dst in targets:
                            conflicts.add(dst)
                        else:
                            targets.add(dst)

            for f in outfiles:
                for a in output_dirs:
                    if f["path"].startswith(a):
                        src = f["path"]
                        dst = os.path.join(self.outdir, src[len(a) + 1:])
                        if dst in conflicts:
                            sp = os.path.splitext(dst)
                            dst = "%s-%s%s" % (
                                sp[0], str(random.randint(1,
                                                          1000000000)), sp[1])
                        dirname = os.path.dirname(dst)
                        if not os.path.exists(dirname):
                            os.makedirs(dirname)
                        _logger.debug("[workflow %s] Moving '%s' to '%s'",
                                      self.name, src, dst)
                        shutil.move(src, dst)
                        f["path"] = dst

            for a in output_dirs:
                if os.path.exists(a) and empty_subtree(a):
                    if kwargs.get("rm_tmpdir", True):
                        _logger.debug(
                            "[workflow %s] Removing intermediate output directory %s",
                            self.name, a)
                        shutil.rmtree(a, True)

        _logger.info("[workflow %s] outdir is %s", self.name, self.outdir)

        output_callback(wo, self.processStatus)
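The job() generator above yields runnable step jobs and yields None while it is blocked on steps whose inputs are not ready. A simplified consumer loop; the run() call and its keyword arguments are assumptions, not the real executor:

# Assumed driver loop; run() and its keyword arguments are placeholders.
jobiter = wfjob.job(joborder, basedir, output_callback, **kwargs)
for runnable in jobiter:
    if runnable is None:
        # Nothing ready yet: a real runner would wait for an outstanding job
        # to deliver its outputs via output_callback before iterating again.
        continue
    runnable.run(**kwargs)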
Example #21
0
def load_tool(argsworkflow, updateonly, strict, makeTool, debug,
              print_pre=False,
              print_rdf=False,
              print_dot=False,
              print_deps=False,
              relative_deps=False,
              rdf_serializer=None,
              stdout=sys.stdout,
              urifrag=None):
    (document_loader, avsc_names, schema_metadata) = process.get_schema()

    if isinstance(avsc_names, Exception):
        raise avsc_names

    jobobj = None
    if isinstance(argsworkflow, basestring):
        split = urlparse.urlsplit(argsworkflow)
        if split.scheme:
            uri = argsworkflow
        else:
            uri = "file://" + os.path.abspath(argsworkflow)
        fileuri, urifrag = urlparse.urldefrag(uri)
        workflowobj = document_loader.fetch(fileuri)
    elif isinstance(argsworkflow, dict):
        workflowobj = argsworkflow
        uri = urifrag
        fileuri = "#"
    else:
        raise schema_salad.validate.ValidationException("Must be URI or dict")

    if "cwl:tool" in workflowobj:
        jobobj = workflowobj
        uri = urlparse.urljoin(uri, jobobj["cwl:tool"])
        fileuri, urifrag = urlparse.urldefrag(uri)
        workflowobj = document_loader.fetch(fileuri)
        del jobobj["cwl:tool"]

    if isinstance(workflowobj, list):
        # bare list without a version must be treated as draft-2
        workflowobj = {"cwlVersion": "https://w3id.org/cwl/cwl#draft-2",
                       "id": fileuri,
                       "@graph": workflowobj}

    workflowobj = update.update(workflowobj, document_loader, fileuri)
    document_loader.idx.clear()

    if updateonly:
        stdout.write(json.dumps(workflowobj, indent=4))
        return 0

    if print_deps:
        printdeps(workflowobj, document_loader, stdout, relative_deps)
        return 0

    try:
        processobj, metadata = schema_salad.schema.load_and_validate(document_loader, avsc_names, workflowobj, strict)
    except (schema_salad.validate.ValidationException, RuntimeError) as e:
        _logger.error("Tool definition failed validation:\n%s", e, exc_info=(e if debug else False))
        return 1

    if print_pre:
        stdout.write(json.dumps(processobj, indent=4))
        return 0

    if print_rdf:
        printrdf(argsworkflow, processobj, document_loader.ctx, rdf_serializer, stdout)
        return 0

    if print_dot:
        printdot(argsworkflow, processobj, document_loader.ctx, stdout)
        return 0

    if urifrag:
        processobj, _ = document_loader.resolve_ref(uri)
    elif isinstance(processobj, list):
        if 1 == len(processobj):
            processobj = processobj[0]
        else:
            _logger.error("Tool file contains graph of multiple objects, must specify one of #%s",
                          ", #".join(urlparse.urldefrag(i["id"])[1]
                                     for i in processobj if "id" in i))
            return 1

    try:
        t = makeTool(processobj, strict=strict, makeTool=makeTool, loader=document_loader, avsc_names=avsc_names)
    except (schema_salad.validate.ValidationException) as e:
        _logger.error("Tool definition failed validation:\n%s", e, exc_info=(e if debug else False))
        return 1
    except (RuntimeError, workflow.WorkflowException) as e:
        _logger.error("Tool definition failed initialization:\n%s", e, exc_info=(e if debug else False))
        return 1

    if jobobj:
        for inp in t.tool["inputs"]:
            if shortname(inp["id"]) in jobobj:
                inp["default"] = jobobj[shortname(inp["id"])]

    if metadata:
        t.metadata = metadata
    else:
        t.metadata = {"$namespaces": t.tool.get("$namespaces", {}), "$schemas": t.tool.get("$schemas", [])}

    return t
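A possible invocation of load_tool; the workflow path is hypothetical and defaultMakeTool is assumed to be the tool factory used elsewhere in the package:

# Hypothetical usage; "my-workflow.cwl" and the factory are assumptions.
import sys

t = load_tool("my-workflow.cwl", updateonly=False, strict=True,
              makeTool=workflow.defaultMakeTool, debug=False)
if isinstance(t, int):
    # load_tool returns an integer exit code on validation failure.
    sys.exit(t)
print t.tool["class"]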
Example #22
0
 def job(self, joborder, basedir, output_callback, **kwargs):
     kwargs["part_of"] = self.name
     kwargs["name"] = shortname(self.id)
     for j in self.step.job(joborder, basedir, output_callback, **kwargs):
         yield j
Example #23
0
def load_tool(argsworkflow,
              updateonly,
              strict,
              makeTool,
              debug,
              print_pre=False,
              print_rdf=False,
              print_dot=False,
              print_deps=False,
              relative_deps=False,
              rdf_serializer=None,
              stdout=sys.stdout,
              urifrag=None):
    (document_loader, avsc_names, schema_metadata) = process.get_schema()

    if isinstance(avsc_names, Exception):
        raise avsc_names

    jobobj = None
    if isinstance(argsworkflow, basestring):
        split = urlparse.urlsplit(argsworkflow)
        if split.scheme:
            uri = argsworkflow
        else:
            uri = "file://" + os.path.abspath(argsworkflow)
        fileuri, urifrag = urlparse.urldefrag(uri)
        workflowobj = document_loader.fetch(fileuri)
        if isinstance(workflowobj, list):
            # bare list without a version must be treated as draft-2
            workflowobj = {
                "cwlVersion": "https://w3id.org/cwl/cwl#draft-2",
                "id": fileuri,
                "@graph": workflowobj
            }
    elif isinstance(argsworkflow, dict):
        workflowobj = argsworkflow
        uri = urifrag
        fileuri = ""
    else:
        raise schema_salad.validate.ValidationException("Must be URI or dict")

    if "cwl:tool" in workflowobj:
        jobobj = workflowobj
        workflowobj = document_loader.fetch(
            urlparse.urljoin(uri, workflowobj["cwl:tool"]))

    workflowobj = update.update(workflowobj, document_loader, fileuri)
    document_loader.idx.clear()

    if updateonly:
        stdout.write(json.dumps(workflowobj, indent=4))
        return 0

    if print_deps:
        printdeps(workflowobj, document_loader, stdout, relative_deps)
        return 0

    try:
        processobj, metadata = schema_salad.schema.load_and_validate(
            document_loader, avsc_names, workflowobj, strict)
    except (schema_salad.validate.ValidationException, RuntimeError) as e:
        _logger.error("Tool definition failed validation:\n%s",
                      e,
                      exc_info=(e if debug else False))
        return 1

    if print_pre:
        stdout.write(json.dumps(processobj, indent=4))
        return 0

    if print_rdf:
        printrdf(argsworkflow, processobj, document_loader.ctx, rdf_serializer,
                 stdout)
        return 0

    if print_dot:
        printdot(argsworkflow, processobj, document_loader.ctx, stdout)
        return 0

    if urifrag:
        processobj, _ = document_loader.resolve_ref(uri)
    elif isinstance(processobj, list):
        if 1 == len(processobj):
            processobj = processobj[0]
        else:
            _logger.error(
                "Tool file contains graph of multiple objects, must specify one of #%s",
                ", #".join(
                    urlparse.urldefrag(i["id"])[1] for i in processobj
                    if "id" in i))
            return 1

    try:
        t = makeTool(processobj,
                     strict=strict,
                     makeTool=makeTool,
                     loader=document_loader,
                     avsc_names=avsc_names)
    except (schema_salad.validate.ValidationException) as e:
        _logger.error("Tool definition failed validation:\n%s",
                      e,
                      exc_info=(e if debug else False))
        return 1
    except (RuntimeError, workflow.WorkflowException) as e:
        _logger.error("Tool definition failed initialization:\n%s",
                      e,
                      exc_info=(e if debug else False))
        return 1

    if jobobj:
        for inp in t.tool["inputs"]:
            if shortname(inp["id"]) in jobobj:
                inp["default"] = jobobj[shortname(inp["id"])]

    if metadata:
        t.metadata = metadata
    else:
        t.metadata = {
            "$namespaces": t.tool.get("$namespaces", {}),
            "$schemas": t.tool.get("$schemas", [])
        }

    return t
Example #24
0
def load_job_order(args,
                   t,
                   parser,
                   stdin,
                   print_input_deps=False,
                   relative_deps=False,
                   stdout=sys.stdout):

    job_order_object = None
    toolparser = None

    if args.conformance_test:
        loader = Loader({})
    else:
        jobloaderctx = {
            "path": {
                "@type": "@id"
            },
            "format": {
                "@type": "@id"
            },
            "id": "@id"
        }
        jobloaderctx.update(t.metadata.get("$namespaces", {}))
        loader = Loader(jobloaderctx)

    if len(args.job_order) == 1 and args.job_order[0][0] != "-":
        job_order_file = args.job_order[0]
    elif len(args.job_order) == 1 and args.job_order[0] == "-":
        job_order_object = yaml.load(stdin)
        job_order_object, _ = loader.resolve_all(job_order_object, "")
    else:
        job_order_file = None

    if job_order_object:
        input_basedir = args.basedir if args.basedir else os.getcwd()
    elif job_order_file:
        input_basedir = args.basedir if args.basedir else os.path.abspath(
            os.path.dirname(job_order_file))
        try:
            job_order_object, _ = loader.resolve_ref(job_order_file)
        except Exception as e:
            _logger.error(e, exc_info=(e if args.debug else False))
            return 1
        toolparser = None
    else:
        input_basedir = args.basedir if args.basedir else os.getcwd()
        namemap = {}
        toolparser = generate_parser(
            argparse.ArgumentParser(prog=args.workflow), t, namemap)
        if toolparser:
            if args.tool_help:
                toolparser.print_help()
                return 0
            cmd_line = vars(toolparser.parse_args(args.job_order))

            if cmd_line["job_order"]:
                try:
                    input_basedir = args.basedir if args.basedir else os.path.abspath(
                        os.path.dirname(cmd_line["job_order"]))
                    job_order_object, _ = loader.resolve_ref(
                        cmd_line["job_order"])
                except Exception as e:
                    _logger.error(e, exc_info=(e if args.debug else False))
                    return 1
            else:
                job_order_object = {"id": args.workflow}

            job_order_object.update(
                {namemap[k]: v
                 for k, v in cmd_line.items()})

            _logger.debug("Parsed job order from command line: %s",
                          json.dumps(job_order_object, indent=4))
        else:
            job_order_object = None

    for inp in t.tool["inputs"]:
        if "default" in inp and (not job_order_object or shortname(inp["id"])
                                 not in job_order_object):
            if not job_order_object:
                job_order_object = {}
            job_order_object[shortname(inp["id"])] = inp["default"]

    if not job_order_object and len(t.tool["inputs"]) > 0:
        parser.print_help()
        if toolparser:
            print "\nOptions for %s " % args.workflow
            toolparser.print_help()
        _logger.error("")
        _logger.error("Input object required")
        return 1

    if print_input_deps:
        printdeps(job_order_object,
                  loader,
                  stdout,
                  relative_deps,
                  basedir="file://%s/" % input_basedir)
        return 0

    return (job_order_object, input_basedir)
Example #25
0
def load_job_order(args, t, parser, stdin, print_input_deps=False, relative_deps=False, stdout=sys.stdout):

    job_order_object = None
    toolparser = None

    if args.conformance_test:
        loader = Loader({})
    else:
        jobloaderctx = {"path": {"@type": "@id"}, "format": {"@type": "@id"}, "id": "@id"}
        jobloaderctx.update(t.metadata.get("$namespaces", {}))
        loader = Loader(jobloaderctx)

    if len(args.job_order) == 1 and args.job_order[0][0] != "-":
        job_order_file = args.job_order[0]
    elif len(args.job_order) == 1 and args.job_order[0] == "-":
        job_order_object = yaml.load(stdin)
        job_order_object, _ = loader.resolve_all(job_order_object, "")
    else:
        job_order_file = None

    if job_order_object:
        input_basedir = args.basedir if args.basedir else os.getcwd()
    elif job_order_file:
        input_basedir = args.basedir if args.basedir else os.path.abspath(os.path.dirname(job_order_file))
        try:
            job_order_object, _ = loader.resolve_ref(job_order_file)
        except Exception as e:
            _logger.error(e, exc_info=(e if args.debug else False))
            return 1
        toolparser = None
    else:
        input_basedir = args.basedir if args.basedir else os.getcwd()
        namemap = {}
        toolparser = generate_parser(argparse.ArgumentParser(prog=args.workflow), t, namemap)
        if toolparser:
            if args.tool_help:
                toolparser.print_help()
                return 0
            cmd_line = vars(toolparser.parse_args(args.job_order))

            if cmd_line["job_order"]:
                try:
                    input_basedir = args.basedir if args.basedir else os.path.abspath(os.path.dirname(cmd_line["job_order"]))
                    job_order_object, _ = loader.resolve_ref(cmd_line["job_order"])
                except Exception as e:
                    _logger.error(e, exc_info=(e if args.debug else False))
                    return 1
            else:
                job_order_object = {"id": args.workflow}

            job_order_object.update({namemap[k]: v for k,v in cmd_line.items()})

            _logger.debug("Parsed job order from command line: %s", json.dumps(job_order_object, indent=4))
        else:
            job_order_object = None

    for inp in t.tool["inputs"]:
        if "default" in inp and (not job_order_object or shortname(inp["id"]) not in job_order_object):
            if not job_order_object:
                job_order_object = {}
            job_order_object[shortname(inp["id"])] = inp["default"]

    if not job_order_object and len(t.tool["inputs"]) > 0:
        parser.print_help()
        if toolparser:
            print "\nOptions for %s " % args.workflow
            toolparser.print_help()
        _logger.error("")
        _logger.error("Input object required")
        return 1

    if print_input_deps:
        printdeps(job_order_object, loader, stdout, relative_deps,
                  basedir="file://%s/" % input_basedir)
        return 0

    if "cwl:tool" in job_order_object:
        del job_order_object["cwl:tool"]
    if "id" in job_order_object:
        del job_order_object["id"]

    return (job_order_object, input_basedir)
Example #26
0
    def collect_output(self, schema, builder, outdir):
        r = None
        if "outputBinding" in schema:
            binding = schema["outputBinding"]
            globpatterns = []
            if "glob" in binding:
                r = []
                for gb in aslist(binding["glob"]):
                    try:
                        gb = builder.do_eval(gb)
                        globpatterns.append(gb)
                        if gb:
                            r.extend([{"path": g, "class": "File"} for g in builder.fs_access.glob(os.path.join(outdir, gb))])
                    except (OSError, IOError) as e:
                        _logger.warn(str(e))
                for files in r:
                    checksum = hashlib.sha1()
                    with builder.fs_access.open(files["path"], "rb") as f:
                        contents = f.read(CONTENT_LIMIT)
                        if binding.get("loadContents"):
                            files["contents"] = contents
                        filesize = 0
                        while contents != "":
                            checksum.update(contents)
                            filesize += len(contents)
                            contents = f.read(1024*1024)
                    files["checksum"] = "sha1$%s" % checksum.hexdigest()
                    files["size"] = filesize
                    if "format" in schema:
                        files["format"] = builder.do_eval(schema["format"], context=files)

            optional = False
            singlefile = False
            if isinstance(schema["type"], list):
                if "null" in schema["type"]:
                    optional = True
                if "File" in schema["type"]:
                    singlefile = True
            elif schema["type"] == "File":
                singlefile = True

            if "outputEval" in binding:
                r = builder.do_eval(binding["outputEval"], context=r)
                if singlefile:
                    # Handle single file outputs not wrapped in a list
                    if r is not None and not isinstance(r, (list, tuple)):
                        r = [r]
                    if optional and r is None:
                        pass
                    elif (r is None or len(r) != 1 or not isinstance(r[0], dict) or "path" not in r[0]):
                        raise WorkflowException("Expression must return a file object for %s." % schema["id"])

            if singlefile:
                if not r and not optional:
                    raise WorkflowException("Did not find output file with glob pattern: '{}'".format(globpatterns))
                elif not r and optional:
                    pass
                elif isinstance(r, list):
                    if len(r) > 1:
                        raise WorkflowException("Multiple matches for output item that is a single file.")
                    else:
                        r = r[0]

            if "secondaryFiles" in schema:
                for primary in aslist(r):
                    if isinstance(primary, dict):
                        primary["secondaryFiles"] = []
                        for sf in aslist(schema["secondaryFiles"]):
                            if isinstance(sf, dict) or "$(" in sf or "${" in sf:
                                sfpath = builder.do_eval(sf, context=r)
                                if isinstance(sfpath, basestring):
                                    sfpath = {"path": sfpath, "class": "File"}
                            else:
                                sfpath = {"path": substitute(primary["path"], sf), "class": "File"}

                            for sfitem in aslist(sfpath):
                                if builder.fs_access.exists(sfitem["path"]):
                                    primary["secondaryFiles"].append(sfitem)

            if not r and optional:
                r = None

        if not r and isinstance(schema["type"], dict) and schema["type"]["type"] == "record":
            r = {}
            for f in schema["type"]["fields"]:
                r[shortname(f["name"])] = self.collect_output(f, builder, outdir)

        return r
Example #28
0
    def try_make_job(self, step, basedir, **kwargs):
        inputparms = step.tool["inputs"]
        outputparms = step.tool["outputs"]

        supportsMultipleInput = bool(self.workflow.get_requirement("MultipleInputFeatureRequirement")[0])

        try:
            inputobj = object_from_state(self.state, inputparms, False, supportsMultipleInput)
            if inputobj is None:
                _logger.debug("[workflow %s] job step %s not ready", self.name, step.id)
                return

            _logger.debug("[step %s] starting job step %s of workflow %s", id(step), step.id, id(self))

            if step.submitted:
                return

            callback = functools.partial(self.receive_output, step, outputparms)

            valueFrom = {i["id"]: i["valueFrom"] for i in step.tool["inputs"] if "valueFrom" in i}

            if len(valueFrom) > 0 and not bool(self.workflow.get_requirement("StepInputExpressionRequirement")[0]):
                raise WorkflowException("Workflow step contains valueFrom but StepInputExpressionRequirement not in requirements")

            vfinputs = {shortname(k): v for k,v in inputobj.iteritems()}
            def valueFromFunc(k, v):
                if k in valueFrom:
                    return expression.do_eval(valueFrom[k], vfinputs, self.workflow.requirements,
                                       None, None, {}, context=v)
                else:
                    return v

            if "scatter" in step.tool:
                scatter = aslist(step.tool["scatter"])
                method = step.tool.get("scatterMethod")
                if method is None and len(scatter) != 1:
                    raise WorkflowException("Must specify scatterMethod when scattering over multiple inputs")
                if "valueFrom" not in kwargs:
                    kwargs["valueFrom"] = valueFromFunc
                if method == "dotproduct" or method is None:
                    jobs = dotproduct_scatter(step, inputobj, basedir, scatter,
                                              callback, **kwargs)
                elif method == "nested_crossproduct":
                    jobs = nested_crossproduct_scatter(step, inputobj,
                                                       basedir, scatter, callback, **kwargs)
                elif method == "flat_crossproduct":
                    jobs = flat_crossproduct_scatter(step, inputobj, basedir,
                                                     scatter, callback, 0, **kwargs)
            else:
                _logger.debug("[workflow %s] Job is input %s", self.name, json.dumps(inputobj, indent=4))
                inputobj = {k: valueFromFunc(k, v) for k,v in inputobj.items()}
                _logger.debug("[workflow %s] Evaluated job input to %s", self.name, json.dumps(inputobj, indent=4))
                jobs = step.job(inputobj, basedir, callback, **kwargs)

            step.submitted = True

            for j in jobs:
                yield j
        except WorkflowException:
            raise
        except Exception as e:
            _logger.exception("Unhandled exception")
            self.processStatus = "permanentFail"
            step.completed = True
Example #29
0
    def job(self, joborder, basedir, output_callback, move_outputs=True, **kwargs):
        self.state = {}
        self.processStatus = "success"

        if "outdir" in kwargs:
            del kwargs["outdir"]

        for i in self.tool["inputs"]:
            iid = shortname(i["id"])
            if iid in joborder:
                self.state[i["id"]] = WorkflowStateItem(i, copy.deepcopy(joborder[iid]))
            elif "default" in i:
                self.state[i["id"]] = WorkflowStateItem(i, copy.deepcopy(i["default"]))
            else:
                raise WorkflowException("Input '%s' not in input object and does not have a default value." % (i["id"]))

        for s in self.steps:
            for out in s.tool["outputs"]:
                self.state[out["id"]] = None

        output_dirs = set()

        completed = 0
        iterables = []
        while completed < len(self.steps) and self.processStatus == "success":
            made_progress = False

            for step in self.steps:
                if not step.submitted:
                    step.iterable = self.try_make_job(step, basedir, **kwargs)

                if step.iterable:
                    for newjob in step.iterable:
                        if newjob:
                            made_progress = True
                            if newjob.outdir:
                                output_dirs.add(newjob.outdir)
                            yield newjob
                        else:
                            break

            completed = sum(1 for s in self.steps if s.completed)

            if not made_progress and completed < len(self.steps):
                yield None

        supportsMultipleInput = bool(self.workflow.get_requirement("MultipleInputFeatureRequirement")[0])

        wo = object_from_state(self.state, self.tool["outputs"], True, supportsMultipleInput)

        if wo is None:
            raise WorkflowException("Output for workflow not available")

        if move_outputs:
            targets = set()
            conflicts = set()

            outfiles = findfiles(wo)

            for f in outfiles:
                for a in output_dirs:
                    if f["path"].startswith(a):
                        src = f["path"]
                        dst = os.path.join(self.outdir, src[len(a)+1:])
                        if dst in targets:
                            conflicts.add(dst)
                        else:
                            targets.add(dst)

            for f in outfiles:
                for a in output_dirs:
                    if f["path"].startswith(a):
                        src = f["path"]
                        dst = os.path.join(self.outdir, src[len(a)+1:])
                        if dst in conflicts:
                            sp = os.path.splitext(dst)
                            dst = "%s-%s%s" % (sp[0], str(random.randint(1, 1000000000)), sp[1])
                        dirname = os.path.dirname(dst)
                        if not os.path.exists(dirname):
                            os.makedirs(dirname)
                        _logger.debug("[workflow %s] Moving '%s' to '%s'", self.name, src, dst)
                        shutil.move(src, dst)
                        f["path"] = dst

            for a in output_dirs:
                if os.path.exists(a) and empty_subtree(a):
                    if kwargs.get("rm_tmpdir", True):
                        _logger.debug("[workflow %s] Removing intermediate output directory %s", self.name, a)
                        shutil.rmtree(a, True)

        _logger.info("[workflow %s] outdir is %s", self.name, self.outdir)

        output_callback(wo, self.processStatus)