Example #1
def object_from_state(state, parms, frag_only):
    inputobj = {}
    for inp in parms:
        iid = inp["id"]
        if frag_only:
            (_, iid) = urlparse.urldefrag(iid)
            iid = iid.split(".")[-1]
        if "source" in inp:
            connections = aslist(inp["source"])
            for src in connections:
                if src in state and state[src] is not None:
                    if not match_types(
                            inp["type"], state[src], iid, inputobj,
                            inp.get("linkMerge",
                                    ("merge_nested"
                                     if len(connections) > 1 else None))):
                        raise WorkflowException(
                            "Type mismatch between source '%s' (%s) and sink '%s' (%s)"
                            % (src, state[src].parameter["type"], inp["id"],
                               inp["type"]))
                elif src not in state:
                    raise WorkflowException(
                        "Connect source '%s' on parameter '%s' does not exist"
                        % (src, inp["id"]))
                else:
                    return None
        elif "default" in inp:
            inputobj[iid] = inp["default"]
        else:
            raise WorkflowException("Value for %s not specified" % (inp["id"]))
    return inputobj
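
A minimal standalone sketch of the readiness contract above (all names here are illustrative, not part of the original module): the input object is only assembled once every source in the state map holds a value; until then the function signals "not ready" by returning None so the scheduler can retry on a later pass.

def object_ready(state, parms):
    # Simplified: no type checking, no linkMerge, one source per input.
    inputobj = {}
    for inp in parms:
        src = inp.get("source")
        if src is not None:
            if src not in state:
                raise KeyError("Connect source %r does not exist" % src)
            if state[src] is None:
                return None  # upstream value still pending
            inputobj[inp["id"]] = state[src]
        elif "default" in inp:
            inputobj[inp["id"]] = inp["default"]
        else:
            raise ValueError("Value for %r not specified" % inp["id"])
    return inputobj

assert object_ready({"#a.out": None}, [{"id": "in", "source": "#a.out"}]) is None
assert object_ready({"#a.out": 42}, [{"id": "in", "source": "#a.out"}]) == {"in": 42}
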
Example #2
def exeval(ex, jobinput, requirements, docpath, context, pull_image):
    for r in reversed(requirements):
        if r["class"] == "ExpressionEngineRequirement" and r["id"] == ex[
                "engine"]:
            if r["id"][0] != "#":
                with open(os.path.join(docpath, r["id"])) as f:
                    ex_obj = yaml.load(f)
                sch = process.get_schema()
                validate.validate_ex(
                    sch.get_name("ExpressionEngineRequirement", ""), ex_obj)
                r = ex_obj

            runtime = []
            img_id = docker.get_from_requirements(r.get("requirements"),
                                                  r.get("hints"), pull_image)
            if img_id:
                runtime = ["docker", "run", "-i", "--rm", img_id]

            exdefs = []
            for exdef in r.get("expressionDefs", []):
                if isinstance(exdef, dict) and "ref" in exdef:
                    with open(os.path.join(r["_docpath"], exdef["ref"])) as f:
                        exdefs.append(f.read())
                elif isinstance(exdef, basestring):
                    exdefs.append(exdef)

            inp = {
                "script": ex["script"],
                "expressionDefs": exdefs,
                "job": jobinput,
                "context": context
            }

            _logger.debug(json.dumps(inp))

            sp = subprocess.Popen(runtime + aslist(r["engineCommand"]),
                                  shell=False,
                                  close_fds=True,
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE)

            (stdoutdata, stderrdata) = sp.communicate(json.dumps(inp) + "\n\n")
            if sp.returncode != 0:
                raise WorkflowException(
                    "Expression engine returned non-zero exit code.")

            return json.loads(stdoutdata)

    raise WorkflowException("Unknown expression engine '%s'" % ex["engine"])
Example #3
    def _init_job(self, joborder, basedir, **kwargs):
        # Validate job order
        try:
            validate.validate_ex(
                self.names.get_name("input_record_schema", ""), joborder)
        except validate.ValidationException as v:
            _logger.error("Failed to validate %s\n%s" %
                          (pprint.pformat(joborder), v))
            raise

        for r in self.tool.get("requirements", []):
            if r["class"] not in supportedProcessRequirements:
                raise WorkflowException("Unsupported process requirement %s" %
                                        (r["class"]))

        self.requirements = kwargs.get("requirements", []) + self.tool.get(
            "requirements", [])
        self.hints = kwargs.get("hints", []) + self.tool.get("hints", [])

        builder = Builder()
        builder.job = copy.deepcopy(joborder)
        builder.jslib = ''
        builder.basedir = basedir
        builder.files = []
        builder.bindings = []
        builder.schemaDefs = self.schemaDefs
        builder.docpath = self.docpath

        builder.bindings.extend(
            builder.bind_input(self.inputs_record_schema, builder.job))

        return builder
Example #4
    def adjust_for_scatter(self, steps):
        (scatterSpec, _) = self.get_requirement("ScatterFeatureRequirement")
        for step in steps:
            if scatterSpec and "scatter" in step.tool:
                inputparms = copy.deepcopy(step.tool["inputs"])
                outputparms = copy.deepcopy(step.tool["outputs"])
                scatter = aslist(step.tool["scatter"])

                inp_map = {i["id"]: i for i in inputparms}
                for s in scatter:
                    if s not in inp_map:
                        raise WorkflowException(
                            "Invalid Scatter parameter '%s'" % s)

                    inp_map[s]["type"] = {
                        "type": "array",
                        "items": inp_map[s]["type"]
                    }

                if step.tool.get("scatterMethod") == "nested_crossproduct":
                    nesting = len(scatter)
                else:
                    nesting = 1

                for r in xrange(0, nesting):
                    for i in outputparms:
                        i["type"] = {"type": "array", "items": i["type"]}
                step.tool["inputs"] = inputparms
                step.tool["outputs"] = outputparms
Example #5
def dotproduct_scatter(process, joborder, basedir, scatter_keys,
                       output_callback, **kwargs):
    l = None
    for s in scatter_keys:
        if l is None:
            l = len(joborder[s])
        elif l != len(joborder[s]):
            raise WorkflowException(
                "Length of input arrays must be equal when performing dotproduct scatter."
            )

    output = {}
    for i in process.tool["outputs"]:
        output[i["id"]] = [None] * l

    rc = ReceiveScatterOutput(output_callback, output)

    for n in range(0, l):
        jo = copy.copy(joborder)
        for s in scatter_keys:
            jo[s] = joborder[s][n]

        for j in process.job(jo, basedir,
                             functools.partial(rc.receive_scatter_output, n),
                             **kwargs):
            yield j

    rc.setTotal(l)
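
For intuition, a sketch (with illustrative values) of the per-index job orders the loop above builds: a dotproduct scatter pairs the input arrays element by element.

joborder = {"x": [1, 2], "y": [10, 20]}
pairs = [dict(joborder, x=joborder["x"][n], y=joborder["y"][n])
         for n in range(2)]
assert pairs == [{"x": 1, "y": 10}, {"x": 2, "y": 20}]
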
Example #6
    def job(self, joborder, basedir, output_callback, **kwargs):
        # Validate job order
        validate.validate_ex(self.names.get_name("input_record_schema", ""),
                             joborder)

        requirements = kwargs.get("requirements", []) + self.tool.get(
            "requirements", [])
        hints = kwargs.get("hints", []) + self.tool.get("hints", [])

        steps = [
            makeTool(step, basedir) for step in self.tool.get("steps", [])
        ]
        random.shuffle(steps)

        self.state = {}
        for i in self.tool["inputs"]:
            iid = idk(i["id"])
            if iid in joborder:
                self.state[iid] = WorkflowStateItem(
                    i, copy.deepcopy(joborder[iid]))
            elif "default" in i:
                self.state[iid] = WorkflowStateItem(
                    i, copy.deepcopy(i["default"]))
            else:
                raise WorkflowException(
                    "Input '%s' not in input object and does not have a default value."
                    % (i["id"]))

        for s in steps:
            for out in s.tool["outputs"]:
                self.state[idk(out["id"])] = None
            s.completed = False

        completed = 0
        while completed < len(steps):
            made_progress = False
            completed = 0
            for step in steps:
                if step.completed:
                    completed += 1
                else:
                    for newjob in self.try_make_job(step,
                                                    basedir,
                                                    requirements=requirements,
                                                    hints=hints,
                                                    **kwargs):
                        if newjob:
                            made_progress = True
                            yield newjob
            if not made_progress and completed < len(steps):
                yield None

        wo = {}
        for i in self.tool["outputs"]:
            if "connect" in i:
                src = idk(i["connect"]["source"])
                wo[idk(i["id"])] = self.state[src].value

        output_callback(wo)
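
A toy rendering, with hypothetical step records, of the progress loop's contract above: steps complete as their dependencies do, and the real generator yields None whenever a full pass makes no progress.

steps = [{"id": "b", "needs": ["a"], "done": False},
         {"id": "a", "needs": [], "done": False}]

def ready(step):
    done = {s["id"] for s in steps if s["done"]}
    return all(n in done for n in step["needs"])

order = []
while len(order) < len(steps):
    for s in steps:
        if not s["done"] and ready(s):
            s["done"] = True
            order.append(s["id"])

assert order == ["a", "b"]
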
Example #7
    def tostr(self, value):
        if isinstance(value, dict) and value.get("class") == "File":
            if "path" not in value:
                raise WorkflowException("File object must have \"path\": %s" %
                                        (value))
            return value["path"]
        else:
            return str(value)
Example #8
    def __init__(self, toolpath_object, docpath):
        self.impl = toolpath_object["impl"]
        try:
            self.embedded_tool = makeTool(
                from_url(os.path.join(docpath, self.impl)), docpath)
        except validate.ValidationException as v:
            raise WorkflowException(
                "Tool definition %s failed validation:\n%s" %
                (os.path.join(docpath, self.impl), validate.indent(str(v))))

        if "id" in toolpath_object:
            self.id = toolpath_object["id"]
        else:
            self.id = "#step_" + str(random.randint(1, 1000000000))

        for i in toolpath_object["inputs"]:
            d = i["def"][len(self.impl):]
            toolid = i.get("id", self.id + "." + idk(d))
            found = False
            for a in self.embedded_tool.tool["inputs"]:
                if a["id"] == d:
                    i.update(a)
                    found = True
            if not found:
                raise WorkflowException(
                    "Did not find input '%s' in external process" % (i["def"]))

            i["id"] = toolid

        for i in toolpath_object["outputs"]:
            d = i["def"][len(self.impl):]
            toolid = i["id"]
            found = False
            for a in self.embedded_tool.tool["outputs"]:
                if a["id"] == d:
                    i.update(a)
                    found = True
            if not found:
                raise WorkflowException(
                    "Did not find output '%s' in external process" %
                    (i["def"]))

            i["id"] = toolid

        super(External, self).__init__(toolpath_object, "Process", docpath)
Example #9
    def receive_output(self, jobout):
        self.output = {}
        for i in self.tool["outputs"]:
            if i["def"][:len(self.impl)] != self.impl:
                raise WorkflowException(
                    "'def' is '%s' but must refer to fragment of resource '%s' listed in 'impl'"
                    % (i["def"], self.impl))
            d = idk(i["def"][len(self.impl):])
            self.output[idk(i["id"])] = jobout[d]
Example #10
    def __init__(self, toolpath_object, **kwargs):
        try:
            makeTool = kwargs.get("makeTool")
            self.embedded_tool = makeTool(toolpath_object["run"], **kwargs)
        except validate.ValidationException as v:
            raise WorkflowException(
                "Tool definition %s failed validation:\n%s" %
                (toolpath_object["run"]["id"], validate.indent(str(v))))

        if "id" in toolpath_object:
            self.id = toolpath_object["id"]
        else:
            self.id = "#step_" + str(random.randint(1, 1000000000))

        for field in ("inputs", "outputs"):
            for i in toolpath_object[field]:
                inputid = i["id"]
                (_, d) = urlparse.urldefrag(inputid)
                frag = d.split(".")[-1]
                p = urlparse.urljoin(toolpath_object["run"].get("id", self.id),
                                     "#" + frag)
                found = False
                for a in self.embedded_tool.tool[field]:
                    if a["id"] == p:
                        i.update(a)
                        found = True
                if not found:
                    raise WorkflowException(
                        "Did not find %s parameter '%s' in workflow step" %
                        (field, p))
                i["id"] = inputid

        super(WorkflowStep, self).__init__(toolpath_object,
                                           "Process",
                                           do_validate=False,
                                           **kwargs)

        if self.embedded_tool.tool["class"] == "Workflow":
            (feature,
             _) = self.get_requirement("SubworkflowFeatureRequirement")
            if not feature:
                raise WorkflowException(
                    "Workflow contains embedded workflow but SubworkflowFeatureRequirement not declared"
                )
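
A standalone sketch of the identifier remapping above, with hypothetical URLs: the step keeps its own parameter id, while the lookup key into the embedded tool is built by joining the trailing fragment onto the tool's id.

import urlparse  # Python 2, as in the snippets above

step_param = "http://example.com/wf#step1.input_file"
run_id = "http://example.com/tools/wc-tool.cwl"

(_, d) = urlparse.urldefrag(step_param)
frag = d.split(".")[-1]
p = urlparse.urljoin(run_id, "#" + frag)
assert p == "http://example.com/tools/wc-tool.cwl#input_file"
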
Example #11
    def _init_job(self, joborder, input_basedir, **kwargs):
        builder = Builder()
        builder.job = copy.deepcopy(joborder)

        for i in self.tool["inputs"]:
            (_, d) = urlparse.urldefrag(i["id"])
            if d not in builder.job and "default" in i:
                builder.job[d] = i["default"]

        # Validate job order
        try:
            validate.validate_ex(
                self.names.get_name("input_record_schema", ""), builder.job)
        except validate.ValidationException as e:
            raise WorkflowException("Error validating input record, " + str(e))

        for r in self.requirements:
            if r["class"] not in supportedProcessRequirements:
                raise WorkflowException("Unsupported process requirement %s" %
                                        (r["class"]))

        builder.files = []
        builder.bindings = []
        builder.schemaDefs = self.schemaDefs
        builder.names = self.names
        builder.requirements = self.requirements

        dockerReq, _ = self.get_requirement("DockerRequirement")
        if dockerReq and kwargs.get("use_container"):
            builder.outdir = kwargs.get("docker_outdir") or "/tmp/job_output"
            builder.tmpdir = kwargs.get("docker_tmpdir") or "/tmp/job_tmp"
        else:
            builder.outdir = kwargs.get("outdir") or tempfile.mkdtemp()
            builder.tmpdir = kwargs.get("tmpdir") or tempfile.mkdtemp()

        builder.fs_access = kwargs.get("fs_access") or StdFsAccess(
            input_basedir)

        builder.bindings.extend(
            builder.bind_input(self.inputs_record_schema, builder.job))

        return builder
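
A condensed sketch of the directory selection above: inside a container, the fixed in-container mount points are used (the Docker runtime bind-mounts them, as in Example #21 below); otherwise fresh temporary directories are created.

import tempfile

def pick_dirs(use_container, **kwargs):
    if use_container:
        return (kwargs.get("docker_outdir") or "/tmp/job_output",
                kwargs.get("docker_tmpdir") or "/tmp/job_tmp")
    return (kwargs.get("outdir") or tempfile.mkdtemp(),
            kwargs.get("tmpdir") or tempfile.mkdtemp())
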
Example #12
    def receive_output(self, step, outputparms, jobout):
        _logger.info("Job got output: %s", jobout)
        for i in outputparms:
            if "id" in i:
                if idk(i["id"]) in jobout:
                    self.state[idk(i["id"])] = WorkflowStateItem(
                        i, jobout[idk(i["id"])])
                else:
                    raise WorkflowException(
                        "Output is missing expected field %s" % idk(i["id"]))
        step.completed = True
Example #13
def defaultMakeTool(toolpath_object, **kwargs):
    if "class" in toolpath_object:
        if toolpath_object["class"] == "CommandLineTool":
            return draft2tool.CommandLineTool(toolpath_object, **kwargs)
        elif toolpath_object["class"] == "ExpressionTool":
            return draft2tool.ExpressionTool(toolpath_object, **kwargs)
        elif toolpath_object["class"] == "Workflow":
            return Workflow(toolpath_object, **kwargs)

    raise WorkflowException(
        "Missing or invalid 'class' field in %s, expecting one of: CommandLineTool, ExpressionTool, Workflow"
        % toolpath_object["id"])
Example #14
    def try_make_job(self, step, basedir, **kwargs):
        inputparms = step.tool["inputs"]
        outputparms = step.tool["outputs"]

        try:
            inputobj = object_from_state(self.state, inputparms, False)
            if inputobj is None:
                _logger.debug("[workflow %s] job step %s not ready", id(self),
                              step.id)
                return

            _logger.debug("[step %s] starting job step %s of workflow %s",
                          id(step), step.id, id(self))

            if step.submitted:
                return

            callback = functools.partial(self.receive_output, step,
                                         outputparms)

            if "scatter" in step.tool:
                scatter = aslist(step.tool["scatter"])
                method = step.tool.get("scatterMethod")
                if method is None and len(scatter) != 1:
                    raise WorkflowException(
                        "Must specify scatterMethod when scattering over multiple inputs"
                    )

                if method == "dotproduct" or method is None:
                    jobs = dotproduct_scatter(step, inputobj, basedir, scatter,
                                              callback, **kwargs)
                elif method == "nested_crossproduct":
                    jobs = nested_crossproduct_scatter(step, inputobj, basedir,
                                                       scatter, callback,
                                                       **kwargs)
                elif method == "flat_crossproduct":
                    jobs = flat_crossproduct_scatter(step, inputobj, basedir,
                                                     scatter, callback, 0,
                                                     **kwargs)
            else:
                jobs = step.job(inputobj, basedir, callback, **kwargs)

            step.submitted = True

            for j in jobs:
                yield j
        except Exception as e:
            _logger.exception("Unhandled exception")
            self.processStatus = "permanentFail"
            step.completed = True
Example #15
    def try_make_job(self, step, basedir, **kwargs):
        _logger.debug("Try to make job %s", step.id)

        inputparms = step.tool["inputs"]
        outputparms = step.tool["outputs"]

        try:
            inputobj = self.object_from_state(inputparms, False)
            if inputobj is None:
                return

            if step.submitted:
                return

            callback = functools.partial(self.receive_output, step,
                                         outputparms)

            (scatterSpec,
             _) = self.get_requirement("ScatterFeatureRequirement")
            if scatterSpec and "scatter" in step.tool:
                scatter = aslist(step.tool["scatter"])
                method = step.tool.get("scatterMethod")
                if method is None and len(scatter) != 1:
                    raise WorkflowException(
                        "Must specify scatterMethod when scattering over multiple inputs"
                    )

                if method == "dotproduct" or method is None:
                    jobs = dotproduct_scatter(step, inputobj, basedir, scatter,
                                              callback, **kwargs)
                elif method == "nested_crossproduct":
                    jobs = nested_crossproduct_scatter(step, inputobj, basedir,
                                                       scatter, callback,
                                                       **kwargs)
                elif method == "flat_crossproduct":
                    jobs = flat_crossproduct_scatter(step, inputobj, basedir,
                                                     scatter, callback, 0,
                                                     **kwargs)
            else:
                jobs = step.job(inputobj, basedir, callback, **kwargs)

            step.submitted = True

            for j in jobs:
                yield j
        except Exception as e:
            _logger.error(e)
            self.processStatus = "permanentFail"
            step.completed = True
Example #16
def makeTool(toolpath_object, docpath):
    """docpath is the directory the tool file is located."""
    if "schema" in toolpath_object:
        return draft1tool.Tool(toolpath_object)
    elif "impl" in toolpath_object and toolpath_object.get(
            "class", "External") == "External":
        return External(toolpath_object, docpath)
    if "class" in toolpath_object:
        if toolpath_object["class"] == "CommandLineTool":
            return draft2tool.CommandLineTool(toolpath_object, docpath)
        elif toolpath_object["class"] == "ExpressionTool":
            return draft2tool.ExpressionTool(toolpath_object, docpath)
        elif toolpath_object["class"] == "Workflow":
            return Workflow(toolpath_object, docpath)
    # Reached when 'class' is missing or not one of the recognized values;
    # raising here avoids silently returning None for an unknown class.
    raise WorkflowException(
        "Missing or invalid 'class' field, expecting one of: Workflow, CommandLineTool, ExpressionTool, External"
    )
Example #17
    def collect_output_ports(self, ports, builder, outdir):
        ret = {}  # bound before the try so the except handler below can report it
        try:
            custom_output = os.path.join(outdir, "cwl.output.json")
            if builder.fs_access.exists(custom_output):
                with builder.fs_access.open(custom_output, "r") as f:
                    outputdoc = yaml.load(f)  # parse the file contents, not the path string
                validate.validate_ex(
                    self.names.get_name("outputs_record_schema", ""),
                    outputdoc)
                return outputdoc

            for port in ports:
                doc_url, fragment = urlparse.urldefrag(port['id'])
                ret[fragment] = self.collect_output(port, builder, outdir)
            validate.validate_ex(
                self.names.get_name("outputs_record_schema", ""), ret)
            return ret
        except validate.ValidationException as e:
            raise WorkflowException("Error validating output record, " +
                                    str(e) + "\n in " +
                                    json.dumps(ret, indent=4))
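
A minimal runnable sketch of the cwl.output.json escape hatch checked first above: a tool may write this file into its output directory to take over output collection entirely (contents illustrative).

import json
import os
import tempfile

outdir = tempfile.mkdtemp()
with open(os.path.join(outdir, "cwl.output.json"), "w") as f:
    json.dump({"report": {"class": "File", "path": "report.txt"}}, f)

with open(os.path.join(outdir, "cwl.output.json")) as f:
    outputs = json.load(f)
assert outputs["report"]["class"] == "File"
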
Example #18
    def match_types(self, sinktype, src, iid, inputobj, linkMerge):
        if isinstance(sinktype, list):
            # Sink is union type
            for st in sinktype:
                if self.match_types(st, src, iid, inputobj, linkMerge):
                    return True
        elif isinstance(src.parameter["type"], list):
            # Source is union type
            # Check that every source type is compatible with the sink.
            for st in src.parameter["type"]:
                srccopy = copy.deepcopy(src)
                srccopy.parameter["type"] = st
                if not self.match_types(sinktype, srccopy, iid, inputobj,
                                        linkMerge):
                    return False
            return True
        else:
            is_array = isinstance(sinktype,
                                  dict) and sinktype["type"] == "array"
            if is_array and linkMerge:
                if iid not in inputobj:
                    inputobj[iid] = []
                if linkMerge == "merge_nested":
                    inputobj[iid].append(src.value)
                elif linkMerge == "merge_flattened":
                    if isinstance(src.value, list):
                        inputobj[iid].extend(src.value)
                    else:
                        inputobj[iid].append(src.value)
                else:
                    raise WorkflowException(
                        "Unrecognized linkMerge enum '%s'" % linkMerge)
                return True
            elif src.parameter["type"] == sinktype:
                # simply assign the value from state to input
                inputobj[iid] = copy.deepcopy(src.value)
                return True
        return False
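
A standalone sketch of the two linkMerge modes handled above: merge_nested keeps one sink array entry per source, while merge_flattened splices list-valued sources into the sink array.

def merge_link(values, link_merge):
    out = []
    for v in values:
        if link_merge == "merge_nested":
            out.append(v)
        elif link_merge == "merge_flattened":
            if isinstance(v, list):
                out.extend(v)
            else:
                out.append(v)
        else:
            raise ValueError("Unrecognized linkMerge enum %r" % link_merge)
    return out

assert merge_link([[1, 2], 3], "merge_nested") == [[1, 2], 3]
assert merge_link([[1, 2], 3], "merge_flattened") == [1, 2, 3]
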
Example #19
    def __init__(self, toolpath_object, pos, **kwargs):
        try:
            makeTool = kwargs.get("makeTool")
            self.embedded_tool = makeTool(toolpath_object["run"], **kwargs)
        except validate.ValidationException as v:
            raise WorkflowException(
                "Tool definition %s failed validation:\n%s" %
                (toolpath_object["run"]["id"], validate.indent(str(v))))

        if "id" in toolpath_object:
            self.id = toolpath_object["id"]
        else:
            self.id = "#step" + str(pos)

        for field in ("inputs", "outputs"):
            for i in toolpath_object[field]:
                inputid = i["id"]
                (_, d) = urlparse.urldefrag(inputid)
                frag = d.split(".")[-1]
                p = urlparse.urljoin(toolpath_object["run"].get("id", self.id),
                                     "#" + frag)
                found = False
                for a in self.embedded_tool.tool[field]:
                    if a["id"] == p:
                        i.update(a)
                        found = True
                if not found:
                    raise WorkflowException(
                        "Did not find %s parameter '%s' in workflow step" %
                        (field, p))
                i["id"] = inputid

        super(WorkflowStep, self).__init__(toolpath_object,
                                           "Process",
                                           do_validate=False,
                                           **kwargs)

        if self.embedded_tool.tool["class"] == "Workflow":
            (feature,
             _) = self.get_requirement("SubworkflowFeatureRequirement")
            if not feature:
                raise WorkflowException(
                    "Workflow contains embedded workflow but SubworkflowFeatureRequirement not declared"
                )

        if "scatter" in self.tool:
            (feature, _) = self.get_requirement("ScatterFeatureRequirement")
            if not feature:
                raise WorkflowException(
                    "Workflow contains scatter but ScatterFeatureRequirement not declared"
                )

            inputparms = copy.deepcopy(self.tool["inputs"])
            outputparms = copy.deepcopy(self.tool["outputs"])
            scatter = aslist(self.tool["scatter"])

            method = self.tool.get("scatterMethod")
            if method is None and len(scatter) != 1:
                raise WorkflowException(
                    "Must specify scatterMethod when scattering over multiple inputs"
                )

            inp_map = {i["id"]: i for i in inputparms}
            for s in scatter:
                if s not in inp_map:
                    raise WorkflowException("Invalid Scatter parameter '%s'" %
                                            s)

                inp_map[s]["type"] = {
                    "type": "array",
                    "items": inp_map[s]["type"]
                }

            if self.tool.get("scatterMethod") == "nested_crossproduct":
                nesting = len(scatter)
            else:
                nesting = 1

            for r in xrange(0, nesting):
                for i in outputparms:
                    i["type"] = {"type": "array", "items": i["type"]}
            self.tool["inputs"] = inputparms
            self.tool["outputs"] = outputparms
Example #20
    def job(self,
            joborder,
            basedir,
            output_callback,
            move_outputs=True,
            **kwargs):
        self.state = {}
        self.processStatus = "success"

        if "outdir" in kwargs:
            del kwargs["outdir"]

        for i in self.tool["inputs"]:
            (_, iid) = urlparse.urldefrag(i["id"])
            if iid in joborder:
                self.state[i["id"]] = WorkflowStateItem(
                    i, copy.deepcopy(joborder[iid]))
            elif "default" in i:
                self.state[i["id"]] = WorkflowStateItem(
                    i, copy.deepcopy(i["default"]))
            else:
                raise WorkflowException(
                    "Input '%s' not in input object and does not have a default value."
                    % (i["id"]))

        for s in self.steps:
            for out in s.tool["outputs"]:
                self.state[out["id"]] = None

        output_dirs = set()

        completed = 0
        while completed < len(self.steps) and self.processStatus == "success":
            made_progress = False
            completed = 0
            for step in self.steps:
                if step.completed:
                    completed += 1
                else:
                    for newjob in self.try_make_job(step, basedir, **kwargs):
                        if newjob:
                            made_progress = True
                            if newjob.outdir:
                                output_dirs.add(newjob.outdir)
                        yield newjob
            if not made_progress and completed < len(self.steps):
                yield None

        wo = object_from_state(self.state, self.tool["outputs"], True)

        if move_outputs:
            targets = set()
            conflicts = set()

            outfiles = findfiles(wo)

            for f in outfiles:
                for a in output_dirs:
                    if f["path"].startswith(a):
                        src = f["path"]
                        dst = os.path.join(self.outdir, src[len(a) + 1:])
                        if dst in targets:
                            conflicts.add(dst)
                        else:
                            targets.add(dst)

            for f in outfiles:
                for a in output_dirs:
                    if f["path"].startswith(a):
                        src = f["path"]
                        dst = os.path.join(self.outdir, src[len(a) + 1:])
                        if dst in conflicts:
                            sp = os.path.splitext(dst)
                            dst = "%s-%s%s" % (
                                sp[0], str(random.randint(1,
                                                          1000000000)), sp[1])
                        dirname = os.path.dirname(dst)
                        if not os.path.exists(dirname):
                            os.makedirs(dirname)
                        _logger.debug("[workflow %s] Moving '%s' to '%s'",
                                      id(self), src, dst)
                        shutil.move(src, dst)
                        f["path"] = dst

            for a in output_dirs:
                if os.path.exists(a) and empty_subtree(a):
                    _logger.debug(
                        "[workflow %s] Removing intermediate output directory %s",
                        id(self), a)
                    shutil.rmtree(a, True)

        _logger.info("[workflow %s] outdir is %s", id(self), self.outdir)

        output_callback(wo, self.processStatus)
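
A small sketch of the collision handling above: when two steps would land files on the same destination path, a random tag is inserted before the extension.

import os
import random

def uniquify(dst, conflicts):
    if dst in conflicts:
        (root, ext) = os.path.splitext(dst)
        dst = "%s-%s%s" % (root, random.randint(1, 1000000000), ext)
    return dst

assert uniquify("out/result.txt", set()) == "out/result.txt"
assert uniquify("out/result.txt", {"out/result.txt"}) != "out/result.txt"
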
Example #21
    def run(self,
            dry_run=False,
            pull_image=True,
            rm_container=True,
            rm_tmpdir=True,
            move_outputs=True,
            **kwargs):
        if not os.path.exists(self.outdir):
            os.makedirs(self.outdir)

        #with open(os.path.join(outdir, "cwl.input.json"), "w") as fp:
        #    json.dump(self.joborder, fp)

        runtime = []
        env = {"TMPDIR": self.tmpdir}

        (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")

        for f in self.pathmapper.files():
            if not os.path.exists(self.pathmapper.mapper(f)[0]):
                raise WorkflowException("Required input file %s not found" %
                                        self.pathmapper.mapper(f)[0])

        img_id = None
        if docker_req and kwargs.get("use_container") is not False:
            env = os.environ
            img_id = docker.get_from_requirements(docker_req, docker_is_req,
                                                  pull_image)

        if docker_is_req and img_id is None:
            raise WorkflowException(
                "Docker is required for running this tool.")

        if img_id:
            runtime = ["docker", "run", "-i"]
            for src in self.pathmapper.files():
                vol = self.pathmapper.mapper(src)
                runtime.append("--volume=%s:%s:ro" % vol)
            runtime.append("--volume=%s:%s:rw" %
                           (os.path.abspath(self.outdir), "/tmp/job_output"))
            runtime.append("--volume=%s:%s:rw" %
                           (os.path.abspath(self.tmpdir), "/tmp/job_tmp"))
            runtime.append("--workdir=%s" % ("/tmp/job_output"))
            euid = docker_vm_uid() or os.geteuid()
            runtime.append("--user=%s" % (euid))

            if rm_container:
                runtime.append("--rm")

            runtime.append("--env=TMPDIR=/tmp/job_tmp")

            for t, v in self.environment.items():
                runtime.append("--env=%s=%s" % (t, v))

            runtime.append(img_id)
        else:
            env = self.environment
            if not os.path.exists(self.tmpdir):
                os.makedirs(self.tmpdir)
            env["TMPDIR"] = self.tmpdir

        stdin = None
        stdout = None

        _logger.info(
            "[job %s] %s$ %s%s%s", id(self), self.outdir, " ".join([
                shellescape.quote(arg) if needs_shell_quoting(arg) else arg
                for arg in (runtime + self.command_line)
            ]), ' < %s' % (self.stdin) if self.stdin else '', ' > %s' %
            os.path.join(self.outdir, self.stdout) if self.stdout else '')

        if dry_run:
            return (self.outdir, {})

        outputs = {}

        try:
            for t in self.generatefiles:
                if isinstance(self.generatefiles[t], dict):
                    os.symlink(self.generatefiles[t]["path"],
                               os.path.join(self.outdir, t))
                else:
                    with open(os.path.join(self.outdir, t), "w") as f:
                        f.write(self.generatefiles[t])

            if self.stdin:
                stdin = open(self.pathmapper.mapper(self.stdin)[0], "rb")
            else:
                stdin = subprocess.PIPE

            if self.stdout:
                absout = os.path.join(self.outdir, self.stdout)
                dn = os.path.dirname(absout)
                if dn and not os.path.exists(dn):
                    os.makedirs(dn)
                stdout = open(absout, "wb")
            else:
                stdout = sys.stderr

            sp = subprocess.Popen(runtime + self.command_line,
                                  shell=False,
                                  close_fds=True,
                                  stdin=stdin,
                                  stdout=stdout,
                                  env=env,
                                  cwd=self.outdir)

            if stdin == subprocess.PIPE:
                sp.stdin.close()

            rcode = sp.wait()

            if stdin != subprocess.PIPE:
                stdin.close()

            if stdout is not sys.stderr:
                stdout.close()

            if self.successCodes and rcode in self.successCodes:
                processStatus = "success"
            elif self.temporaryFailCodes and rcode in self.temporaryFailCodes:
                processStatus = "temporaryFail"
            elif self.permanentFailCodes and rcode in self.permanentFailCodes:
                processStatus = "permanentFail"
            elif rcode == 0:
                processStatus = "success"
            else:
                processStatus = "permanentFail"

            for t in self.generatefiles:
                if isinstance(self.generatefiles[t], dict):
                    os.remove(os.path.join(self.outdir, t))
                    os.symlink(
                        self.pathmapper.reversemap(
                            self.generatefiles[t]["path"])[1],
                        os.path.join(self.outdir, t))

            outputs = self.collect_outputs(self.outdir)

        except OSError as e:
            if e.errno == 2:
                if runtime:
                    _logger.error("'%s' not found", runtime[0])
                else:
                    _logger.error("'%s' not found", self.command_line[0])
            else:
                _logger.exception("Exception while running job")
            processStatus = "permanentFail"
        except WorkflowException as e:
            _logger.error("Error while running job: %s" % e)
            processStatus = "permanentFail"
        except Exception as e:
            _logger.exception("Exception while running job")
            processStatus = "permanentFail"

        if processStatus != "success":
            _logger.warn("[job %s] completed %s", id(self), processStatus)
        else:
            _logger.debug("[job %s] completed %s", id(self), processStatus)
        _logger.debug("[job %s] %s", id(self), json.dumps(outputs, indent=4))

        self.output_callback(outputs, processStatus)

        if rm_tmpdir:
            _logger.debug("[job %s] Removing temporary directory %s", id(self),
                          self.tmpdir)
            shutil.rmtree(self.tmpdir, True)

        if move_outputs and empty_subtree(self.outdir):
            _logger.debug("[job %s] Removing empty output directory %s",
                          id(self), self.outdir)
            shutil.rmtree(self.outdir, True)
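
For reference, a sketch of the container invocation the Docker branch above assembles; the image id and host paths are hypothetical.

import os

outdir, tmpdir, img_id = "/home/me/out", "/home/me/tmp", "debian:8"
runtime = ["docker", "run", "-i",
           "--volume=%s:/tmp/job_output:rw" % os.path.abspath(outdir),
           "--volume=%s:/tmp/job_tmp:rw" % os.path.abspath(tmpdir),
           "--workdir=/tmp/job_output",
           "--rm",
           "--env=TMPDIR=/tmp/job_tmp",
           img_id]
# runtime + self.command_line is what subprocess.Popen ultimately runs.
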
Example #22
    def collect_output(self, schema, builder, outdir):
        r = None
        if "outputBinding" in schema:
            binding = schema["outputBinding"]
            if "glob" in binding:
                r = []
                bg = builder.do_eval(binding["glob"])
                for gb in aslist(bg):
                    r.extend([{
                        "path": g,
                        "class": "File"
                    } for g in builder.fs_access.glob(os.path.join(outdir, gb))
                              ])
                for files in r:
                    checksum = hashlib.sha1()
                    with builder.fs_access.open(files["path"], "rb") as f:
                        contents = f.read(CONTENT_LIMIT)
                        if binding.get("loadContents"):
                            files["contents"] = contents
                        filesize = 0
                        while contents != "":
                            checksum.update(contents)
                            filesize += len(contents)
                            contents = f.read(1024 * 1024)
                    files["checksum"] = "sha1$%s" % checksum.hexdigest()
                    files["size"] = filesize

            if "outputEval" in binding:
                r = builder.do_eval(binding["outputEval"], context=r)
                if schema["type"] == "File" and (not isinstance(r, dict)
                                                 or "path" not in r):
                    raise WorkflowException(
                        "Expression must return a file object.")

            if schema["type"] == "File":
                if not r:
                    raise WorkflowException(
                        "No matches for output file with glob: {}.".format(
                            binding["glob"]))
                if len(r) > 1:
                    raise WorkflowException(
                        "Multiple matches for output item that is a single file."
                    )
                r = r[0]

            if schema["type"] == "File" and "secondaryFiles" in binding:
                r["secondaryFiles"] = []
                for sf in aslist(binding["secondaryFiles"]):
                    if isinstance(sf, dict):
                        sfpath = builder.do_eval(sf, context=r["path"])
                    else:
                        sfpath = {
                            "path": substitute(r["path"], sf),
                            "class": "File"
                        }
                    if isinstance(sfpath, list):
                        r["secondaryFiles"].extend(sfpath)
                    else:
                        r["secondaryFiles"].append(sfpath)

                for sf in r["secondaryFiles"]:
                    if not builder.fs_access.exists(sf["path"]):
                        raise WorkflowException(
                            "Missing secondary file of '%s' of primary file '%s'"
                            % (sf["path"], r["path"]))

        if not r and schema["type"] == "record":
            r = {}
            for f in schema["fields"]:
                r[f["name"]] = self.collect_output(f, builder, outdir)

        return r
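
A self-contained sketch of the streaming checksum/size pass above, reading in fixed-size chunks so large output files are never held in memory at once:

import hashlib

def sha1_and_size(path, chunk=1024 * 1024):
    h = hashlib.sha1()
    size = 0
    with open(path, "rb") as f:
        data = f.read(chunk)
        while data:
            h.update(data)
            size += len(data)
            data = f.read(chunk)
    return ("sha1$%s" % h.hexdigest(), size)
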
Example #23
    def job(self, joborder, basedir, output_callback, **kwargs):
        # Validate job order
        validate.validate_ex(self.names.get_name("input_record_schema", ""),
                             joborder)

        self.adjust_for_scatter(self.steps)

        random.shuffle(self.steps)

        self.state = {}
        self.processStatus = "success"
        for i in self.tool["inputs"]:
            (_, iid) = urlparse.urldefrag(i["id"])
            if iid in joborder:
                self.state[i["id"]] = WorkflowStateItem(
                    i, copy.deepcopy(joborder[iid]))
            elif "default" in i:
                self.state[i["id"]] = WorkflowStateItem(
                    i, copy.deepcopy(i["default"]))
            else:
                raise WorkflowException(
                    "Input '%s' not in input object and does not have a default value."
                    % (i["id"]))

        for s in self.steps:
            for out in s.tool["outputs"]:
                self.state[out["id"]] = None
            s.submitted = False
            s.completed = False

        if "outdir" in kwargs:
            outdir = kwargs["outdir"]
            del kwargs["outdir"]
        else:
            outdir = tempfile.mkdtemp()

        actual_jobs = []

        completed = 0
        while completed < len(self.steps) and self.processStatus == "success":
            made_progress = False
            completed = 0
            for step in self.steps:
                if step.completed:
                    completed += 1
                else:
                    for newjob in self.try_make_job(step, basedir, **kwargs):
                        if newjob:
                            made_progress = True
                            actual_jobs.append(newjob)
                        yield newjob
            if not made_progress and completed < len(self.steps):
                yield None

        wo = self.object_from_state(self.tool["outputs"], True)

        if kwargs.get("move_outputs", True):
            targets = set()
            conflicts = set()

            for f in findfiles(wo):
                for a in actual_jobs:
                    if a.outdir and f["path"].startswith(a.outdir):
                        src = f["path"]
                        dst = os.path.join(outdir, src[len(a.outdir) + 1:])
                        if dst in targets:
                            conflicts.add(dst)
                        else:
                            targets.add(dst)

            for f in findfiles(wo):
                for a in actual_jobs:
                    if a.outdir and f["path"].startswith(a.outdir):
                        src = f["path"]
                        dst = os.path.join(outdir, src[len(a.outdir) + 1:])
                        if dst in conflicts:
                            sp = os.path.splitext(dst)
                            dst = "%s-%s%s" % (
                                sp[0], str(random.randint(1,
                                                          1000000000)), sp[1])
                        dirname = os.path.dirname(dst)
                        if not os.path.exists(dirname):
                            os.makedirs(dirname)
                        _logger.info("Moving '%s' to '%s'", src, dst)
                        shutil.move(src, dst)
                        f["path"] = dst

            for a in actual_jobs:
                if a.outdir:
                    _logger.info("Removing intermediate output directory %s",
                                 a.outdir)
                    shutil.rmtree(a.outdir, True)

        output_callback(wo, self.processStatus)
Example #24
def idk(key):
    if len(key) <= 1:
        raise WorkflowException("Identifier is too short")
    if key[0] != '#':
        raise WorkflowException("Must start with #")
    return key[1:]
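
Usage of the helper above: it strips the leading '#' from a fragment identifier and rejects anything else.

assert idk("#main") == "main"
# idk("main") raises WorkflowException("Must start with #")
# idk("#")    raises WorkflowException("Identifier is too short")
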
Example #25
    def try_make_job(self, step, basedir, **kwargs):
        inputobj = {}

        if "scatter" in step.tool:
            if not self.check_feature("ScatterFeature", kwargs):
                raise WorkflowException(
                    "Must include ScatterFeature in requirements.")
            inputparms = copy.deepcopy(step.tool["inputs"])
            outputparms = copy.deepcopy(step.tool["outputs"])
            scatter = aslist(step.tool["scatter"])

            inp_map = {i["id"]: i for i in inputparms}
            for s in scatter:
                if s not in inp_map:
                    raise WorkflowException("Invalid Scatter parameter '%s'" %
                                            s)

                inp_map[s]["type"] = {
                    "type": "array",
                    "items": inp_map[s]["type"]
                }

            if step.tool.get("scatterMethod") == "nested_crossproduct":
                nesting = len(aslist(step.tool["scatter"]))
            else:
                nesting = 1

            for r in xrange(0, nesting):
                for i in outputparms:
                    i["type"] = {"type": "array", "items": i["type"]}
        else:
            inputparms = step.tool["inputs"]
            outputparms = step.tool["outputs"]

        for inp in inputparms:
            _logger.debug(inp)
            iid = idk(inp["id"])
            if "connect" in inp:
                connections = inp["connect"]
                is_array = isinstance(inp["type"],
                                      dict) and inp["type"]["type"] == "array"
                for connection in aslist(connections):
                    src = idk(connection["source"])
                    if src in self.state and self.state[src] is not None:
                        if self.state[src].parameter["type"] == inp["type"]:
                            # source and input types are the same
                            if is_array and iid in inputobj:
                                # there's already a value in the input object, so extend the existing array
                                inputobj[iid].extend(self.state[src].value)
                            else:
                                # simply assign the value from state to input
                                inputobj[iid] = copy.deepcopy(
                                    self.state[src].value)
                        elif is_array and self.state[src].parameter[
                                "type"] == inp["type"]["items"]:
                            # source type is the item type on the input array
                            # promote single item to array entry
                            if iid in inputobj:
                                inputobj[iid].append(self.state[src].value)
                            else:
                                inputobj[iid] = [self.state[src].value]
                        else:
                            raise WorkflowException(
                                "Type mismatch between '%s' (%s) and '%s' (%s)"
                                % (src, self.state[src].parameter["type"],
                                   idk(inp["id"]), inp["type"]))
                    elif src not in self.state:
                        raise WorkflowException(
                            "Connect source '%s' on parameter '%s' does not exist"
                            % (src, inp["id"]))
                    else:
                        return
            elif "default" in inp:
                inputobj[iid] = inp["default"]
            else:
                raise WorkflowException("Value for %s not specified" %
                                        (inp["id"]))

        _logger.info("Creating job with input: %s", inputobj)

        callback = functools.partial(self.receive_output, step, outputparms)

        if step.tool.get("scatter"):
            method = step.tool.get("scatterMethod")
            if method is None and len(aslist(step.tool["scatter"])) != 1:
                raise WorkflowException(
                    "Must specify scatterMethod when scattering over multiple inputs"
                )

            if method == "dotproduct" or method is None:
                jobs = dotproduct_scatter(step, inputobj, basedir,
                                          aslist(step.tool["scatter"]),
                                          callback, **kwargs)
            elif method == "nested_crossproduct":
                jobs = nested_crossproduct_scatter(
                    step, inputobj, basedir, aslist(step.tool["scatter"]),
                    callback, **kwargs)
            elif method == "flat_crossproduct":
                jobs = flat_crossproduct_scatter(step, inputobj, basedir,
                                                 aslist(step.tool["scatter"]),
                                                 callback, 0, **kwargs)
        else:
            jobs = step.job(inputobj, basedir, callback, **kwargs)

        for j in jobs:
            yield j
Example #26
def exeval(ex, jobinput, requirements, outdir, tmpdir, context, pull_image):
    if ex["engine"] == "cwl:JsonPointer":
        try:
            obj = {
                "job": jobinput,
                "context": context,
                "outdir": outdir,
                "tmpdir": tmpdir
            }
            return avro_ld.ref_resolver.resolve_json_pointer(obj, ex["script"])
        except ValueError as v:
            raise WorkflowException("%s in %s" % (v, obj))

    for r in reversed(requirements):
        if r["class"] == "ExpressionEngineRequirement" and r["id"] == ex[
                "engine"]:
            runtime = []
            img_id = None  # stays None unless a DockerRequirement yields an image

            class DR(object):
                pass

            dr = DR()
            dr.requirements = r.get("requirements", [])
            dr.hints = r.get("hints", [])

            (docker_req,
             docker_is_req) = process.get_feature(dr, "DockerRequirement")
            if docker_req:
                img_id = docker.get_from_requirements(docker_req,
                                                      docker_is_req,
                                                      pull_image)
            if img_id:
                runtime = ["docker", "run", "-i", "--rm", img_id]

            exdefs = []
            for exdef in r.get("engineConfig", []):
                if isinstance(exdef, dict) and "ref" in exdef:
                    with open(exdef["ref"][7:]) as f:
                        exdefs.append(f.read())
                elif isinstance(exdef, basestring):
                    exdefs.append(exdef)

            inp = {
                "script": ex["script"],
                "engineConfig": exdefs,
                "job": jobinput,
                "context": context,
                "outdir": outdir,
                "tmpdir": tmpdir,
            }

            _logger.debug("Invoking expression engine %s with %s",
                          runtime + aslist(r["engineCommand"]),
                          json.dumps(inp, indent=4))

            sp = subprocess.Popen(runtime + aslist(r["engineCommand"]),
                                  shell=False,
                                  close_fds=True,
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE)

            (stdoutdata, stderrdata) = sp.communicate(json.dumps(inp) + "\n\n")
            if sp.returncode != 0:
                raise WorkflowException(
                    "Expression engine returned non-zero exit code on evaluation of\n%s"
                    % json.dumps(inp, indent=4))

            return json.loads(stdoutdata)

    raise WorkflowException("Unknown expression engine '%s'" % ex["engine"])