Example #1
    def __init__(self, toc, j, renderlist, redirects):
        self.typedoc = StringIO.StringIO()
        self.toc = toc
        self.subs = {}
        self.docParent = {}
        self.docAfter = {}
        self.rendered = set()
        self.redirects = redirects
        self.title = None

        for t in j:
            if "extends" in t:
                for e in aslist(t["extends"]):
                    add_dictlist(self.subs, e, t["name"])
                    #if "docParent" not in t and "docAfter" not in t:
                    #    add_dictlist(self.docParent, e, t["name"])

            if t.get("docParent"):
                add_dictlist(self.docParent, t["docParent"], t["name"])

            if t.get("docChild"):
                for c in aslist(t["docChild"]):
                    add_dictlist(self.docParent, t["name"], c)

            if t.get("docAfter"):
                add_dictlist(self.docAfter, t["docAfter"], t["name"])

        _, _, metaschema_loader = schema.get_metaschema()
        alltypes = schema.extend_and_specialize(j, metaschema_loader)

        self.typemap = {}
        self.uses = {}
        self.record_refs = {}
        for t in alltypes:
            self.typemap[t["name"]] = t
            try:
                if t["type"] == "record":
                    self.record_refs[t["name"]] = []
                    for f in t.get("fields", []):
                        p = has_types(f)
                        for tp in p:
                            if tp not in self.uses:
                                self.uses[tp] = []
                            if (t["name"], f["name"]) not in self.uses[tp]:
                                _, frg1 = urlparse.urldefrag(t["name"])
                                _, frg2 = urlparse.urldefrag(f["name"])
                                self.uses[tp].append((frg1, frg2))
                            if tp not in basicTypes and tp not in self.record_refs[t["name"]]:
                                self.record_refs[t["name"]].append(tp)
            except KeyError as e:
                _logger.error("Did not find 'type' in %s", t)
                raise

        for f in alltypes:
            if (f["name"] in renderlist or
                ((not renderlist) and
                 ("extends" not in f) and
                 ("docParent" not in f) and
                 ("docAfter" not in f))):
                self.render_type(f, 1)
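
The constructor above leans on two helpers, aslist and add_dictlist, whose definitions are not shown. A minimal sketch of plausible implementations, consistent with how they are called here:

def aslist(l):
    # Wrap a scalar in a list so callers can always iterate.
    return l if isinstance(l, list) else [l]

def add_dictlist(di, key, val):
    # Append val under key, creating the list on first use.
    if key not in di:
        di[key] = []
    di[key].append(val)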
Example #2
    def __init__(self, toolpath_object, validateAs, docpath):
        self.names = get_schema()
        self.docpath = docpath

        self.tool = toolpath_object

        # Validate tool document
        validate.validate_ex(self.names.get_name(validateAs, ""), self.tool)

        self.validate_requirements(self.tool, "requirements")
        self.validate_requirements(self.tool, "hints")

        for t in self.tool.get("requirements", []):
            t["_docpath"] = docpath

        for t in self.tool.get("hints", []):
            t["_docpath"] = docpath

        # Import schema defs
        self.schemaDefs = {
            "Any": [
                "null",
                "boolean",
                "int",
                "long",
                "float",
                "double",
                "bytes",
                "string",
                "File",
                {"type": "array", "items": "Any"},
                {"type": "map", "values": "Any"}
            ]}

        sd, _ = get_feature("SchemaDefRequirement", requirements=self.tool.get("requirements"), hints=self.tool.get("hints"))
        if sd:
            for i in sd["types"]:
                avro.schema.make_avsc_object(i, self.names)
                self.schemaDefs[i["name"]] = i

        # Build record schema from inputs
        self.inputs_record_schema = {"name": "input_record_schema", "type": "record", "fields": []}
        for i in self.tool["inputs"]:
            c = copy.copy(i)
            c["name"] = c["id"][1:]
            del c["id"]
            if "default" in c:
                c["type"] = ["null"] + aslist(c["type"])
            self.inputs_record_schema["fields"].append(c)
        avro.schema.make_avsc_object(self.inputs_record_schema, self.names)

        self.outputs_record_schema = {"name": "outputs_record_schema", "type": "record", "fields": []}
        for i in self.tool["outputs"]:
            c = copy.copy(i)
            c["name"] = c["id"][1:]
            del c["id"]
            if "default" in c:
                c["type"] = ["null"] + aslist(c["type"])
            self.outputs_record_schema["fields"].append(c)
        avro.schema.make_avsc_object(self.outputs_record_schema, self.names)
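
The input loop above rewrites each parameter into an Avro record field, and a "default" makes the field nullable. A standalone illustration with a made-up parameter (reusing the aslist sketch from Example #1):

param = {"id": "#threads", "type": "int", "default": 1}
field = dict(param)
field["name"] = field["id"][1:]  # strip the leading "#" from the id
del field["id"]
if "default" in field:
    # A parameter with a default must also accept "no value supplied".
    field["type"] = ["null"] + aslist(field["type"])
print(field)  # e.g. {'name': 'threads', 'type': ['null', 'int'], 'default': 1}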
Example #3
def scandeps(base, doc, reffields, urlfields, loadref):
    r = []
    if isinstance(doc, dict):
        if "id" in doc:
            if doc["id"].startswith("file://"):
                df, _ = urlparse.urldefrag(doc["id"])
                if base != df:
                    base = df

        for k, v in doc.iteritems():
            if k in reffields:
                for u in aslist(v):
                    sub = loadref(base, u)
                    if isinstance(sub, dict):
                        subid = sub["id"]
                    else:
                        subid = urlparse.urljoin(base, u)
                    deps = {"class": "File", "path": subid}
                    sf = scandeps(subid, sub, reffields, urlfields, loadref)
                    if sf:
                        deps["secondaryFiles"] = sf
                    r.append(deps)
            elif k in urlfields:
                for u in aslist(v):
                    r.append({
                        "class": "File",
                        "path": urlparse.urljoin(base, u)
                    })
            else:
                r.extend(scandeps(base, v, reffields, urlfields, loadref))
    elif isinstance(doc, list):
        for d in doc:
            r.extend(scandeps(base, d, reffields, urlfields, loadref))
    return r
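
A hypothetical invocation of scandeps: "run" is treated as a reference field resolved through loadref, and every id and path below is made up for illustration.

def loadref(base, ref):
    # Stand-in resolver; a real one would fetch and parse the referenced doc.
    return {"id": "file:///work/" + ref}

wf = {"id": "file:///work/wf.cwl", "steps": [{"run": "tool.cwl"}]}
print(scandeps("file:///work/wf.cwl", wf, set(["run"]), set(), loadref))
# e.g. [{'class': 'File', 'path': 'file:///work/tool.cwl'}]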
Example #4
def exeval(ex, jobinput, requirements, outdir, tmpdir, context, pull_image):
    if ex["engine"] == "https://w3id.org/cwl/cwl#JsonPointer":
        try:
            obj = {"job": jobinput, "context": context, "outdir": outdir, "tmpdir": tmpdir}
            return schema_salad.ref_resolver.resolve_json_pointer(obj, ex["script"])
        except ValueError as v:
            raise WorkflowException("%s in %s" % (v,  obj))

    for r in reversed(requirements):
        if r["class"] == "ExpressionEngineRequirement" and r["id"] == ex["engine"]:
            runtime = []

            class DR(object):
                pass
            dr = DR()
            dr.requirements = r.get("requirements", [])
            dr.hints = r.get("hints", [])

            (docker_req, docker_is_req) = process.get_feature(dr, "DockerRequirement")
            img_id = None
            if docker_req:
                img_id = docker.get_from_requirements(docker_req, docker_is_req, pull_image)
            if img_id:
                runtime = ["docker", "run", "-i", "--rm", img_id]

            exdefs = []
            for exdef in r.get("engineConfig", []):
                if isinstance(exdef, dict) and "ref" in exdef:
                    with open(exdef["ref"][7:]) as f:
                        exdefs.append(f.read())
                elif isinstance(exdef, basestring):
                    exdefs.append(exdef)

            inp = {
                "script": ex["script"],
                "engineConfig": exdefs,
                "job": jobinput,
                "context": context,
                "outdir": outdir,
                "tmpdir": tmpdir,
            }

            _logger.debug("Invoking expression engine %s with %s",
                          runtime + aslist(r["engineCommand"]),
                                           json.dumps(inp, indent=4))

            sp = subprocess.Popen(runtime + aslist(r["engineCommand"]),
                                  shell=False,
                                  close_fds=True,
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE)

            (stdoutdata, stderrdata) = sp.communicate(json.dumps(inp) + "\n\n")
            if sp.returncode != 0:
                raise WorkflowException("Expression engine returned non-zero exit code on evaluation of\n%s" % json.dumps(inp, indent=4))

            return json.loads(stdoutdata)

    raise WorkflowException("Unknown expression engine '%s'" % ex["engine"])
Example #5
    def collect_output(self, schema, builder, outdir):
        r = None
        if "outputBinding" in schema:
            binding = schema["outputBinding"]
            if "glob" in binding:
                r = []
                bg = builder.do_eval(binding["glob"])
                for gb in aslist(bg):
                    r.extend([{"path": g, "class": "File"} for g in builder.fs_access.glob(os.path.join(outdir, gb))])
                for files in r:
                    checksum = hashlib.sha1()
                    with builder.fs_access.open(files["path"], "rb") as f:
                        contents = f.read(CONTENT_LIMIT)
                        if binding.get("loadContents"):
                            files["contents"] = contents
                        filesize = 0
                        while contents != "":
                            checksum.update(contents)
                            filesize += len(contents)
                            contents = f.read(1024 * 1024)
                    files["checksum"] = "sha1$%s" % checksum.hexdigest()
                    files["size"] = filesize

            if "outputEval" in binding:
                r = builder.do_eval(binding["outputEval"], context=r)
                if schema["type"] == "File" and (not isinstance(r, dict) or "path" not in r):
                    raise WorkflowException("Expression must return a file object.")

            if schema["type"] == "File":
                if not r:
                    raise WorkflowException("No matches for output file with glob: '{}'".format(bg))
                if len(r) > 1:
                    raise WorkflowException("Multiple matches for output item that is a single file.")
                r = r[0]

            if schema["type"] == "File" and "secondaryFiles" in binding:
                r["secondaryFiles"] = []
                for sf in aslist(binding["secondaryFiles"]):
                    if isinstance(sf, dict):
                        sfpath = builder.do_eval(sf, context=r["path"])
                    else:
                        sfpath = {"path": substitute(r["path"], sf), "class": "File"}
                    if isinstance(sfpath, list):
                        r["secondaryFiles"].extend(sfpath)
                    else:
                        r["secondaryFiles"].append(sfpath)

                for sf in r["secondaryFiles"]:
                    if not builder.fs_access.exists(sf["path"]):
                        raise WorkflowException(
                            "Missing secondary file of '%s' of primary file '%s'" % (sf["path"], r["path"])
                        )

        if not r and schema["type"] == "record":
            r = {}
            for f in schema["fields"]:
                r[f["name"]] = self.collect_output(f, builder, outdir)

        return r
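
The secondaryFiles branch rewrites the primary path with substitute, which is not shown. A sketch of the conventional CWL semantics it is assumed to follow, where each leading "^" strips one extension before the suffix is appended:

def substitute(value, replace):
    if replace.startswith("^"):
        return substitute(value[:value.rindex(".")], replace[1:])
    return value + replace

print(substitute("reads.bam", ".bai"))   # reads.bam.bai
print(substitute("reads.bam", "^.bai"))  # reads.bai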
Example #6
    def __init__(self, toc, j, renderlist, redirects):
        self.typedoc = StringIO.StringIO()
        self.toc = toc
        self.subs = {}
        self.docParent = {}
        self.docAfter = {}
        self.rendered = set()
        self.redirects = redirects
        self.title = None

        for t in j:
            if "extends" in t:
                for e in aslist(t["extends"]):
                    add_dictlist(self.subs, e, t["name"])
                    #if "docParent" not in t and "docAfter" not in t:
                    #    add_dictlist(self.docParent, e, t["name"])

            if t.get("docParent"):
                add_dictlist(self.docParent, t["docParent"], t["name"])

            if t.get("docChild"):
                for c in aslist(t["docChild"]):
                    add_dictlist(self.docParent, t["name"], c)

            if t.get("docAfter"):
                add_dictlist(self.docAfter, t["docAfter"], t["name"])

        _, _, metaschema_loader = schema.get_metaschema()
        alltypes = schema.extend_and_specialize(j, metaschema_loader)

        self.typemap = {}
        self.uses = {}
        self.record_refs = {}
        for t in alltypes:
            self.typemap[t["name"]] = t
            try:
                if t["type"] == "record":
                    self.record_refs[t["name"]] = []
                    for f in t.get("fields", []):
                        p = has_types(f)
                        for tp in p:
                            if tp not in self.uses:
                                self.uses[tp] = []
                            if (t["name"], f["name"]) not in self.uses[tp]:
                                _, frg1 = urlparse.urldefrag(t["name"])
                                _, frg2 = urlparse.urldefrag(f["name"])
                                self.uses[tp].append((frg1, frg2))
                            if tp not in basicTypes and tp not in self.record_refs[
                                    t["name"]]:
                                self.record_refs[t["name"]].append(tp)
            except KeyError as e:
                _logger.error("Did not find 'type' in %s", t)
                raise

        for f in alltypes:
            if (f["name"] in renderlist
                    or ((not renderlist) and ("extends" not in f) and
                        ("docParent" not in f) and ("docAfter" not in f))):
                self.render_type(f, 1)
Example #7
def exeval(ex, jobinput, requirements, outdir, tmpdir, context, pull_image):
    if ex["engine"] == "https://w3id.org/cwl/cwl#JsonPointer":
        try:
            obj = {"job": jobinput, "context": context, "outdir": outdir, "tmpdir": tmpdir}
            return schema_salad.ref_resolver.resolve_json_pointer(obj, ex["script"])
        except ValueError as v:
            raise WorkflowException("%s in %s" % (v,  obj))

    if ex["engine"] == "https://w3id.org/cwl/cwl#JavascriptEngine":
        engineConfig = []
        for r in reversed(requirements):
            if r["class"] == "ExpressionEngineRequirement" and r["id"] == "https://w3id.org/cwl/cwl#JavascriptEngine":
                engineConfig = r.get("engineConfig", [])
                break
        return sandboxjs.execjs(ex["script"], jshead(engineConfig, jobinput, context, tmpdir, outdir))

    for r in reversed(requirements):
        if r["class"] == "ExpressionEngineRequirement" and r["id"] == ex["engine"]:
            runtime = []

            class DR(object):
                pass
            dr = DR()
            dr.requirements = r.get("requirements", [])
            dr.hints = r.get("hints", [])

            (docker_req, docker_is_req) = process.get_feature(dr, "DockerRequirement")
            img_id = None
            if docker_req:
                img_id = docker.get_from_requirements(docker_req, docker_is_req, pull_image)
            if img_id:
                runtime = ["docker", "run", "-i", "--rm", img_id]

            inp = {
                "script": ex["script"],
                "engineConfig": r.get("engineConfig", []),
                "job": jobinput,
                "context": context,
                "outdir": outdir,
                "tmpdir": tmpdir,
            }

            _logger.debug("Invoking expression engine %s with %s",
                          runtime + aslist(r["engineCommand"]),
                                           json.dumps(inp, indent=4))

            sp = subprocess.Popen(runtime + aslist(r["engineCommand"]),
                                  shell=False,
                                  close_fds=True,
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE)

            (stdoutdata, stderrdata) = sp.communicate(json.dumps(inp) + "\n\n")
            if sp.returncode != 0:
                raise WorkflowException("Expression engine returned non-zero exit code on evaluation of\n%s" % json.dumps(inp, indent=4))

            return json.loads(stdoutdata)

    raise WorkflowException("Unknown expression engine '%s'" % ex["engine"])
Example #8
def checkFormat(actualFile, inputFormats, requirements, ontology):
    for af in aslist(actualFile):
        if "format" not in af:
            raise validate.ValidationException("Missing required 'format' for File %s" % af)
        for inpf in aslist(inputFormats):
            if af["format"] == inpf or formatSubclassOf(af["format"], inpf, ontology, set()):
                return
        raise validate.ValidationException("Incompatible file format %s required format(s) %s" % (af["format"], inputFormats))
Example #9
    def __init__(self, toolpath_object, **kwargs):
        (_, self.names, _) = get_schema()
        self.tool = toolpath_object
        self.requirements = kwargs.get("requirements", []) + self.tool.get("requirements", [])
        self.hints = kwargs.get("hints", []) + self.tool.get("hints", [])
        if "loader" in kwargs:
            self.formatgraph = kwargs["loader"].graph
        else:
            self.formatgraph = None

        self.validate_hints(self.tool.get("hints", []), strict=kwargs.get("strict"))

        self.schemaDefs = {}

        sd, _ = self.get_requirement("SchemaDefRequirement")

        if sd:
            sdtypes = sd["types"]
            av = schema_salad.schema.make_valid_avro(sdtypes, {t["name"]: t for t in sdtypes}, set())
            for i in av:
                self.schemaDefs[i["name"]] = i
            avro.schema.make_avsc_object(av, self.names)

        # Build record schema from inputs
        self.inputs_record_schema = {"name": "input_record_schema", "type": "record", "fields": []}
        self.outputs_record_schema = {"name": "outputs_record_schema", "type": "record", "fields": []}

        for key in ("inputs", "outputs"):
            for i in self.tool[key]:
                c = copy.copy(i)
                doc_url, _ = urlparse.urldefrag(c['id'])
                c["name"] = shortname(c["id"])
                del c["id"]

                if "type" not in c:
                    raise validate.ValidationException("Missing `type` in parameter `%s`" % c["name"])

                if "default" in c and "null" not in aslist(c["type"]):
                    c["type"] = ["null"] + aslist(c["type"])

                if key == "inputs":
                    self.inputs_record_schema["fields"].append(c)
                elif key == "outputs":
                    self.outputs_record_schema["fields"].append(c)

        try:
            self.inputs_record_schema = schema_salad.schema.make_valid_avro(self.inputs_record_schema, {}, set())
            avro.schema.make_avsc_object(self.inputs_record_schema, self.names)
        except avro.schema.SchemaParseException as e:
            raise validate.ValidationException("Got error `%s` while prcoessing inputs of %s:\n%s" % (str(e), self.tool["id"], json.dumps(self.inputs_record_schema, indent=4)))

        try:
            self.outputs_record_schema = schema_salad.schema.make_valid_avro(self.outputs_record_schema, {}, set())
            avro.schema.make_avsc_object(self.outputs_record_schema, self.names)
        except avro.schema.SchemaParseException as e:
            raise validate.ValidationException("Got error `%s` while prcoessing outputs of %s:\n%s" % (str(e), self.tool["id"], json.dumps(self.outputs_record_schema, indent=4)))
Example #10
def checkFormat(actualFile, inputFormats, requirements, ontology):
    for af in aslist(actualFile):
        if "format" not in af:
            raise validate.ValidationException(
                "Missing required 'format' for File %s" % af)
        for inpf in aslist(inputFormats):
            if af["format"] == inpf or formatSubclassOf(
                    af["format"], inpf, ontology, set()):
                return
        raise validate.ValidationException(
            "Incompatible file format %s, required format(s) %s" %
            (af["format"], inputFormats))
Example #11
def merge_properties(a, b):
    c = {}
    for i in a:
        if i not in b:
            c[i] = a[i]
    for i in b:
        if i not in a:
            c[i] = b[i]
    for i in a:
        if i in b:
            c[i] = aslist(a[i]) + aslist(b[i])

    return c
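
Usage, with aslist as sketched in Example #1: keys unique to either side pass through, while shared keys are combined into concatenated lists (output shown up to dict key order).

a = {"envDef": [{"envName": "PATH"}], "onlyA": 1}
b = {"envDef": [{"envName": "HOME"}], "onlyB": 2}
print(merge_properties(a, b))
# {'onlyA': 1, 'onlyB': 2, 'envDef': [{'envName': 'PATH'}, {'envName': 'HOME'}]}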
Example #13
def object_from_state(state, parms, frag_only, supportsMultipleInput):
    inputobj = {}
    for inp in parms:
        iid = inp["id"]
        if frag_only:
            iid = shortname(iid)
        if "source" in inp:
            if isinstance(inp["source"], list) and not supportsMultipleInput:
                raise WorkflowException("Workflow contains multiple inbound links to a single parameter but MultipleInputFeatureRequirement is not declared.")
            connections = aslist(inp["source"])
            for src in connections:
                if src in state and state[src] is not None:
                    if not match_types(inp["type"], state[src], iid, inputobj,
                                            inp.get("linkMerge", ("merge_nested" if len(connections) > 1 else None)),
                                       valueFrom=inp.get("valueFrom")):
                        raise WorkflowException("Type mismatch between source '%s' (%s) and sink '%s' (%s)" % (src, state[src].parameter["type"], inp["id"], inp["type"]))
                elif src not in state:
                    raise WorkflowException("Connect source '%s' on parameter '%s' does not exist" % (src, inp["id"]))
                else:
                    return None
        elif "default" in inp:
            inputobj[iid] = inp["default"]
        elif "valueFrom" in inp:
            inputobj[iid] = None
        else:
            raise WorkflowException("Value for %s not specified" % (inp["id"]))
    return inputobj
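
The linkMerge default used above, isolated: nesting is only applied automatically when a sink has more than one inbound connection.

def default_link_merge(connections):
    return "merge_nested" if len(connections) > 1 else None

print(default_link_merge(["#a/out"]))            # None
print(default_link_merge(["#a/out", "#b/out"]))  # merge_nested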
Example #14
def object_from_state(state, parms, frag_only):
    inputobj = {}
    for inp in parms:
        iid = inp["id"]
        if frag_only:
            (_, iid) = urlparse.urldefrag(iid)
            iid = iid.split(".")[-1]
        if "source" in inp:
            connections = aslist(inp["source"])
            for src in connections:
                if src in state and state[src] is not None:
                    if not match_types(
                            inp["type"], state[src], iid, inputobj,
                            inp.get("linkMerge",
                                    ("merge_nested"
                                     if len(connections) > 1 else None))):
                        raise WorkflowException(
                            "Type mismatch between source '%s' (%s) and sink '%s' (%s)"
                            % (src, state[src].parameter["type"], inp["id"],
                               inp["type"]))
                elif src not in state:
                    raise WorkflowException(
                        "Connect source '%s' on parameter '%s' does not exist"
                        % (src, inp["id"]))
                else:
                    return None
        elif "default" in inp:
            inputobj[iid] = inp["default"]
        else:
            raise WorkflowException("Value for %s not specified" % (inp["id"]))
    return inputobj
Example #15
    def adjust_for_scatter(self, steps):
        (scatterSpec, _) = self.get_requirement("ScatterFeatureRequirement")
        for step in steps:
            if scatterSpec and "scatter" in step.tool:
                inputparms = copy.deepcopy(step.tool["inputs"])
                outputparms = copy.deepcopy(step.tool["outputs"])
                scatter = aslist(step.tool["scatter"])

                inp_map = {i["id"]: i for i in inputparms}
                for s in scatter:
                    if s not in inp_map:
                        raise WorkflowException("Invalid Scatter parameter '%s'" % s)

                    inp_map[s]["type"] = {"type": "array", "items": inp_map[s]["type"]}

                if step.tool.get("scatterMethod") == "nested_crossproduct":
                    nesting = len(scatter)
                else:
                    nesting = 1

                for r in xrange(0, nesting):
                    for i in outputparms:
                        i["type"] = {"type": "array", "items": i["type"]}
                step.tool["inputs"] = inputparms
                step.tool["outputs"] = outputparms
Example #16
    def adjust_for_scatter(self, steps):
        (scatterSpec, _) = self.get_requirement("ScatterFeatureRequirement")
        for step in steps:
            if scatterSpec and "scatter" in step.tool:
                inputparms = copy.deepcopy(step.tool["inputs"])
                outputparms = copy.deepcopy(step.tool["outputs"])
                scatter = aslist(step.tool["scatter"])

                inp_map = {i["id"]: i for i in inputparms}
                for s in scatter:
                    if s not in inp_map:
                        raise WorkflowException(
                            "Invalid Scatter parameter '%s'" % s)

                    inp_map[s]["type"] = {
                        "type": "array",
                        "items": inp_map[s]["type"]
                    }

                if step.tool.get("scatterMethod") == "nested_crossproduct":
                    nesting = len(scatter)
                else:
                    nesting = 1

                for r in xrange(0, nesting):
                    for i in outputparms:
                        i["type"] = {"type": "array", "items": i["type"]}
                step.tool["inputs"] = inputparms
                step.tool["outputs"] = outputparms
Example #17
def scandeps(base, doc, reffields, urlfields, loadref):
    r = []
    if isinstance(doc, dict):
        if "id" in doc:
            if doc["id"].startswith("file://"):
                df, _ = urlparse.urldefrag(doc["id"])
                if base != df:
                    r.append({
                        "class": "File",
                        "path": df
                    })
                    base = df

        for k, v in doc.iteritems():
            if k in reffields:
                for u in aslist(v):
                    if isinstance(u, dict):
                        r.extend(scandeps(base, u, reffields, urlfields, loadref))
                    else:
                        sub = loadref(base, u)
                        subid = urlparse.urljoin(base, u)
                        deps = {
                            "class": "File",
                            "path": subid
                        }
                        sf = scandeps(subid, sub, reffields, urlfields, loadref)
                        if sf:
                            deps["secondaryFiles"] = sf
                        r.append(deps)
            elif k in urlfields:
                for u in aslist(v):
                    r.append({
                        "class": "File",
                        "path": urlparse.urljoin(base, u)
                    })
            else:
                r.extend(scandeps(base, v, reffields, urlfields, loadref))
    elif isinstance(doc, list):
        for d in doc:
            r.extend(scandeps(base, d, reffields, urlfields, loadref))
    return r
Example #18
    def try_make_job(self, step, basedir, **kwargs):
        inputparms = step.tool["inputs"]
        outputparms = step.tool["outputs"]

        try:
            inputobj = object_from_state(self.state, inputparms, False)
            if inputobj is None:
                _logger.debug("[workflow %s] job step %s not ready", id(self),
                              step.id)
                return

            _logger.debug("[step %s] starting job step %s of workflow %s",
                          id(step), step.id, id(self))

            if step.submitted:
                return

            callback = functools.partial(self.receive_output, step,
                                         outputparms)

            if "scatter" in step.tool:
                scatter = aslist(step.tool["scatter"])
                method = step.tool.get("scatterMethod")
                if method is None and len(scatter) != 1:
                    raise WorkflowException(
                        "Must specify scatterMethod when scattering over multiple inputs"
                    )

                if method == "dotproduct" or method is None:
                    jobs = dotproduct_scatter(step, inputobj, basedir, scatter,
                                              callback, **kwargs)
                elif method == "nested_crossproduct":
                    jobs = nested_crossproduct_scatter(step, inputobj, basedir,
                                                       scatter, callback,
                                                       **kwargs)
                elif method == "flat_crossproduct":
                    jobs = flat_crossproduct_scatter(step, inputobj, basedir,
                                                     scatter, callback, 0,
                                                     **kwargs)
            else:
                jobs = step.job(inputobj, basedir, callback, **kwargs)

            step.submitted = True

            for j in jobs:
                yield j
        except Exception as e:
            _logger.exception("Unhandled exception")
            self.processStatus = "permanentFail"
            step.completed = True
Example #19
def exeval(ex, jobinput, requirements, docpath, context, pull_image):
    for r in reversed(requirements):
        if r["class"] == "ExpressionEngineRequirement" and r["id"] == ex[
                "engine"]:
            if r["id"][0] != "#":
                with open(os.path.join(docpath, r["id"])) as f:
                    ex_obj = yaml.load(f)
                sch = process.get_schema()
                validate.validate_ex(
                    sch.get_name("ExpressionEngineRequirement", ""), ex_obj)
                r = ex_obj

            runtime = []
            img_id = docker.get_from_requirements(r.get("requirements"),
                                                  r.get("hints"), pull_image)
            if img_id:
                runtime = ["docker", "run", "-i", "--rm", img_id]

            exdefs = []
            for exdef in r.get("expressionDefs", []):
                if isinstance(exdef, dict) and "ref" in exdef:
                    with open(os.path.join(r["_docpath"], exdef["ref"])) as f:
                        exdefs.append(f.read())
                elif isinstance(exdef, basestring):
                    exdefs.append(exdef)

            inp = {
                "script": ex["script"],
                "expressionDefs": exdefs,
                "job": jobinput,
                "context": context
            }

            _logger.debug(json.dumps(inp))

            sp = subprocess.Popen(runtime + aslist(r["engineCommand"]),
                                  shell=False,
                                  close_fds=True,
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE)

            (stdoutdata, stderrdata) = sp.communicate(json.dumps(inp) + "\n\n")
            if sp.returncode != 0:
                raise WorkflowException(
                    "Expression engine returned non-zero exit code.")

            return json.loads(stdoutdata)

    raise WorkflowException("Unknown expression engine '%s'" % ex["engine"])
Example #20
    def try_make_job(self, step, basedir, **kwargs):
        _logger.debug("Try to make job %s", step.id)

        inputparms = step.tool["inputs"]
        outputparms = step.tool["outputs"]

        try:
            inputobj = self.object_from_state(inputparms, False)
            if inputobj is None:
                return

            if step.submitted:
                return

            callback = functools.partial(self.receive_output, step,
                                         outputparms)

            (scatterSpec,
             _) = self.get_requirement("ScatterFeatureRequirement")
            if scatterSpec and "scatter" in step.tool:
                scatter = aslist(step.tool["scatter"])
                method = step.tool.get("scatterMethod")
                if method is None and len(scatter) != 1:
                    raise WorkflowException(
                        "Must specify scatterMethod when scattering over multiple inputs"
                    )

                if method == "dotproduct" or method is None:
                    jobs = dotproduct_scatter(step, inputobj, basedir, scatter,
                                              callback, **kwargs)
                elif method == "nested_crossproduct":
                    jobs = nested_crossproduct_scatter(step, inputobj, basedir,
                                                       scatter, callback,
                                                       **kwargs)
                elif method == "flat_crossproduct":
                    jobs = flat_crossproduct_scatter(step, inputobj, basedir,
                                                     scatter, callback, 0,
                                                     **kwargs)
            else:
                jobs = step.job(inputobj, basedir, callback, **kwargs)

            step.submitted = True

            for j in jobs:
                yield j
        except Exception as e:
            _logger.error(e)
            self.processStatus = "permanentFail"
            step.completed = True
Example #21
def exeval(ex, jobinput, requirements, docpath, context, pull_image):
    if ex["engine"] == "JsonPointer":
        return ref_resolver.resolve_pointer({"job": jobinput, "context": context}, ex["script"])

    for r in reversed(requirements):
        if r["class"] == "ExpressionEngineRequirement" and r["id"] == ex["engine"]:
            if r["id"][0] != "#":
                with open(os.path.join(docpath, r["id"])) as f:
                    ex_obj = yaml.load(f)
                sch = process.get_schema()
                validate.validate_ex(sch.get_name("ExpressionEngineRequirement", ""), ex_obj)
                r = ex_obj

            runtime = []
            img_id = docker.get_from_requirements(r.get("requirements"), r.get("hints"), pull_image)
            if img_id:
                runtime = ["docker", "run", "-i", "--rm", img_id]

            exdefs = []
            for exdef in r.get("expressionDefs", []):
                if isinstance(exdef, dict) and "ref" in exdef:
                    with open(os.path.join(r["_docpath"], exdef["ref"])) as f:
                        exdefs.append(f.read())
                elif isinstance(exdef, basestring):
                    exdefs.append(exdef)

            inp = {
                "script": ex["script"],
                "expressionDefs": exdefs,
                "job": jobinput,
                "context": context
            }

            _logger.debug(json.dumps(inp))

            sp = subprocess.Popen(runtime + aslist(r["engineCommand"]),
                                  shell=False,
                                  close_fds=True,
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE)

            (stdoutdata, stderrdata) = sp.communicate(json.dumps(inp) + "\n\n")
            if sp.returncode != 0:
                raise WorkflowException("Expression engine returned non-zero exit code.")

            return json.loads(stdoutdata)

    raise WorkflowException("Unknown expression engine '%s'" % ex["engine"])
Example #22
    def add_schemas(self, ns, base_url):
        for sch in aslist(ns):
            self.graph.parse(urlparse.urljoin(base_url, sch))

        for s, _, _ in self.graph.triples( (None, RDF.type, RDF.Property) ):
            self._add_properties(s)
        for s, _, o in self.graph.triples( (None, RDFS.subPropertyOf, None) ):
            self._add_properties(s)
            self._add_properties(o)
        for s, _, _ in self.graph.triples( (None, RDFS.range, None) ):
            self._add_properties(s)
        for s, _, _ in self.graph.triples( (None, RDF.type, OWL.ObjectProperty) ):
            self._add_properties(s)

        for s, _, _ in self.graph.triples( (None, None, None) ):
            self.idx[str(s)] = True
Example #23
    def add_schemas(self, ns, base_url):
        for sch in aslist(ns):
            self.graph.parse(urlparse.urljoin(base_url, sch))

        for s, _, _ in self.graph.triples((None, RDF.type, RDF.Property)):
            self._add_properties(s)
        for s, _, o in self.graph.triples((None, RDFS.subPropertyOf, None)):
            self._add_properties(s)
            self._add_properties(o)
        for s, _, _ in self.graph.triples((None, RDFS.range, None)):
            self._add_properties(s)
        for s, _, _ in self.graph.triples(
            (None, RDF.type, OWL.ObjectProperty)):
            self._add_properties(s)

        for s, _, _ in self.graph.triples((None, None, None)):
            self.idx[str(s)] = True
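
The graph/triples API used here appears to be rdflib. A minimal, self-contained sketch of the kind of ontology data add_schemas consumes (the ex:format property is invented):

from rdflib import Graph, RDF

g = Graph()
g.parse(data="""
@prefix ex: <http://example.org/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
ex:format a rdf:Property .
""", format="turtle")

for s, _, _ in g.triples((None, RDF.type, RDF.Property)):
    print(s)  # http://example.org/format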
Example #24
def _draftDraft3dev1toDev2(doc, loader, baseuri):
    doc = _updateDev2Script(doc)
    if isinstance(doc, basestring):
        return doc

    # Convert expressions
    if isinstance(doc, dict):
        if "@import" in doc:
            r, _ = loader.resolve_ref(doc["@import"], base_url=baseuri)
            return _draftDraft3dev1toDev2(r, loader, r["id"])

        for a in doc:
            doc[a] = _draftDraft3dev1toDev2(doc[a], loader, baseuri)

        if "class" in doc and (doc["class"] in ("CommandLineTool", "Workflow",
                                                "ExpressionTool")):
            added = False
            if "requirements" in doc:
                for r in doc["requirements"]:
                    if r["class"] == "ExpressionEngineRequirement":
                        if "engineConfig" in r:
                            doc["requirements"].append({
                                "class":
                                "InlineJavascriptRequirement",
                                "expressionLib": [
                                    updateScript(sc)
                                    for sc in aslist(r["engineConfig"])
                                ]
                            })
                            added = True
                        doc["requirements"] = [
                            rq for rq in doc["requirements"]
                            if rq["class"] != "ExpressionEngineRequirement"
                        ]
                        break
            else:
                doc["requirements"] = []
            if not added:
                doc["requirements"].append(
                    {"class": "InlineJavascriptRequirement"})

    elif isinstance(doc, list):
        return [_draftDraft3dev1toDev2(a, loader, baseuri) for a in doc]

    return doc
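
A hypothetical before/after for the requirements rewrite, assuming updateScript is the identity on this script:

before = {
    "class": "CommandLineTool",
    "requirements": [{
        "class": "ExpressionEngineRequirement",
        "engineConfig": ["var x = 1;"],
    }],
}
after = {
    "class": "CommandLineTool",
    "requirements": [{
        "class": "InlineJavascriptRequirement",
        "expressionLib": ["var x = 1;"],
    }],
}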
Example #25
    def try_make_job(self, step, basedir, **kwargs):
        inputparms = step.tool["inputs"]
        outputparms = step.tool["outputs"]

        supportsMultipleInput = bool(self.workflow.get_requirement("MultipleInputFeatureRequirement")[0])

        try:
            inputobj = object_from_state(self.state, inputparms, False, supportsMultipleInput)
            if inputobj is None:
                _logger.debug("[workflow %s] job step %s not ready", id(self), step.id)
                return

            _logger.debug("[step %s] starting job step %s of workflow %s", id(step), step.id, id(self))

            if step.submitted:
                return

            callback = functools.partial(self.receive_output, step, outputparms)

            if "scatter" in step.tool:
                scatter = aslist(step.tool["scatter"])
                method = step.tool.get("scatterMethod")
                if method is None and len(scatter) != 1:
                    raise WorkflowException("Must specify scatterMethod when scattering over multiple inputs")

                if method == "dotproduct" or method is None:
                    jobs = dotproduct_scatter(step, inputobj, basedir, scatter, callback, **kwargs)
                elif method == "nested_crossproduct":
                    jobs = nested_crossproduct_scatter(step, inputobj, basedir, scatter, callback, **kwargs)
                elif method == "flat_crossproduct":
                    jobs = flat_crossproduct_scatter(step, inputobj, basedir, scatter, callback, 0, **kwargs)
            else:
                jobs = step.job(inputobj, basedir, callback, **kwargs)

            step.submitted = True

            for j in jobs:
                yield j
        except WorkflowException:
            raise
        except Exception as e:
            _logger.exception("Unhandled exception")
            self.processStatus = "permanentFail"
            step.completed = True
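
The three scatter methods in miniature: dotproduct pairs inputs index by index (so lengths must match), while the two crossproducts run every combination and differ only in how deeply the outputs are nested.

from itertools import product

xs = [1, 2]
ys = ["a", "b"]
print(list(zip(xs, ys)))      # dotproduct: [(1, 'a'), (2, 'b')]
print(list(product(xs, ys)))  # either crossproduct: all 4 combinations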
Example #26
    def add_schemas(self, ns, base_url):
        for sch in aslist(ns):
            try:
                self.graph.parse(urlparse.urljoin(base_url, sch), format="xml")
            except xml.sax.SAXParseException:
                self.graph.parse(urlparse.urljoin(base_url, sch), format="turtle")

        for s, _, _ in self.graph.triples( (None, RDF.type, RDF.Property) ):
            self._add_properties(s)
        for s, _, o in self.graph.triples( (None, RDFS.subPropertyOf, None) ):
            self._add_properties(s)
            self._add_properties(o)
        for s, _, _ in self.graph.triples( (None, RDFS.range, None) ):
            self._add_properties(s)
        for s, _, _ in self.graph.triples( (None, RDF.type, OWL.ObjectProperty) ):
            self._add_properties(s)

        for s, _, _ in self.graph.triples( (None, None, None) ):
            self.idx[str(s)] = True
Example #27
    def add_schemas(self, ns, base_url):
        for sch in aslist(ns):
            try:
                self.graph.parse(urlparse.urljoin(base_url, sch), format="xml")
            except xml.sax.SAXParseException:
                self.graph.parse(urlparse.urljoin(base_url, sch),
                                 format="turtle")

        for s, _, _ in self.graph.triples((None, RDF.type, RDF.Property)):
            self._add_properties(s)
        for s, _, o in self.graph.triples((None, RDFS.subPropertyOf, None)):
            self._add_properties(s)
            self._add_properties(o)
        for s, _, _ in self.graph.triples((None, RDFS.range, None)):
            self._add_properties(s)
        for s, _, _ in self.graph.triples(
            (None, RDF.type, OWL.ObjectProperty)):
            self._add_properties(s)

        for s, _, _ in self.graph.triples((None, None, None)):
            self.idx[str(s)] = True
Example #28
    def try_make_job(self, step, basedir, **kwargs):
        _logger.debug("Try to make job %s", step.id)

        inputparms = step.tool["inputs"]
        outputparms = step.tool["outputs"]

        try:
            inputobj = self.object_from_state(inputparms, False)
            if inputobj is None:
                return

            if step.submitted:
                return

            callback = functools.partial(self.receive_output, step, outputparms)

            (scatterSpec, _) = self.get_requirement("ScatterFeatureRequirement")
            if scatterSpec and "scatter" in step.tool:
                scatter = aslist(step.tool["scatter"])
                method = step.tool.get("scatterMethod")
                if method is None and len(scatter) != 1:
                    raise WorkflowException("Must specify scatterMethod when scattering over multiple inputs")

                if method == "dotproduct" or method is None:
                    jobs = dotproduct_scatter(step, inputobj, basedir, scatter, callback, **kwargs)
                elif method == "nested_crossproduct":
                    jobs = nested_crossproduct_scatter(step, inputobj, basedir, scatter, callback, **kwargs)
                elif method == "flat_crossproduct":
                    jobs = flat_crossproduct_scatter(step, inputobj, basedir, scatter, callback, 0, **kwargs)
            else:
                jobs = step.job(inputobj, basedir, callback, **kwargs)

            step.submitted = True

            for j in jobs:
                yield j
        except Exception as e:
            _logger.error(e)
            self.processStatus = "permanentFail"
            step.completed = True
Example #29
    def __init__(self, toc, j):
        self.typedoc = StringIO.StringIO()
        self.toc = toc
        self.subs = {}
        self.docParent = {}
        self.docAfter = {}
        for t in j:
            if "extends" in t:
                for e in aslist(t["extends"]):
                    add_dictlist(self.subs, e, t["name"])
                    if "docParent" not in t and "docAfter" not in t:
                        add_dictlist(self.docParent, e, t["name"])

            if t.get("docParent"):
                add_dictlist(self.docParent, t["docParent"], t["name"])

            if t.get("docAfter"):
                add_dictlist(self.docAfter, t["docAfter"], t["name"])

        _, _, metaschema_loader = schema.get_metaschema()
        alltypes = schema.extend_and_specialize(j, metaschema_loader)

        self.typemap = {}
        self.uses = {}
        for t in alltypes:
            self.typemap[t["name"]] = t
            if t["type"] == "https://w3id.org/cwl/salad#record":
                for f in t["fields"]:
                    p = has_types(f)
                    for tp in p:
                        if tp not in self.uses:
                            self.uses[tp] = []
                        if (t["name"], f["name"]) not in self.uses[tp]:
                            _, frg1 = urlparse.urldefrag(t["name"])
                            _, frg2 = urlparse.urldefrag(f["name"])
                            self.uses[tp].append((frg1, frg2))

        for f in alltypes:
            if ("extends" not in f) and ("docParent" not in f) and ("docAfter" not in f):
                self.render_type(f, 1)
Example #30
    def object_from_state(self, parms, frag_only):
        inputobj = {}
        for inp in parms:
            iid = inp["id"]
            if frag_only:
                (_, iid) = urlparse.urldefrag(iid)
                iid = iid.split(".")[-1]
            if "source" in inp:
                connections = aslist(inp["source"])
                for src in connections:
                    if src in self.state and self.state[src] is not None:
                        if not self.match_types(inp["type"], self.state[src], iid, inputobj,
                                                inp.get("linkMerge", ("merge_nested" if len(connections) > 1 else None))):
                            raise WorkflowException("Type mismatch between source '%s' (%s) and sink '%s' (%s)" % (src, self.state[src].parameter["type"], inp["id"], inp["type"]))
                    elif src not in self.state:
                        raise WorkflowException("Connected source '%s' on parameter '%s' does not exist" % (src, inp["id"]))
                    else:
                        return None
            elif "default" in inp:
                inputobj[iid] = inp["default"]
            else:
                raise WorkflowException("Value for %s not specified" % (inp["id"]))
        return inputobj
Example #31
def _draftDraft3dev1toDev2(doc, loader, baseuri):
    doc = _updateDev2Script(doc)
    if isinstance(doc, basestring):
        return doc

    # Convert expressions
    if isinstance(doc, dict):
        if "@import" in doc:
            r, _ = loader.resolve_ref(doc["@import"], base_url=baseuri)
            return _draftDraft3dev1toDev2(r, loader, r["id"])

        for a in doc:
            doc[a] = _draftDraft3dev1toDev2(doc[a], loader, baseuri)

        if "class" in doc and (doc["class"] in ("CommandLineTool", "Workflow", "ExpressionTool")):
            added = False
            if "requirements" in doc:
                for r in doc["requirements"]:
                    if r["class"] == "ExpressionEngineRequirement":
                        if "engineConfig" in r:
                            doc["requirements"].append({
                                "class":"InlineJavascriptRequirement",
                                "expressionLib": [updateScript(sc) for sc in aslist(r["engineConfig"])]
                            })
                            added = True
                        doc["requirements"] = [rq for rq in doc["requirements"] if rq["class"] != "ExpressionEngineRequirement"]
                        break
            else:
                doc["requirements"] = []
            if not added:
                doc["requirements"].append({"class":"InlineJavascriptRequirement"})

    elif isinstance(doc, list):
        return [_draftDraft3dev1toDev2(a, loader, baseuri) for a in doc]

    return doc
Example #32
    def resolve_all(self, document, base_url, file_base=None):
        loader = self
        metadata = {}
        if file_base is None:
            file_base = base_url

        if isinstance(document, dict):
            # Handle $import and $include
            if ('$import' in document or '$include' in document):
                return self.resolve_ref(document, file_base)
        elif isinstance(document, list):
            pass
        else:
            return document, metadata

        newctx = None
        if isinstance(document, dict):
            # Handle $base, $profile, $namespaces, $schemas and $graph
            if "$base" in document:
                base_url = document["$base"]

            if "$profile" in document:
                if not newctx:
                    newctx = SubLoader(self)
                prof = self.fetch(document["$profile"])
                newctx.add_namespaces(document.get("$namespaces", {}),
                                      document["$profile"])
                newctx.add_schemas(document.get("$schemas", []),
                                   document["$profile"])

            if "$namespaces" in document:
                if not newctx:
                    newctx = SubLoader(self)
                newctx.add_namespaces(document["$namespaces"])

            if "$schemas" in document:
                if not newctx:
                    newctx = SubLoader(self)
                newctx.add_schemas(document["$schemas"], file_base)

            if newctx:
                loader = newctx

            if "$graph" in document:
                metadata = {k: v for k, v in document.items() if k != "$graph"}
                document = document["$graph"]
                metadata, _ = loader.resolve_all(metadata, base_url, file_base)

        if isinstance(document, dict):
            for identifer in loader.identity_links:
                if identifer in document:
                    if isinstance(document[identifer], basestring):
                        document[identifer] = loader.expand_url(
                            document[identifer], base_url, scoped=True)
                        if document[identifer] not in loader.idx or isinstance(
                                loader.idx[document[identifer]], basestring):
                            loader.idx[document[identifer]] = document
                        base_url = document[identifer]
                    elif isinstance(document[identifer], list):
                        for n, v in enumerate(document[identifer]):
                            document[identifer][n] = loader.expand_url(
                                document[identifer][n], base_url, scoped=True)
                            if document[identifer][n] not in loader.idx:
                                loader.idx[document[identifer]
                                           [n]] = document[identifer][n]

            for d in list(document.keys()):  # iterate over a copy; the loop renames keys
                d2 = loader.expand_url(d, "", scoped=False, vocab_term=True)
                if d != d2:
                    document[d2] = document[d]
                    del document[d]

            for d in loader.url_fields:
                if d in document:
                    if isinstance(document[d], basestring):
                        document[d] = loader.expand_url(
                            document[d],
                            base_url,
                            scoped=False,
                            vocab_term=(d in loader.vocab_fields))
                    elif isinstance(document[d], list):
                        document[d] = [
                            loader.expand_url(
                                url,
                                base_url,
                                scoped=False,
                                vocab_term=(d in loader.vocab_fields))
                            if isinstance(url, basestring) else url
                            for url in document[d]
                        ]

            try:
                for key, val in document.items():
                    document[key], _ = loader.resolve_all(
                        val, base_url, file_base)
            except validate.ValidationException as v:
                _logger.debug("loader is %s", id(loader))
                raise validate.ValidationException(
                    "(%s) (%s) Validation error in field %s:\n%s" %
                    (id(loader), file_base, key, validate.indent(str(v))))

        elif isinstance(document, list):
            i = 0
            try:
                while i < len(document):
                    val = document[i]
                    if isinstance(val, dict) and "$import" in val:
                        l, _ = loader.resolve_ref(val, file_base)
                        if isinstance(l, list):
                            del document[i]
                            for item in aslist(l):
                                document.insert(i, item)
                                i += 1
                        else:
                            document[i] = l
                            i += 1
                    else:
                        document[i], _ = loader.resolve_all(
                            val, base_url, file_base)
                        i += 1
            except validate.ValidationException as v:
                raise validate.ValidationException(
                    "(%s) (%s) Validation error in position %i:\n%s" %
                    (id(loader), file_base, i, validate.indent(str(v))))

            for identifer in loader.identity_links:
                if identifer in metadata:
                    if isinstance(metadata[identifer], basestring):
                        metadata[identifer] = loader.expand_url(
                            metadata[identifer], base_url, scoped=True)
                        loader.idx[metadata[identifer]] = document

        return document, metadata
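
The "$graph" handling above, reduced to data: metadata is everything except the graph, and the graph becomes the working document.

document = {"cwlVersion": "draft-3", "$graph": [{"id": "#main"}]}
metadata = {k: v for k, v in document.items() if k != "$graph"}
document = document["$graph"]
print(metadata)  # {'cwlVersion': 'draft-3'}
print(document)  # [{'id': '#main'}]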
Example #33
    def __init__(self, toolpath_object, **kwargs):
        (_, self.names, _) = get_schema()
        self.tool = toolpath_object
        self.requirements = kwargs.get("requirements", []) + self.tool.get(
            "requirements", [])
        self.hints = kwargs.get("hints", []) + self.tool.get("hints", [])
        if "loader" in kwargs:
            self.formatgraph = kwargs["loader"].graph

        self.validate_hints(self.tool.get("hints", []),
                            strict=kwargs.get("strict"))

        self.schemaDefs = {}

        sd, _ = self.get_requirement("SchemaDefRequirement")

        if sd:
            sdtypes = sd["types"]
            av = schema_salad.schema.make_valid_avro(
                sdtypes, {t["name"]: t
                          for t in sdtypes}, set())
            for i in av:
                self.schemaDefs[i["name"]] = i
            avro.schema.make_avsc_object(av, self.names)

        # Build record schema from inputs
        self.inputs_record_schema = {
            "name": "input_record_schema",
            "type": "record",
            "fields": []
        }
        self.outputs_record_schema = {
            "name": "outputs_record_schema",
            "type": "record",
            "fields": []
        }

        for key in ("inputs", "outputs"):
            for i in self.tool[key]:
                c = copy.copy(i)
                doc_url, _ = urlparse.urldefrag(c['id'])
                c["name"] = shortname(c["id"])
                del c["id"]

                if "type" not in c:
                    raise validate.ValidationException(
                        "Missing `type` in parameter `%s`" % c["name"])

                if "default" in c and "null" not in aslist(c["type"]):
                    c["type"] = ["null"] + aslist(c["type"])

                if key == "inputs":
                    self.inputs_record_schema["fields"].append(c)
                elif key == "outputs":
                    self.outputs_record_schema["fields"].append(c)

        try:
            self.inputs_record_schema = schema_salad.schema.make_valid_avro(
                self.inputs_record_schema, {}, set())
            avro.schema.make_avsc_object(self.inputs_record_schema, self.names)
        except avro.schema.SchemaParseException as e:
            raise validate.ValidationException(
                "Got error `%s` while prcoessing inputs of %s:\n%s" %
                (str(e), self.tool["id"],
                 json.dumps(self.inputs_record_schema, indent=4)))

        try:
            self.outputs_record_schema = schema_salad.schema.make_valid_avro(
                self.outputs_record_schema, {}, set())
            avro.schema.make_avsc_object(self.outputs_record_schema,
                                         self.names)
        except avro.schema.SchemaParseException as e:
            raise validate.ValidationException(
                "Got error `%s` while prcoessing outputs of %s:\n%s" %
                (str(e), self.tool["id"],
                 json.dumps(self.outputs_record_schema, indent=4)))
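
A parameter with a default must also accept null, so the loop above prepends "null" to the type union unless it is already present. The rule in isolation (aslist is re-declared here so the sketch is self-contained):

def aslist(l):
    # Wrap scalars in a list, mirroring the module's aslist helper.
    return l if isinstance(l, list) else [l]

def make_optional_if_defaulted(param):
    # A parameter carrying a default value must also accept "null".
    if "default" in param and "null" not in aslist(param["type"]):
        param["type"] = ["null"] + aslist(param["type"])
    return param

# make_optional_if_defaulted({"type": "string", "default": "x"})
# -> {"type": ["null", "string"], "default": "x"}
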
Example #34
    def try_make_job(self, step, basedir, **kwargs):
        inputparms = step.tool["inputs"]
        outputparms = step.tool["outputs"]

        supportsMultipleInput = bool(self.workflow.get_requirement("MultipleInputFeatureRequirement")[0])

        try:
            inputobj = object_from_state(self.state, inputparms, False, supportsMultipleInput)
            if inputobj is None:
                _logger.debug("[workflow %s] job step %s not ready", self.name, step.id)
                return

            _logger.debug("[step %s] starting job step %s of workflow %s", id(step), step.id, id(self))

            if step.submitted:
                return

            callback = functools.partial(self.receive_output, step, outputparms)

            valueFrom = {i["id"]: i["valueFrom"] for i in step.tool["inputs"] if "valueFrom" in i}

            if len(valueFrom) > 0 and not bool(self.workflow.get_requirement("StepInputExpressionRequirement")[0]):
                raise WorkflowException("Workflow step contains valueFrom but StepInputExpressionRequirement not in requirements")

            vfinputs = {shortname(k): v for k,v in inputobj.iteritems()}
            def valueFromFunc(k, v):
                if k in valueFrom:
                    return expression.do_eval(valueFrom[k], vfinputs, self.workflow.requirements,
                                       None, None, {}, context=v)
                else:
                    return v

            if "scatter" in step.tool:
                scatter = aslist(step.tool["scatter"])
                method = step.tool.get("scatterMethod")
                if method is None and len(scatter) != 1:
                    raise WorkflowException("Must specify scatterMethod when scattering over multiple inputs")
                if "valueFrom" not in kwargs:
                    kwargs["valueFrom"] = valueFromFunc
                if method == "dotproduct" or method is None:
                    jobs = dotproduct_scatter(step, inputobj, basedir, scatter,
                                              callback, **kwargs)
                elif method == "nested_crossproduct":
                    jobs = nested_crossproduct_scatter(step, inputobj,
                                                       basedir, scatter, callback, **kwargs)
                elif method == "flat_crossproduct":
                    jobs = flat_crossproduct_scatter(step, inputobj, basedir,
                                                     scatter, callback, 0, **kwargs)
            else:
                _logger.debug("[workflow %s] Job is input %s", self.name, json.dumps(inputobj, indent=4))
                inputobj = {k: valueFromFunc(k, v) for k,v in inputobj.items()}
                _logger.debug("[workflow %s] Evaluated job input to %s", self.name, json.dumps(inputobj, indent=4))
                jobs = step.job(inputobj, basedir, callback, **kwargs)

            step.submitted = True

            for j in jobs:
                yield j
        except WorkflowException:
            raise
        except Exception as e:
            _logger.exception("Unhandled exception")
            self.processStatus = "permanentFail"
            step.completed = True
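
The scatter methods differ mainly in which input combinations they enumerate: dotproduct pairs the i-th element of every scattered input, while the crossproduct variants take the Cartesian product. A rough sketch of the combination logic only; the real dotproduct_scatter/*_crossproduct_scatter helpers also construct jobs and wire up callbacks:

import itertools

def dotproduct_combinations(inputobj, scatter):
    # Pair the i-th element of every scattered input; all arrays must
    # therefore have the same length.
    assert len({len(inputobj[s]) for s in scatter}) == 1
    for values in zip(*[inputobj[s] for s in scatter]):
        combo = dict(inputobj)
        combo.update(zip(scatter, values))
        yield combo

def crossproduct_combinations(inputobj, scatter):
    # Every combination of the scattered inputs (flat ordering).
    for values in itertools.product(*[inputobj[s] for s in scatter]):
        combo = dict(inputobj)
        combo.update(zip(scatter, values))
        yield combo

# list(dotproduct_combinations({"x": [1, 2], "y": ["a", "b"]}, ["x", "y"]))
# -> [{"x": 1, "y": "a"}, {"x": 2, "y": "b"}]
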
    def resolve_all(self, document, base_url, file_base=None):
        loader = self
        metadata = {}
        if file_base is None:
            file_base = base_url

        if isinstance(document, dict):
            # Handle $import and $include
            if ('$import' in document or '$include' in document):
                return self.resolve_ref(document, file_base)
        elif isinstance(document, list):
            pass
        else:
            return document, metadata

        newctx = None
        if isinstance(document, dict):
            # Handle $base, $profile, $namespaces, $schemas and $graph
            if "$base" in document:
                base_url = document["$base"]

            if "$profile" in document:
                if not newctx:
                    newctx = SubLoader(self)
                prof = self.fetch(document["$profile"])
                newctx.add_namespaces(document.get("$namespaces", {}), document["$profile"])
                newctx.add_schemas(document.get("$schemas", []), document["$profile"])

            if "$namespaces" in document:
                if not newctx:
                    newctx = SubLoader(self)
                newctx.add_namespaces(document["$namespaces"])

            if "$schemas" in document:
                if not newctx:
                    newctx = SubLoader(self)
                newctx.add_schemas(document["$schemas"], file_base)

            if newctx:
                loader = newctx

            if "$graph" in document:
                metadata = {k: v for k,v in document.items() if k != "$graph"}
                document = document["$graph"]
                metadata, _ = loader.resolve_all(metadata, base_url, file_base)

        if isinstance(document, dict):
            for identifier in loader.identity_links:
                if identifier in document:
                    if isinstance(document[identifier], basestring):
                        document[identifier] = loader.expand_url(document[identifier], base_url, scoped=True)
                        if document[identifier] not in loader.idx or isinstance(loader.idx[document[identifier]], basestring):
                            loader.idx[document[identifier]] = document
                        base_url = document[identifier]
                    elif isinstance(document[identifier], list):
                        for n, v in enumerate(document[identifier]):
                            document[identifier][n] = loader.expand_url(document[identifier][n], base_url, scoped=True)
                            if document[identifier][n] not in loader.idx:
                                loader.idx[document[identifier][n]] = document[identifier][n]

            for d in list(document.keys()):
                d2 = loader.expand_url(d, "", scoped=False, vocab_term=True)
                if d != d2:
                    document[d2] = document[d]
                    del document[d]

            for d in loader.url_fields:
                if d in document:
                    if isinstance(document[d], basestring):
                        document[d] = loader.expand_url(document[d], base_url, scoped=False, vocab_term=(d in loader.vocab_fields))
                    elif isinstance(document[d], list):
                        document[d] = [loader.expand_url(url, base_url, scoped=False, vocab_term=(d in loader.vocab_fields)) if isinstance(url, basestring) else url for url in document[d] ]

            try:
                for key, val in document.items():
                    document[key], _ = loader.resolve_all(val, base_url, file_base)
            except validate.ValidationException as v:
                _logger.debug("loader is %s", id(loader))
                raise validate.ValidationException("(%s) (%s) Validation error in field %s:\n%s" % (id(loader), file_base, key, validate.indent(str(v))))

        elif isinstance(document, list):
            i = 0
            try:
                while i < len(document):
                    val = document[i]
                    if isinstance(val, dict) and "$import" in val:
                        l, _ = loader.resolve_ref(val, file_base)
                        if isinstance(l, list):
                            del document[i]
                            for item in aslist(l):
                                document.insert(i, item)
                                i += 1
                        else:
                            document[i] = l
                            i += 1
                    else:
                        document[i], _ = loader.resolve_all(val, base_url, file_base)
                        i += 1
            except validate.ValidationException as v:
                raise validate.ValidationException("(%s) (%s) Validation error in position %i:\n%s" % (id(loader), file_base, i, validate.indent(str(v))))

        return document, metadata
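
When the document has a $graph, everything apart from the graph itself is file-level metadata and is resolved on its own. The split in isolation:

def split_graph(document):
    # Everything except "$graph" is file-level metadata.
    if isinstance(document, dict) and "$graph" in document:
        metadata = {k: v for k, v in document.items() if k != "$graph"}
        return document["$graph"], metadata
    return document, {}

# split_graph({"$base": "http://example.com/", "$graph": [{"id": "#a"}]})
# -> ([{"id": "#a"}], {"$base": "http://example.com/"})
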
    def collect_output(self, schema, builder, outdir):
        r = None
        if "outputBinding" in schema:
            binding = schema["outputBinding"]
            if "glob" in binding:
                r = []
                bg = builder.do_eval(binding["glob"])
                for gb in aslist(bg):
                    r.extend([{"path": g, "class": "File"}
                              for g in builder.fs_access.glob(os.path.join(outdir, gb))])
                for files in r:
                    checksum = hashlib.sha1()
                    with builder.fs_access.open(files["path"], "rb") as f:
                        contents = f.read(CONTENT_LIMIT)
                        if binding.get("loadContents"):
                            files["contents"] = contents
                        filesize = 0
                        while contents != "":
                            checksum.update(contents)
                            filesize += len(contents)
                            contents = f.read(1024 * 1024)
                    files["checksum"] = "sha1$%s" % checksum.hexdigest()
                    files["size"] = filesize

            if "outputEval" in binding:
                r = builder.do_eval(binding["outputEval"], context=r)
                if schema["type"] == "File" and (not isinstance(r, dict)
                                                 or "path" not in r):
                    raise WorkflowException(
                        "Expression must return a file object.")

            if schema["type"] == "File":
                if not r:
                    raise WorkflowException(
                        "No matches for output file with glob: {}.".format(
                            binding["glob"]))
                if len(r) > 1:
                    raise WorkflowException(
                        "Multiple matches for output item that is a single file."
                    )
                r = r[0]

            if schema["type"] == "File" and "secondaryFiles" in binding:
                r["secondaryFiles"] = []
                for sf in aslist(binding["secondaryFiles"]):
                    if isinstance(sf, dict):
                        sfpath = builder.do_eval(sf, context=r["path"])
                    else:
                        sfpath = {
                            "path": substitute(r["path"], sf),
                            "class": "File"
                        }
                    if isinstance(sfpath, list):
                        r["secondaryFiles"].extend(sfpath)
                    else:
                        r["secondaryFiles"].append(sfpath)

                for sf in r["secondaryFiles"]:
                    if not builder.fs_access.exists(sf["path"]):
                        raise WorkflowException(
                            "Missing secondary file of '%s' of primary file '%s'"
                            % (sf["path"], r["path"]))

        if not r and schema["type"] == "record":
            r = {}
            for f in schema["fields"]:
                r[f["name"]] = self.collect_output(f, builder, outdir)

        return r
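
Each globbed output file above is fingerprinted with a SHA-1 checksum in sha1$<hex> form plus a byte count, read in chunks so large files are never fully buffered. The same computation over an ordinary local file (a sketch using a uniform chunk size; the original reads CONTENT_LIMIT bytes first so it can optionally keep the contents):

import hashlib

def checksum_and_size(path, chunk=1024 * 1024):
    # Stream the file through SHA-1 and count its bytes on the way.
    h = hashlib.sha1()
    size = 0
    with open(path, "rb") as f:
        data = f.read(chunk)
        while data:
            h.update(data)
            size += len(data)
            data = f.read(chunk)
    return "sha1$%s" % h.hexdigest(), size
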
    def __init__(self, toolpath_object, pos, **kwargs):
        try:
            makeTool = kwargs.get("makeTool")
            self.embedded_tool = makeTool(toolpath_object["run"], **kwargs)
        except validate.ValidationException as v:
            raise WorkflowException(
                "Tool definition %s failed validation:\n%s" %
                (toolpath_object["run"]["id"], validate.indent(str(v))))

        if "id" in toolpath_object:
            self.id = toolpath_object["id"]
        else:
            self.id = "#step" + str(pos)

        for field in ("inputs", "outputs"):
            for i in toolpath_object[field]:
                inputid = i["id"]
                (_, d) = urlparse.urldefrag(inputid)
                frag = d.split(".")[-1]
                p = urlparse.urljoin(toolpath_object["run"].get("id", self.id),
                                     "#" + frag)
                found = False
                for a in self.embedded_tool.tool[field]:
                    if a["id"] == p:
                        i.update(a)
                        found = True
                if not found:
                    raise WorkflowException(
                        "Did not find %s parameter '%s' in workflow step" %
                        (field, p))
                i["id"] = inputid

        super(WorkflowStep, self).__init__(toolpath_object,
                                           "Process",
                                           do_validate=False,
                                           **kwargs)

        if self.embedded_tool.tool["class"] == "Workflow":
            (feature,
             _) = self.get_requirement("SubworkflowFeatureRequirement")
            if not feature:
                raise WorkflowException(
                    "Workflow contains embedded workflow but SubworkflowFeatureRequirement not declared"
                )

        if "scatter" in self.tool:
            (feature, _) = self.get_requirement("ScatterFeatureRequirement")
            if not feature:
                raise WorkflowException(
                    "Workflow contains scatter but ScatterFeatureRequirement not declared"
                )

            inputparms = copy.deepcopy(self.tool["inputs"])
            outputparms = copy.deepcopy(self.tool["outputs"])
            scatter = aslist(self.tool["scatter"])

            method = self.tool.get("scatterMethod")
            if method is None and len(scatter) != 1:
                raise WorkflowException(
                    "Must specify scatterMethod when scattering over multiple inputs"
                )

            inp_map = {i["id"]: i for i in inputparms}
            for s in scatter:
                if s not in inp_map:
                    raise WorkflowException("Invalid Scatter parameter '%s'" %
                                            s)

                inp_map[s]["type"] = {
                    "type": "array",
                    "items": inp_map[s]["type"]
                }

            if self.tool.get("scatterMethod") == "nested_crossproduct":
                nesting = len(scatter)
            else:
                nesting = 1

            for r in xrange(0, nesting):
                for i in outputparms:
                    i["type"] = {"type": "array", "items": i["type"]}
            self.tool["inputs"] = inputparms
            self.tool["outputs"] = outputparms
    def __init__(self, toolpath_object, validateAs, do_validate=True, **kwargs):
        (_, self.names) = get_schema()
        self.tool = toolpath_object

        if do_validate:
            try:
                # Validate tool document
                validate.validate_ex(self.names.get_name(validateAs, ""), self.tool, strict=kwargs.get("strict"))
            except validate.ValidationException as v:
                raise validate.ValidationException("Could not validate %s as %s:\n%s" % (self.tool.get("id"), validateAs, validate.indent(str(v))))

        self.requirements = kwargs.get("requirements", []) + self.tool.get("requirements", [])
        self.hints = kwargs.get("hints", []) + self.tool.get("hints", [])

        self.validate_hints(self.tool.get("hints", []), strict=kwargs.get("strict"))

        self.schemaDefs = {}

        sd, _ = self.get_requirement("SchemaDefRequirement")

        if sd:
            for i in sd["types"]:
                avro.schema.make_avsc_object(i, self.names)
                self.schemaDefs[i["name"]] = i

        # Build record schema from inputs
        self.inputs_record_schema = {"name": "input_record_schema", "type": "record", "fields": []}
        for i in self.tool["inputs"]:
            c = copy.copy(i)
            doc_url, fragment = urlparse.urldefrag(c['id'])
            c["name"] = fragment
            del c["id"]

            if "type" not in c:
                raise validate.ValidationException("Missing `type` in parameter `%s`" % c["name"])

            if "default" in c:
                c["type"] = ["null"] + aslist(c["type"])
            self.inputs_record_schema["fields"].append(c)

        avro.schema.make_avsc_object(self.inputs_record_schema, self.names)

        self.outputs_record_schema = {"name": "outputs_record_schema", "type": "record", "fields": []}
        for i in self.tool["outputs"]:
            c = copy.copy(i)
            doc_url, fragment = urlparse.urldefrag(c['id'])
            c["name"] = fragment
            del c["id"]

            if "type" not in c:
                raise validate.ValidationException("Missing `type` in parameter `%s`" % c["name"])

            if "default" in c:
                c["type"] = ["null"] + aslist(c["type"])
            self.outputs_record_schema["fields"].append(c)

        avro.schema.make_avsc_object(self.outputs_record_schema, self.names)
    def resolve_all(self, document, base_url):
        loader = self
        metadata = {}

        if isinstance(document, dict):
            inc = '@include' in document
            if '@import' in document or '@include' in document:
                document, _ = self.resolve_ref(document, base_url)
            else:
                for identifier in self.identity_links:
                    if identifier in document:
                        if isinstance(document[identifier], basestring):
                            document[identifier] = self.expand_url(document[identifier], base_url, scoped=True)
                            if document[identifier] not in self.idx or isinstance(self.idx[document[identifier]], basestring):
                                self.idx[document[identifier]] = document
                            base_url = document[identifier]
                        elif isinstance(document[identifier], list):
                            for n, v in enumerate(document[identifier]):
                                document[identifier][n] = self.expand_url(document[identifier][n], base_url, scoped=True)
                                if document[identifier][n] not in self.idx:
                                    self.idx[document[identifier][n]] = document[identifier][n]
            if inc:
                return document, {}

            if isinstance(document, dict) and "@context" in document:
                loader = Loader(self.ctx)
                loader.idx = self.idx
                loader.add_context(document["@context"])
                if "@base" in loader.ctx:
                    base_url = loader.ctx["@base"]

            if "@graph" in document:
                metadata = {k: v for k,v in document.items() if k != "@graph"}
                document = document["@graph"]
                metadata, _ = self.resolve_all(metadata, base_url)

        elif isinstance(document, list):
            pass
        else:
            return document, metadata

        try:
            if isinstance(document, dict):
                for d in list(document.keys()):
                    d2 = self.expand_url(d, "", scoped=False, vocab_term=True)
                    if d != d2:
                        document[d2] = document[d]
                        del document[d]

                for d in loader.url_fields:
                    if d in document:
                        if isinstance(document[d], basestring):
                            document[d] = loader.expand_url(document[d], base_url, scoped=False, vocab_term=(d in loader.vocab_fields))
                        elif isinstance(document[d], list):
                            document[d] = [loader.expand_url(url, base_url, scoped=False, vocab_term=(d in loader.vocab_fields)) if isinstance(url, basestring) else url for url in document[d] ]

                for key, val in document.items():
                    document[key], _ = loader.resolve_all(val, base_url)

            elif isinstance(document, list):
                i = 0
                while i < len(document):
                    val = document[i]
                    if isinstance(val, dict) and "@import" in val and val.get("inline"):
                        l, _ = loader.resolve_all(val, base_url)
                        del document[i]
                        for item in aslist(l):
                            document.insert(i, item)
                            i += 1
                    else:
                        document[i], _ = loader.resolve_all(val, base_url)
                        i += 1

        except validate.ValidationException as v:
            if isinstance(document, dict):
                raise validate.ValidationException("Validation error in field %s:\n%s" % (key, validate.indent(str(v))))
            else:
                raise validate.ValidationException("Validation error in position %i:\n%s" % (i, validate.indent(str(v))))

        return document, metadata
def exeval(ex, jobinput, requirements, outdir, tmpdir, context, pull_image):
    if ex["engine"] == "cwl:JsonPointer":
        try:
            obj = {
                "job": jobinput,
                "context": context,
                "outdir": outdir,
                "tmpdir": tmpdir
            }
            return avro_ld.ref_resolver.resolve_json_pointer(obj, ex["script"])
        except ValueError as v:
            raise WorkflowException("%s in %s" % (v, obj))

    for r in reversed(requirements):
        if r["class"] == "ExpressionEngineRequirement" and r["id"] == ex[
                "engine"]:
            runtime = []

            class DR(object):
                pass

            dr = DR()
            dr.requirements = r.get("requirements", [])
            dr.hints = r.get("hints", [])

            img_id = None
            (docker_req,
             docker_is_req) = process.get_feature(dr, "DockerRequirement")
            if docker_req:
                img_id = docker.get_from_requirements(docker_req,
                                                      docker_is_req,
                                                      pull_image)
            if img_id:
                runtime = ["docker", "run", "-i", "--rm", img_id]

            exdefs = []
            for exdef in r.get("engineConfig", []):
                if isinstance(exdef, dict) and "ref" in exdef:
                    with open(exdef["ref"][7:]) as f:
                        exdefs.append(f.read())
                elif isinstance(exdef, basestring):
                    exdefs.append(exdef)

            inp = {
                "script": ex["script"],
                "engineConfig": exdefs,
                "job": jobinput,
                "context": context,
                "outdir": outdir,
                "tmpdir": tmpdir,
            }

            _logger.debug("Invoking expression engine %s with %s",
                          runtime + aslist(r["engineCommand"]),
                          json.dumps(inp, indent=4))

            sp = subprocess.Popen(runtime + aslist(r["engineCommand"]),
                                  shell=False,
                                  close_fds=True,
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE)

            (stdoutdata, stderrdata) = sp.communicate(json.dumps(inp) + "\n\n")
            if sp.returncode != 0:
                raise WorkflowException(
                    "Expression engine returned non-zero exit code on evaluation of\n%s"
                    % json.dumps(inp, indent=4))

            return json.loads(stdoutdata)

    raise WorkflowException("Unknown expression engine '%s'" % ex["engine"])
Example #41
    def bind_input(self, schema, datum, lead_pos=[], tail_pos=[]):
        bindings = []
        binding = None
        if "inputBinding" in schema and isinstance(schema["inputBinding"],
                                                   dict):
            binding = copy.copy(schema["inputBinding"])

            if "position" in binding:
                binding["position"] = aslist(lead_pos) + aslist(
                    binding["position"]) + aslist(tail_pos)
            else:
                binding["position"] = aslist(lead_pos) + [0] + aslist(tail_pos)

            if "valueFrom" in binding:
                binding["do_eval"] = binding["valueFrom"]
            binding["valueFrom"] = datum

        # Handle union types
        if isinstance(schema["type"], list):
            for t in schema["type"]:
                if isinstance(t, basestring) and self.names.has_name(t, ""):
                    avsc = self.names.get_name(t, "")
                elif isinstance(t,
                                dict) and "name" in t and self.names.has_name(
                                    t["name"], ""):
                    avsc = self.names.get_name(t["name"], "")
                else:
                    avsc = avro.schema.make_avsc_object(t, self.names)
                if validate.validate(avsc, datum):
                    schema = copy.deepcopy(schema)
                    schema["type"] = t
                    return self.bind_input(schema,
                                           datum,
                                           lead_pos=lead_pos,
                                           tail_pos=tail_pos)
            raise validate.ValidationException("'%s' is not a valid union %s" %
                                               (datum, schema["type"]))
        elif isinstance(schema["type"], dict):
            st = copy.deepcopy(schema["type"])
            if binding and "inputBinding" not in st and "itemSeparator" not in binding and st[
                    "type"] in ("array", "map"):
                st["inputBinding"] = {}
            bindings.extend(
                self.bind_input(st,
                                datum,
                                lead_pos=lead_pos,
                                tail_pos=tail_pos))
        else:
            if schema["type"] in self.schemaDefs:
                schema = self.schemaDefs[schema["type"]]

            if schema["type"] == "record":
                for f in schema["fields"]:
                    if f["name"] in datum:
                        bindings.extend(
                            self.bind_input(f,
                                            datum[f["name"]],
                                            lead_pos=lead_pos,
                                            tail_pos=f["name"]))
                    else:
                        datum[f["name"]] = f.get("default")

            if schema["type"] == "map":
                for n, item in datum.items():
                    b2 = None
                    if binding:
                        b2 = copy.deepcopy(binding)
                        b2["valueFrom"] = [n, item]
                    bindings.extend(
                        self.bind_input(
                            {
                                "type": schema["values"],
                                "inputBinding": b2
                            },
                            item,
                            lead_pos=n,
                            tail_pos=tail_pos))
                binding = None

            if schema["type"] == "array":
                for n, item in enumerate(datum):
                    b2 = None
                    if binding:
                        b2 = copy.deepcopy(binding)
                        b2["valueFrom"] = item
                    bindings.extend(
                        self.bind_input(
                            {
                                "type": schema["items"],
                                "inputBinding": b2
                            },
                            item,
                            lead_pos=n,
                            tail_pos=tail_pos))
                binding = None

            if schema["type"] == "File":
                self.files.append(datum)
                if binding and binding.get("loadContents"):
                    with self.fs_access.open(datum["path"], "rb") as f:
                        datum["contents"] = f.read(CONTENT_LIMIT)

                if "secondaryFiles" in schema:
                    if "secondaryFiles" not in datum:
                        datum["secondaryFiles"] = []
                    for sf in aslist(schema["secondaryFiles"]):
                        if isinstance(sf, dict) or "$(" in sf or "${" in sf:
                            sfpath = self.do_eval(sf, context=datum)
                            if isinstance(sfpath, basestring):
                                sfpath = {"path": sfpath, "class": "File"}
                        else:
                            sfpath = {
                                "path": substitute(datum["path"], sf),
                                "class": "File"
                            }
                        if isinstance(sfpath, list):
                            datum["secondaryFiles"].extend(sfpath)
                        else:
                            datum["secondaryFiles"].append(sfpath)
                for sf in datum.get("secondaryFiles", []):
                    self.files.append(sf)

        # Position to front of the sort key
        if binding:
            for bi in bindings:
                bi["position"] = binding["position"] + bi["position"]
            bindings.append(binding)

        return bindings
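
Sort keys explain the position bookkeeping above: positions are lists, lead_pos/tail_pos are prepended and appended as bind_input recurses, and a parent binding's position is prefixed onto its children, so one lexicographic sort of the flat list restores the nesting order. For example (hypothetical values):

bindings = [
    {"position": [0, 1], "valueFrom": "--bar"},
    {"position": [-1000000, 0], "valueFrom": "tool"},  # baseCommand slot
    {"position": [0, 0], "valueFrom": "--foo"},
]
bindings.sort(key=lambda b: b["position"])
# order is now: "tool", "--foo", "--bar"
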
    def try_make_job(self, step, basedir, **kwargs):
        inputobj = {}

        _logger.debug("Try to make job %s", step.id)

        (scatterSpec, _) = get_feature("Scatter", requirements=step.tool.get("requirements"), hints=step.tool.get("hints"))
        if scatterSpec:
            inputparms = copy.deepcopy(step.tool["inputs"])
            outputparms = copy.deepcopy(step.tool["outputs"])
            scatter = aslist(scatterSpec["scatter"])

            inp_map = {i["id"]: i for i in inputparms}
            for s in scatter:
                if s not in inp_map:
                    raise WorkflowException("Invalid Scatter parameter '%s'" % s)

                inp_map[s]["type"] = {"type": "array", "items": inp_map[s]["type"]}

            if scatterSpec.get("scatterMethod") == "nested_crossproduct":
                nesting = len(scatter)
            else:
                nesting = 1

            for r in xrange(0, nesting):
                for i in outputparms:
                    i["type"] = {"type": "array", "items": i["type"]}
        else:
            inputparms = step.tool["inputs"]
            outputparms = step.tool["outputs"]

        for inp in inputparms:
            _logger.debug("Trying input %s", inp)
            iid = inp["id"]
            if "connect" in inp:
                connections = inp["connect"]
                for connection in aslist(connections):
                    src = connection["source"]
                    if src in self.state and self.state[src] is not None:
                        if not self.match_types(inp["type"], self.state[src], inp["id"], inputobj):
                            raise WorkflowException("Type mismatch between source '%s' (%s) and sink '%s' (%s)" % (src, self.state[src].parameter["type"], inp["id"], inp["type"]))
                    elif src not in self.state:
                        raise WorkflowException("Connect source '%s' on parameter '%s' does not exist" % (src, inp["id"]))
                    else:
                        return
            elif "default" in inp:
                inputobj[iid] = inp["default"]
            else:
                raise WorkflowException("Value for %s not specified" % (inp["id"]))

        _logger.info("Creating job with input: %s", pprint.pformat(inputobj))

        callback = functools.partial(self.receive_output, step, outputparms)

        if scatterSpec:
            method = scatterSpec.get("scatterMethod")
            if method is None and len(scatter) != 1:
                raise WorkflowException("Must specify scatterMethod when scattering over multiple inputs")

            if method == "dotproduct" or method is None:
                jobs = dotproduct_scatter(step, inputobj, basedir, scatter, callback, **kwargs)
            elif method == "nested_crossproduct":
                jobs = nested_crossproduct_scatter(step, inputobj, basedir, scatter, callback, **kwargs)
            elif method == "flat_crossproduct":
                jobs = flat_crossproduct_scatter(step, inputobj, basedir, scatter, callback, 0, **kwargs)
        else:
            jobs = step.job(inputobj, basedir, callback, **kwargs)

        for j in jobs:
            yield j
Example #43
    def extendsfrom(item, ex):
        if "extends" in item:
            for e in aslist(item["extends"]):
                ex.insert(0, self.typemap[e])
                extendsfrom(self.typemap[e], ex)
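
extendsfrom walks the extends chain, inserting each ancestor at the front so the result runs from the most distant base type down to the immediate parent. A standalone toy version with an inline typemap (the original reads self.typemap):

def aslist(l):
    # Wrap scalars in a list, as elsewhere in the module.
    return l if isinstance(l, list) else [l]

typemap = {
    "Base": {"name": "Base"},
    "Middle": {"name": "Middle", "extends": "Base"},
    "Leaf": {"name": "Leaf", "extends": "Middle"},
}

def extendsfrom(item, ex):
    if "extends" in item:
        for e in aslist(item["extends"]):
            ex.insert(0, typemap[e])
            extendsfrom(typemap[e], ex)

ex = []
extendsfrom(typemap["Leaf"], ex)
# ex == [typemap["Base"], typemap["Middle"]]
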
Example #44
def process_type(t, g, context, defaultBase, namespaces, defaultPrefix):
    if t["type"] == "record":
        recordname = t["name"]

        _logger.debug("Processing record %s\n", t)

        classnode = URIRef(recordname)
        g.add((classnode, RDF.type, RDFS.Class))

        split = urlparse.urlsplit(recordname)
        if "jsonldPrefix" in t:
            predicate = "%s:%s" % (t["jsonldPrefix"], recordname)
        elif split.scheme:
            (ns, ln) = rdflib.namespace.split_uri(unicode(recordname))
            predicate = recordname
            recordname = ln
        else:
            predicate = "%s:%s" % (defaultPrefix, recordname)

        if context.get(recordname, predicate) != predicate:
            raise Exception("Predicate collision on '%s', '%s' != '%s'" % (recordname, context[recordname], predicate))

        if not recordname:
            raise Exception()

        _logger.debug("Adding to context '%s' %s (%s)", recordname, predicate, type(predicate))
        context[recordname] = predicate

        for i in t.get("fields", []):
            fieldname = i["name"]

            _logger.debug("Processing field %s", i)

            v = pred(t, i, fieldname, context, defaultPrefix, namespaces)

            if isinstance(v, basestring):
                v = v if v[0] != "@" else None
            else:
                v = v["_@id"] if v.get("_@id", "@")[0] != "@" else None

            if v:
                (ns, ln) = rdflib.namespace.split_uri(unicode(v))
                if ns[0:-1] in namespaces:
                    propnode = namespaces[ns[0:-1]][ln]
                else:
                    propnode = URIRef(v)

                g.add((propnode, RDF.type, RDF.Property))
                g.add((propnode, RDFS.domain, classnode))

                # TODO generate range from datatype.

            if isinstance(i["type"], dict) and "name" in i["type"]:
                process_type(i["type"], g, context, defaultBase, namespaces, defaultPrefix)

        if "extends" in t:
            for e in aslist(t["extends"]):
                g.add((classnode, RDFS.subClassOf, URIRef(e)))
    elif t["type"] == "enum":
        _logger.debug("Processing enum %s", t["name"])

        for i in t["symbols"]:
            pred(t, None, i, context, defaultBase, namespaces)
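
process_type maps a record type onto RDF: the record becomes an rdfs:Class and each field that survives the pred filtering becomes an rdf:Property whose rdfs:domain is that class. A pared-down sketch using rdflib directly, with namespace splitting and the pred helper omitted:

from rdflib import Graph, URIRef
from rdflib.namespace import RDF, RDFS

def record_to_rdfs(g, record):
    # The record itself becomes a class...
    classnode = URIRef(record["name"])
    g.add((classnode, RDF.type, RDFS.Class))
    # ...and each field a property whose domain is that class.
    for field in record.get("fields", []):
        propnode = URIRef(field["name"])
        g.add((propnode, RDF.type, RDF.Property))
        g.add((propnode, RDFS.domain, classnode))
    return g

g = record_to_rdfs(Graph(), {
    "name": "http://example.com/schema#Rec",
    "fields": [{"name": "http://example.com/schema#Rec/size"}],
})
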
    def __init__(self, toolpath_object, validateAs, docpath):
        self.names = get_schema()
        self.docpath = docpath

        self.tool = toolpath_object

        #if self.tool.get("@context") != TOOL_CONTEXT_URL:
        #    raise Exception("Missing or invalid '@context' field in tool description document, must be %s" % TOOL_CONTEXT_URL)

        # Validate tool document
        validate.validate_ex(self.names.get_name(validateAs, ""), self.tool)

        self.validate_requirements(self.tool, "requirements")
        self.validate_requirements(self.tool, "hints")

        for t in self.tool.get("requirements", []):
            t["_docpath"] = docpath

        for t in self.tool.get("hints", []):
            t["_docpath"] = docpath

        # Import schema defs
        self.schemaDefs = {
            "Any": [
                "null",
                "boolean",
                "int",
                "long",
                "float",
                "double",
                "bytes",
                "string",
                "File",
                {"type": "array", "items": "Any"},
                {"type": "map", "values": "Any"}
            ]}

        if self.tool.get("schemaDefs"):
            for i in self.tool["schemaDefs"]:
                avro.schema.make_avsc_object(i, self.names)
                self.schemaDefs[i["name"]] = i

        # Build record schema from inputs
        self.inputs_record_schema = {"name": "input_record_schema", "type": "record", "fields": []}
        for i in self.tool["inputs"]:
            c = copy.copy(i)
            c["name"] = c["id"][1:]
            del c["id"]
            if "default" in c:
                c["type"] = ["null"] + aslist(c["type"])
            self.inputs_record_schema["fields"].append(c)
        avro.schema.make_avsc_object(self.inputs_record_schema, self.names)

        self.outputs_record_schema = {"name": "outputs_record_schema", "type": "record", "fields": []}
        for i in self.tool["outputs"]:
            c = copy.copy(i)
            c["name"] = c["id"][1:]
            del c["id"]
            if "default" in c:
                c["type"] = ["null"] + aslist(c["type"])
            self.outputs_record_schema["fields"].append(c)
        avro.schema.make_avsc_object(self.outputs_record_schema, self.names)
def extend_and_specialize(items, loader):
    """Apply 'extend' and 'specialize' to fully materialize derived record
    types."""

    types = {t["name"]: t for t in items}
    n = []

    for t in items:
        t = copy.deepcopy(t)
        if "extends" in t:
            if "specialize" in t:
                spec = {
                    sp["specializeFrom"]: sp["specializeTo"]
                    for sp in aslist(t["specialize"])
                }
            else:
                spec = {}

            exfields = []
            exsym = []
            for ex in aslist(t["extends"]):
                if ex not in types:
                    raise Exception(
                        "Extends %s in %s refers to invalid base type" %
                        (t["extends"], t["name"]))

                basetype = copy.deepcopy(types[ex])

                if t["type"] == "record":
                    if spec:
                        basetype["fields"] = replace_type(
                            basetype.get("fields", []), spec, loader, set())

                    for f in basetype.get("fields", []):
                        if "inherited_from" not in f:
                            f["inherited_from"] = ex

                    exfields.extend(basetype.get("fields", []))
                elif t["type"] == "enum":
                    exsym.extend(basetype.get("symbols", []))

            if t["type"] == "record":
                exfields.extend(t.get("fields", []))
                t["fields"] = exfields

                fieldnames = set()
                for field in t["fields"]:
                    if field["name"] in fieldnames:
                        raise validate.ValidationException(
                            "Field name %s appears twice in %s" %
                            (field["name"], t["name"]))
                    else:
                        fieldnames.add(field["name"])

                for y in [x for x in t["fields"] if x["name"] == "class"]:
                    y["type"] = {
                        "type": "enum",
                        "symbols": [t["name"]],
                        "name": t["name"] + "_class",
                    }
                    y["doc"] = "Must be `%s` to indicate this is a %s object." % (
                        t["name"], t["name"])
            elif t["type"] == "enum":
                exsym.extend(t.get("symbols", []))
                t["symbol"] = exsym

            types[t["name"]] = t

        n.append(t)

    ex_types = {t["name"]: t for t in n}

    extended_by = {}
    for t in n:
        if "extends" in t:
            for ex in aslist(t["extends"]):
                if ex_types[ex].get("abstract"):
                    add_dictlist(extended_by, ex, ex_types[t["name"]])
                    add_dictlist(extended_by, avro_name(ex), ex_types[ex])

    for t in n:
        if "fields" in t:
            t["fields"] = replace_type(t["fields"], extended_by, loader, set())

    return n
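
Concretely, extending prepends the base type's fields (tagged with inherited_from) to the derived type's own, and specialize substitutes type names inside the inherited fields first. A toy input and the rough shape of the output, assuming a suitable loader:

types = [
    {"name": "Base", "type": "record",
     "fields": [{"name": "label", "type": "string"}]},
    {"name": "Derived", "type": "record", "extends": "Base",
     "fields": [{"name": "size", "type": "int"}]},
]
# extend_and_specialize(types, loader) materializes "Derived" with
# roughly these fields:
#   [{"name": "label", "type": "string", "inherited_from": "Base"},
#    {"name": "size", "type": "int"}]
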
Example #48
    def __init__(self,
                 toolpath_object,
                 validateAs,
                 do_validate=True,
                 **kwargs):
        (_, self.names) = get_schema()
        self.tool = toolpath_object

        if do_validate:
            try:
                # Validate tool document
                validate.validate_ex(self.names.get_name(validateAs, ""),
                                     self.tool,
                                     strict=kwargs.get("strict"))
            except validate.ValidationException as v:
                raise validate.ValidationException(
                    "Could not validate %s as %s:\n%s" %
                    (self.tool.get("id"), validateAs, validate.indent(str(v))))

        self.requirements = kwargs.get("requirements", []) + self.tool.get(
            "requirements", [])
        self.hints = kwargs.get("hints", []) + self.tool.get("hints", [])

        self.validate_hints(self.tool.get("hints", []),
                            strict=kwargs.get("strict"))

        self.schemaDefs = {}

        sd, _ = self.get_requirement("SchemaDefRequirement")

        if sd:
            for i in sd["types"]:
                avro.schema.make_avsc_object(i, self.names)
                self.schemaDefs[i["name"]] = i

        # Build record schema from inputs
        self.inputs_record_schema = {
            "name": "input_record_schema",
            "type": "record",
            "fields": []
        }
        for i in self.tool["inputs"]:
            c = copy.copy(i)
            doc_url, fragment = urlparse.urldefrag(c['id'])
            c["name"] = fragment
            del c["id"]

            if "type" not in c:
                raise validate.ValidationException(
                    "Missing `type` in parameter `%s`" % c["name"])

            if "default" in c:
                c["type"] = ["null"] + aslist(c["type"])
            self.inputs_record_schema["fields"].append(c)

        avro.schema.make_avsc_object(self.inputs_record_schema, self.names)

        self.outputs_record_schema = {
            "name": "outputs_record_schema",
            "type": "record",
            "fields": []
        }
        for i in self.tool["outputs"]:
            c = copy.copy(i)
            doc_url, fragment = urlparse.urldefrag(c['id'])
            c["name"] = fragment
            del c["id"]

            if "type" not in c:
                raise validate.ValidationException(
                    "Missing `type` in parameter `%s`" % c["name"])

            if "default" in c:
                c["type"] = ["null"] + aslist(c["type"])
            self.outputs_record_schema["fields"].append(c)

        avro.schema.make_avsc_object(self.outputs_record_schema, self.names)
Example #49
    def try_make_job(self, step, basedir, **kwargs):
        inputparms = step.tool["inputs"]
        outputparms = step.tool["outputs"]

        supportsMultipleInput = bool(
            self.workflow.get_requirement("MultipleInputFeatureRequirement")
            [0])

        try:
            inputobj = object_from_state(self.state, inputparms, False,
                                         supportsMultipleInput)
            if inputobj is None:
                _logger.debug("[workflow %s] job step %s not ready", self.name,
                              step.id)
                return

            _logger.debug("[step %s] starting job step %s of workflow %s",
                          id(step), step.id, id(self))

            if step.submitted:
                return

            callback = functools.partial(self.receive_output, step,
                                         outputparms)

            valueFrom = {
                i["id"]: i["valueFrom"]
                for i in step.tool["inputs"] if "valueFrom" in i
            }

            if len(valueFrom) > 0 and not bool(
                    self.workflow.get_requirement(
                        "StepInputExpressionRequirement")[0]):
                raise WorkflowException(
                    "Workflow step contains valueFrom but StepInputExpressionRequirement not in requirements"
                )

            vfinputs = {shortname(k): v for k, v in inputobj.iteritems()}

            def valueFromFunc(k, v):
                if k in valueFrom:
                    return expression.do_eval(valueFrom[k],
                                              vfinputs,
                                              self.workflow.requirements,
                                              None,
                                              None, {},
                                              context=v)
                else:
                    return v

            if "scatter" in step.tool:
                scatter = aslist(step.tool["scatter"])
                method = step.tool.get("scatterMethod")
                if method is None and len(scatter) != 1:
                    raise WorkflowException(
                        "Must specify scatterMethod when scattering over multiple inputs"
                    )
                if "valueFrom" not in kwargs:
                    kwargs["valueFrom"] = valueFromFunc
                if method == "dotproduct" or method is None:
                    jobs = dotproduct_scatter(step, inputobj, basedir, scatter,
                                              callback, **kwargs)
                elif method == "nested_crossproduct":
                    jobs = nested_crossproduct_scatter(step, inputobj, basedir,
                                                       scatter, callback,
                                                       **kwargs)
                elif method == "flat_crossproduct":
                    jobs = flat_crossproduct_scatter(step, inputobj, basedir,
                                                     scatter, callback, 0,
                                                     **kwargs)
            else:
                _logger.debug("[workflow %s] Job is input %s", self.name,
                              json.dumps(inputobj, indent=4))
                inputobj = {
                    k: valueFromFunc(k, v)
                    for k, v in inputobj.items()
                }
                _logger.debug("[workflow %s] Evaluated job input to %s",
                              self.name, json.dumps(inputobj, indent=4))
                jobs = step.job(inputobj, basedir, callback, **kwargs)

            step.submitted = True

            for j in jobs:
                yield j
        except WorkflowException:
            raise
        except Exception as e:
            _logger.exception("Unhandled exception")
            self.processStatus = "permanentFail"
            step.completed = True
def extend_and_specialize(items, loader):
    """Apply 'extend' and 'specialize' to fully materialize derived record
    types."""

    types = {t["name"]: t for t in items}
    n = []

    for t in items:
        t = copy.deepcopy(t)
        if "extends" in t:
            if "specialize" in t:
                spec = {sp["specializeFrom"]: sp["specializeTo"] for sp in aslist(t["specialize"])}
            else:
                spec = {}

            exfields = []
            exsym = []
            for ex in aslist(t["extends"]):
                if ex not in types:
                    raise Exception("Extends %s in %s refers to invalid base type" % (t["extends"], t["name"]))

                basetype = copy.deepcopy(types[ex])

                if t["type"] == "record":
                    if spec:
                        basetype["fields"] = replace_type(basetype.get("fields", []), spec, loader, set())

                    for f in basetype.get("fields", []):
                        if "inherited_from" not in f:
                            f["inherited_from"] = ex

                    exfields.extend(basetype.get("fields", []))
                elif t["type"] == "enum":
                    exsym.extend(basetype.get("symbols", []))

            if t["type"] == "record":
                exfields.extend(t.get("fields", []))
                t["fields"] = exfields

                fieldnames = set()
                for field in t["fields"]:
                    if field["name"] in fieldnames:
                        raise validate.ValidationException("Field name %s appears twice in %s" % (field["name"], t["name"]))
                    else:
                        fieldnames.add(field["name"])

                for y in [x for x in t["fields"] if x["name"] == "class"]:
                    y["type"] = {"type": "enum",
                                 "symbols": [t["name"]],
                                 "name": t["name"]+"_class",
                    }
                    y["doc"] = "Must be `%s` to indicate this is a %s object." % (t["name"], t["name"])
            elif t["type"] == "enum":
                exsym.extend(t.get("symbols", []))
                t["symbol"] = exsym

            types[t["name"]] = t

        n.append(t)

    ex_types = {t["name"]: t for t in n}

    extended_by = {}
    for t in n:
        if "extends" in t:
            for ex in aslist(t["extends"]):
                if ex_types[ex].get("abstract"):
                    add_dictlist(extended_by, ex, ex_types[t["name"]])
                    add_dictlist(extended_by, avro_name(ex), ex_types[ex])

    for t in n:
        if "fields" in t:
            t["fields"] = replace_type(t["fields"], extended_by, loader, set())

    return n
Example #51
    def __init__(self, toolpath_object, pos, **kwargs):
        if "id" in toolpath_object:
            self.id = toolpath_object["id"]
        else:
            self.id = "#step" + str(pos)

        try:
            makeTool = kwargs.get("makeTool")
            runobj = None
            if isinstance(toolpath_object["run"], basestring):
                runobj, _ = schema_salad.schema.load_and_validate(
                    kwargs["loader"], kwargs["avsc_names"],
                    toolpath_object["run"], True)
            else:
                runobj = toolpath_object["run"]
            self.embedded_tool = makeTool(runobj, **kwargs)
        except validate.ValidationException as v:
            raise WorkflowException(
                "Tool definition %s failed validation:\n%s" %
                (toolpath_object["run"], validate.indent(str(v))))

        for field in ("inputs", "outputs"):
            for i in toolpath_object[field]:
                inputid = i["id"]
                p = shortname(inputid)
                found = False
                for a in self.embedded_tool.tool[field]:
                    frag = shortname(a["id"])
                    if frag == p:
                        i.update(a)
                        found = True
                if not found:
                    i["type"] = "Any"
                    #raise WorkflowException("Parameter '%s' of %s in workflow step %s does not correspond to parameter in %s" % (p, field, self.id, self.embedded_tool.tool.get("id")))
                i["id"] = inputid

        super(WorkflowStep, self).__init__(toolpath_object, **kwargs)

        if self.embedded_tool.tool["class"] == "Workflow":
            (feature,
             _) = self.get_requirement("SubworkflowFeatureRequirement")
            if not feature:
                raise WorkflowException(
                    "Workflow contains embedded workflow but SubworkflowFeatureRequirement not in requirements"
                )

        if "scatter" in self.tool:
            (feature, _) = self.get_requirement("ScatterFeatureRequirement")
            if not feature:
                raise WorkflowException(
                    "Workflow contains scatter but ScatterFeatureRequirement not in requirements"
                )

            inputparms = copy.deepcopy(self.tool["inputs"])
            outputparms = copy.deepcopy(self.tool["outputs"])
            scatter = aslist(self.tool["scatter"])

            method = self.tool.get("scatterMethod")
            if method is None and len(scatter) != 1:
                raise WorkflowException(
                    "Must specify scatterMethod when scattering over multiple inputs"
                )

            inp_map = {i["id"]: i for i in inputparms}
            for s in scatter:
                if s not in inp_map:
                    raise WorkflowException("Invalid Scatter parameter '%s'" %
                                            s)

                inp_map[s]["type"] = {
                    "type": "array",
                    "items": inp_map[s]["type"]
                }

            if self.tool.get("scatterMethod") == "nested_crossproduct":
                nesting = len(scatter)
            else:
                nesting = 1

            for r in xrange(0, nesting):
                for i in outputparms:
                    i["type"] = {"type": "array", "items": i["type"]}
            self.tool["inputs"] = inputparms
            self.tool["outputs"] = outputparms
    def extendsfrom(item, ex):
        if "extends" in item:
            for e in aslist(item["extends"]):
                ex.insert(0, self.typemap[e])
                extendsfrom(self.typemap[e], ex)
    def job(self, joborder, input_basedir, output_callback, **kwargs):
        builder = self._init_job(joborder, input_basedir, **kwargs)

        if self.tool["baseCommand"]:
            for n, b in enumerate(aslist(self.tool["baseCommand"])):
                builder.bindings.append({"position": [-1000000, n], "valueFrom": b})

        if self.tool.get("arguments"):
            for i, a in enumerate(self.tool["arguments"]):
                if isinstance(a, dict):
                    a = copy.copy(a)
                    if a.get("position"):
                        a["position"] = [a["position"], i]
                    else:
                        a["position"] = [0, i]
                    a["do_eval"] = a["valueFrom"]
                    a["valueFrom"] = None
                    builder.bindings.append(a)
                else:
                    builder.bindings.append({"position": [0, i], "valueFrom": a})

        builder.bindings.sort(key=lambda a: a["position"])

        reffiles = set(f["path"] for f in builder.files)

        j = self.makeJobRunner()
        j.joborder = builder.job
        j.stdin = None
        j.stdout = None
        j.successCodes = self.tool.get("successCodes")
        j.temporaryFailCodes = self.tool.get("temporaryFailCodes")
        j.permanentFailCodes = self.tool.get("permanentFailCodes")
        j.requirements = self.requirements
        j.hints = self.hints

        _logger.debug(
            "[job %s] initializing from %s%s",
            id(j),
            self.tool.get("id", ""),
            " as part of %s" % kwargs["part_of"] if "part_of" in kwargs else "",
        )
        _logger.debug("[job %s] %s", id(j), json.dumps(joborder, indent=4))

        builder.pathmapper = None

        if self.tool.get("stdin"):
            j.stdin = builder.do_eval(self.tool["stdin"])
            if isinstance(j.stdin, dict) and "ref" in j.stdin:
                j.stdin = builder.job[j.stdin["ref"][1:]]["path"]
            reffiles.add(j.stdin)

        if self.tool.get("stdout"):
            j.stdout = builder.do_eval(self.tool["stdout"])
            if os.path.isabs(j.stdout) or ".." in j.stdout:
                raise validate.ValidationException("stdout must be a relative path")

        builder.pathmapper = self.makePathMapper(reffiles, input_basedir, **kwargs)
        builder.requirements = j.requirements

        for f in builder.files:
            f["path"] = builder.pathmapper.mapper(f["path"])[1]

        _logger.debug("[job %s] command line bindings is %s", id(j), json.dumps(builder.bindings, indent=4))
        _logger.debug(
            "[job %s] path mappings is %s",
            id(j),
            json.dumps({p: builder.pathmapper.mapper(p) for p in builder.pathmapper.files()}, indent=4),
        )

        dockerReq, _ = self.get_requirement("DockerRequirement")
        if dockerReq and kwargs.get("use_container"):
            out_prefix = kwargs.get("tmp_outdir_prefix")
            j.outdir = kwargs.get("outdir") or tempfile.mkdtemp(prefix=out_prefix)
            tmpdir_prefix = kwargs.get("tmpdir_prefix")
            j.tmpdir = kwargs.get("tmpdir") or tempfile.mkdtemp(prefix=tmpdir_prefix)
        else:
            j.outdir = builder.outdir
            j.tmpdir = builder.tmpdir

        createFiles, _ = self.get_requirement("CreateFileRequirement")
        j.generatefiles = {}
        if createFiles:
            for t in createFiles["fileDef"]:
                j.generatefiles[builder.do_eval(t["filename"])] = copy.deepcopy(builder.do_eval(t["fileContent"]))

        j.environment = {}
        evr, _ = self.get_requirement("EnvVarRequirement")
        if evr:
            for t in evr["envDef"]:
                j.environment[t["envName"]] = builder.do_eval(t["envValue"])

        j.command_line = flatten(map(builder.generate_arg, builder.bindings))

        j.pathmapper = builder.pathmapper
        j.collect_outputs = functools.partial(self.collect_output_ports, self.tool["outputs"], builder)
        j.output_callback = output_callback

        yield j
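
The builder.bindings.sort(key=lambda a: a["position"]) call above relies on
Python comparing lists lexicographically, which is why baseCommand words are
pinned at position [-1000000, n]: they sort before any user-assigned position
while keeping their own order. A small self-contained illustration (not
cwltool code):

bindings = [
    {"position": [0, 1], "valueFrom": "--flag"},
    {"position": [-1000000, 0], "valueFrom": "echo"},   # baseCommand word 0
    {"position": [-1000000, 1], "valueFrom": "hello"},  # baseCommand word 1
    {"position": [2, 0], "valueFrom": "input.txt"},
]
bindings.sort(key=lambda a: a["position"])
print([b["valueFrom"] for b in bindings])
# ['echo', 'hello', '--flag', 'input.txt']
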
Example #54
0
    def collect_output(self, schema, builder, outdir):
        r = None
        if "outputBinding" in schema:
            binding = schema["outputBinding"]
            globpatterns = []
            if "glob" in binding:
                r = []
                for gb in aslist(binding["glob"]):
                    try:
                        gb = builder.do_eval(gb)
                        globpatterns.append(gb)
                        if gb:
                            r.extend([{"path": g, "class": "File"} for g in builder.fs_access.glob(os.path.join(outdir, gb))])
                    except (OSError, IOError) as e:
                        _logger.warning(str(e))
                for files in r:
                    checksum = hashlib.sha1()
                    with builder.fs_access.open(files["path"], "rb") as f:
                        contents = f.read(CONTENT_LIMIT)
                        if binding.get("loadContents"):
                            files["contents"] = contents
                        filesize = 0
                        while contents != "":
                            checksum.update(contents)
                            filesize += len(contents)
                            contents = f.read(1024*1024)
                    files["checksum"] = "sha1$%s" % checksum.hexdigest()
                    files["size"] = filesize
                    if "format" in schema:
                        files["format"] = builder.do_eval(schema["format"], context=files)

            optional = False
            singlefile = False
            if isinstance(schema["type"], list):
                if "null" in schema["type"]:
                    optional = True
                if "File" in schema["type"]:
                    singlefile = True
            elif schema["type"] == "File":
                singlefile = True

            if "outputEval" in binding:
                r = builder.do_eval(binding["outputEval"], context=r)
                if singlefile:
                    # Handle single file outputs not wrapped in a list
                    if r is not None and not isinstance(r, (list, tuple)):
                        r = [r]
                    if optional and r is None:
                        pass
                    elif (r is None or len(r) != 1 or not isinstance(r[0], dict) or "path" not in r[0]):
                        raise WorkflowException("Expression must return a file object for %s." % schema["id"])

            if singlefile:
                if not r and not optional:
                    raise WorkflowException("Did not find output file with glob pattern: '{}'".format(globpatterns))
                elif not r and optional:
                    pass
                elif isinstance(r, list):
                    if len(r) > 1:
                        raise WorkflowException("Multiple matches for output item that is a single file.")
                    else:
                        r = r[0]

            if "secondaryFiles" in schema:
                for primary in aslist(r):
                    if isinstance(primary, dict):
                        primary["secondaryFiles"] = []
                        for sf in aslist(schema["secondaryFiles"]):
                            if isinstance(sf, dict) or "$(" in sf or "${" in sf:
                                sfpath = builder.do_eval(sf, context=r)
                                if isinstance(sfpath, basestring):
                                    sfpath = {"path": sfpath, "class": "File"}
                            else:
                                sfpath = {"path": substitute(primary["path"], sf), "class": "File"}

                            for sfitem in aslist(sfpath):
                                if builder.fs_access.exists(sfitem["path"]):
                                    primary["secondaryFiles"].append(sfitem)

            if not r and optional:
                r = None

        if not r and isinstance(schema["type"], dict) and schema["type"]["type"] == "record":
            r = {}
            for f in schema["type"]["fields"]:
                r[shortname(f["name"])] = self.collect_output(f, builder, outdir)

        return r
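
The glob loop in collect_output reads each matched file in chunks, hashing as
it goes, so at most CONTENT_LIMIT bytes are retained for "contents" while the
checksum and size still cover the whole file. A standalone sketch of the same
pattern, using plain open() instead of builder.fs_access and an assumed value
for CONTENT_LIMIT:

import hashlib

CONTENT_LIMIT = 64 * 1024  # assumed cap on captured file contents

def checksum_and_size(path, load_contents=False):
    # Stream the file: keep at most CONTENT_LIMIT bytes, but hash everything.
    result = {"path": path, "class": "File"}
    checksum = hashlib.sha1()
    filesize = 0
    with open(path, "rb") as f:
        contents = f.read(CONTENT_LIMIT)
        if load_contents:
            result["contents"] = contents
        while contents != b"":
            checksum.update(contents)
            filesize += len(contents)
            contents = f.read(1024 * 1024)
    result["checksum"] = "sha1$%s" % checksum.hexdigest()
    result["size"] = filesize
    return result
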
    def bind_input(self, schema, datum, lead_pos=None, tail_pos=None):
        # Avoid shared mutable default arguments; use fresh lists per call.
        if lead_pos is None:
            lead_pos = []
        if tail_pos is None:
            tail_pos = []
        bindings = []
        binding = None
        if "inputBinding" in schema and isinstance(schema["inputBinding"], dict):
            binding = copy.copy(schema["inputBinding"])

            if "position" in binding:
                binding["position"] = aslist(lead_pos) + aslist(binding["position"]) + aslist(tail_pos)
            else:
                binding["position"] = aslist(lead_pos) + [0] + aslist(tail_pos)

            if "valueFrom" in binding:
                binding["do_eval"] = binding["valueFrom"]
            binding["valueFrom"] = datum

        # Handle union types
        if isinstance(schema["type"], list):
            for t in schema["type"]:
                if isinstance(t, basestring) and self.names.has_name(t, ""):
                    avsc = self.names.get_name(t, "")
                elif isinstance(t, dict) and "name" in t and self.names.has_name(t["name"], ""):
                    avsc = self.names.get_name(t["name"], "")
                else:
                    avsc = avro.schema.make_avsc_object(t, self.names)
                if validate.validate(avsc, datum):
                    schema = copy.deepcopy(schema)
                    schema["type"] = t
                    return self.bind_input(schema, datum, lead_pos=lead_pos, tail_pos=tail_pos)
            raise validate.ValidationException("'%s' is not a valid union %s" % (datum, schema["type"]))
        elif isinstance(schema["type"], dict):
            st = copy.deepcopy(schema["type"])
            if (
                binding
                and "inputBinding" not in st
                and "itemSeparator" not in binding
                and st["type"] in ("array", "map")
            ):
                st["inputBinding"] = {}
            bindings.extend(self.bind_input(st, datum, lead_pos=lead_pos, tail_pos=tail_pos))
        else:
            if schema["type"] in self.schemaDefs:
                schema = self.schemaDefs[schema["type"]]

            if schema["type"] == "record":
                for f in schema["fields"]:
                    if f["name"] in datum:
                        bindings.extend(self.bind_input(f, datum[f["name"]], lead_pos=lead_pos, tail_pos=f["name"]))
                    else:
                        datum[f["name"]] = f.get("default")

            if schema["type"] == "map":
                for n, item in datum.items():
                    b2 = None
                    if binding:
                        b2 = copy.deepcopy(binding)
                        b2["valueFrom"] = [n, item]
                    bindings.extend(
                        self.bind_input(
                            {"type": schema["values"], "inputBinding": b2}, item, lead_pos=n, tail_pos=tail_pos
                        )
                    )
                binding = None

            if schema["type"] == "array":
                for n, item in enumerate(datum):
                    b2 = None
                    if binding:
                        b2 = copy.deepcopy(binding)
                        b2["valueFrom"] = item
                    bindings.extend(
                        self.bind_input(
                            {"type": schema["items"], "inputBinding": b2}, item, lead_pos=n, tail_pos=tail_pos
                        )
                    )
                binding = None

            if schema["type"] == "File":
                self.files.append(datum)
                if binding:
                    if binding.get("loadContents"):
                        with self.fs_access.open(datum["path"], "rb") as f:
                            datum["contents"] = f.read(CONTENT_LIMIT)

                    if "secondaryFiles" in binding:
                        if "secondaryFiles" not in datum:
                            datum["secondaryFiles"] = []
                        for sf in aslist(binding["secondaryFiles"]):
                            if isinstance(sf, dict):
                                sfpath = self.do_eval(sf, context=datum["path"])
                            else:
                                sfpath = {"path": substitute(datum["path"], sf), "class": "File"}
                            if isinstance(sfpath, list):
                                datum["secondaryFiles"].extend(sfpath)
                                self.files.extend(sfpath)
                            else:
                                datum["secondaryFiles"].append(sfpath)
                                self.files.append(sfpath)

        # Prepend this binding's position to each nested binding's sort key
        if binding:
            for bi in bindings:
                bi["position"] = binding["position"] + bi["position"]
            bindings.append(binding)

        return bindings
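
Both collect_output and bind_input resolve secondaryFiles patterns through
substitute(). A minimal sketch of the conventional CWL suffix rule, where each
leading "^" strips one extension from the primary path before the remainder of
the pattern is appended (assumed semantics, not copied from the snippets):

def substitute(value, replace):
    # Each leading "^" removes one extension from the primary path;
    # whatever remains of the pattern is appended.
    if replace and replace[0] == "^":
        return substitute(value[0:value.rindex(".")], replace[1:])
    return value + replace

print(substitute("reads.bam", ".bai"))       # reads.bam.bai
print(substitute("reads.bam", "^.bai"))      # reads.bai
print(substitute("reads.tar.gz", "^^.txt"))  # reads.txt
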
Example #56
0
    def job(self, joborder, input_basedir, output_callback, **kwargs):
        builder = self._init_job(joborder, input_basedir, **kwargs)

        if self.tool["baseCommand"]:
            for n, b in enumerate(aslist(self.tool["baseCommand"])):
                builder.bindings.append({
                    "position": [-1000000, n],
                    "valueFrom": b
                })

        if self.tool.get("arguments"):
            for i, a in enumerate(self.tool["arguments"]):
                if isinstance(a, dict):
                    a = copy.copy(a)
                    if a.get("position"):
                        a["position"] = [a["position"], i]
                    else:
                        a["position"] = [0, i]
                    a["do_eval"] = a["valueFrom"]
                    a["valueFrom"] = None
                    builder.bindings.append(a)
                else:
                    builder.bindings.append({
                        "position": [0, i],
                        "valueFrom": a
                    })

        builder.bindings.sort(key=lambda a: a["position"])

        reffiles = set(f["path"] for f in builder.files)

        j = self.makeJobRunner()
        j.builder = builder
        j.joborder = builder.job
        j.stdin = None
        j.stdout = None
        j.successCodes = self.tool.get("successCodes")
        j.temporaryFailCodes = self.tool.get("temporaryFailCodes")
        j.permanentFailCodes = self.tool.get("permanentFailCodes")
        j.requirements = self.requirements
        j.hints = self.hints
        j.name = uniquename(kwargs.get("name", str(id(j))))

        _logger.debug("[job %s] initializing from %s%s",
                     j.name,
                     self.tool.get("id", ""),
                     " as part of %s" % kwargs["part_of"] if "part_of" in kwargs else "")
        _logger.debug("[job %s] %s", j.name, json.dumps(joborder, indent=4))


        builder.pathmapper = None

        if self.tool.get("stdin"):
            j.stdin = builder.do_eval(self.tool["stdin"])
            if isinstance(j.stdin, dict) and "ref" in j.stdin:
                j.stdin = builder.job[j.stdin["ref"][1:]]["path"]
            reffiles.add(j.stdin)

        if self.tool.get("stdout"):
            j.stdout = builder.do_eval(self.tool["stdout"])
            if os.path.isabs(j.stdout) or ".." in j.stdout:
                raise validate.ValidationException("stdout must be a relative path")

        builder.pathmapper = self.makePathMapper(reffiles, input_basedir, **kwargs)
        builder.requirements = j.requirements

        # Map files to their assigned paths inside the container. We must also
        # walk builder.bindings explicitly, since the implicit reassignment via
        # builder.files doesn't reach every file object referenced in the bindings.
        def _check_adjust(f):
            if not f.get("containerfs"):
                f["path"] = builder.pathmapper.mapper(f["path"])[1]
                f["containerfs"] = True
            return f

        _logger.debug("[job %s] path mappings is %s", j.name, json.dumps({p: builder.pathmapper.mapper(p) for p in builder.pathmapper.files()}, indent=4))

        adjustFileObjs(builder.files, _check_adjust)
        adjustFileObjs(builder.bindings, _check_adjust)

        _logger.debug("[job %s] command line bindings is %s", j.name, json.dumps(builder.bindings, indent=4))

        dockerReq, _ = self.get_requirement("DockerRequirement")
        if dockerReq and kwargs.get("use_container"):
            out_prefix = kwargs.get("tmp_outdir_prefix")
            j.outdir = kwargs.get("outdir") or tempfile.mkdtemp(prefix=out_prefix)
            tmpdir_prefix = kwargs.get("tmpdir_prefix")
            j.tmpdir = kwargs.get("tmpdir") or tempfile.mkdtemp(prefix=tmpdir_prefix)
        else:
            j.outdir = builder.outdir
            j.tmpdir = builder.tmpdir

        createFiles, _ = self.get_requirement("CreateFileRequirement")
        j.generatefiles = {}
        if createFiles:
            for t in createFiles["fileDef"]:
                j.generatefiles[builder.do_eval(t["filename"])] = copy.deepcopy(builder.do_eval(t["fileContent"]))

        j.environment = {}
        evr, _ = self.get_requirement("EnvVarRequirement")
        if evr:
            for t in evr["envDef"]:
                j.environment[t["envName"]] = builder.do_eval(t["envValue"])

        shellcmd, _ = self.get_requirement("ShellCommandRequirement")
        if shellcmd:
            cmd = []
            for b in builder.bindings:
                arg = builder.generate_arg(b)
                if b.get("shellQuote", True):
                    arg = [shellescape.quote(a) for a in aslist(arg)]
                cmd.extend(aslist(arg))
            j.command_line = ["/bin/sh", "-c", " ".join(cmd)]
        else:
            j.command_line = flatten(map(builder.generate_arg, builder.bindings))

        j.pathmapper = builder.pathmapper
        j.collect_outputs = functools.partial(self.collect_output_ports, self.tool["outputs"], builder)
        j.output_callback = output_callback

        yield j
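
This variant differs from the earlier job() mainly in the
ShellCommandRequirement branch: each generated argument is shell-quoted unless
its binding opts out with shellQuote: false, and the joined line runs under
/bin/sh -c. A standalone sketch of that quoting rule using the standard
library (the snippet itself uses the shellescape package):

try:
    from shlex import quote  # Python 3
except ImportError:
    from pipes import quote  # Python 2

def build_shell_command(args_with_flags):
    # args_with_flags: list of (argument, shell_quote) pairs.
    cmd = []
    for arg, shell_quote in args_with_flags:
        cmd.append(quote(arg) if shell_quote else arg)
    return ["/bin/sh", "-c", " ".join(cmd)]

# shellQuote: false lets an argument such as "| wc -l" act as real shell syntax.
print(build_shell_command([("grep", True), ("foo bar", True), ("| wc -l", False)]))
# ['/bin/sh', '-c', "grep 'foo bar' | wc -l"]
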
Example #57
0
    def __init__(self, toolpath_object, pos, **kwargs):
        if "id" in toolpath_object:
            self.id = toolpath_object["id"]
        else:
            self.id = "#step" + str(pos)

        try:
            makeTool = kwargs.get("makeTool")
            runobj = None
            if isinstance(toolpath_object["run"], basestring):
                runobj, _ = schema_salad.schema.load_and_validate(kwargs["loader"],
                                                                  kwargs["avsc_names"],
                                                                  toolpath_object["run"],
                                                                  True)
            else:
                runobj = toolpath_object["run"]
            self.embedded_tool = makeTool(runobj, **kwargs)
        except validate.ValidationException as v:
            raise WorkflowException("Tool definition %s failed validation:\n%s" % (toolpath_object["run"], validate.indent(str(v))))

        for field in ("inputs", "outputs"):
            for i in toolpath_object[field]:
                inputid = i["id"]
                p = shortname(inputid)
                found = False
                for a in self.embedded_tool.tool[field]:
                    frag = shortname(a["id"])
                    if frag == p:
                        i.update(a)
                        found = True
                if not found:
                    i["type"] = "Any"
                    #raise WorkflowException("Parameter '%s' of %s in workflow step %s does not correspond to parameter in %s" % (p, field, self.id, self.embedded_tool.tool.get("id")))
                i["id"] = inputid

        super(WorkflowStep, self).__init__(toolpath_object, **kwargs)

        if self.embedded_tool.tool["class"] == "Workflow":
            (feature, _) = self.get_requirement("SubworkflowFeatureRequirement")
            if not feature:
                raise WorkflowException("Workflow contains embedded workflow but SubworkflowFeatureRequirement not in requirements")

        if "scatter" in self.tool:
            (feature, _) = self.get_requirement("ScatterFeatureRequirement")
            if not feature:
                raise WorkflowException("Workflow contains scatter but ScatterFeatureRequirement not in requirements")

            inputparms = copy.deepcopy(self.tool["inputs"])
            outputparms = copy.deepcopy(self.tool["outputs"])
            scatter = aslist(self.tool["scatter"])

            method = self.tool.get("scatterMethod")
            if method is None and len(scatter) != 1:
                raise WorkflowException("Must specify scatterMethod when scattering over multiple inputs")

            inp_map = {i["id"]: i for i in inputparms}
            for s in scatter:
                if s not in inp_map:
                    raise WorkflowException("Invalid Scatter parameter '%s'" % s)

                inp_map[s]["type"] = {"type": "array", "items": inp_map[s]["type"]}

            if self.tool.get("scatterMethod") == "nested_crossproduct":
                nesting = len(scatter)
            else:
                nesting = 1

            for r in xrange(0, nesting):
                for i in outputparms:
                    i["type"] = {"type": "array", "items": i["type"]}
            self.tool["inputs"] = inputparms
            self.tool["outputs"] = outputparms
Example #58
0
    def try_make_job(self, step, basedir, **kwargs):
        inputobj = {}

        if "scatter" in step.tool:
            if not self.check_feature("ScatterFeature", kwargs):
                raise WorkflowException(
                    "Must include ScatterFeature in requirements.")
            inputparms = copy.deepcopy(step.tool["inputs"])
            outputparms = copy.deepcopy(step.tool["outputs"])
            scatter = aslist(step.tool["scatter"])

            inp_map = {i["id"]: i for i in inputparms}
            for s in aslist(step.tool["scatter"]):
                if s not in inp_map:
                    raise WorkflowException("Invalid Scatter parameter '%s'" %
                                            s)

                inp_map[s]["type"] = {
                    "type": "array",
                    "items": inp_map[s]["type"]
                }

            if step.tool.get("scatterMethod") == "nested_crossproduct":
                nesting = len(aslist(step.tool["scatter"]))
            else:
                nesting = 1

            for r in xrange(0, nesting):
                for i in outputparms:
                    i["type"] = {"type": "array", "items": i["type"]}
        else:
            inputparms = step.tool["inputs"]
            outputparms = step.tool["outputs"]

        for inp in inputparms:
            _logger.debug(inp)
            iid = idk(inp["id"])
            if "connect" in inp:
                connections = inp["connect"]
                is_array = isinstance(inp["type"],
                                      dict) and inp["type"]["type"] == "array"
                for connection in aslist(connections):
                    src = idk(connection["source"])
                    if src in self.state and self.state[src] is not None:
                        if self.state[src].parameter["type"] == inp["type"]:
                            # source and input types are the same
                            if is_array and iid in inputobj:
                                # there's already a value in the input object, so extend the existing array
                                inputobj[iid].extend(self.state[src].value)
                            else:
                                # simply assign the value from state to input
                                inputobj[iid] = copy.deepcopy(self.state[src].value)
                        elif is_array and self.state[src].parameter["type"] == inp["type"]["items"]:
                            # source type is the item type on the input array
                            # promote single item to array entry
                            if iid in inputobj:
                                inputobj[iid].append(self.state[src].value)
                            else:
                                inputobj[iid] = [self.state[src].value]
                        else:
                            raise WorkflowException(
                                "Type mismatch between '%s' (%s) and '%s' (%s)"
                                % (src, self.state[src].parameter["type"],
                                   idk(inp["id"]), inp["type"]))
                    elif src not in self.state:
                        raise WorkflowException(
                            "Connect source '%s' on parameter '%s' does not exist"
                            % (src, inp["id"]))
                    else:
                        return
            elif "default" in inp:
                inputobj[iid] = inp["default"]
            else:
                raise WorkflowException("Value for %s not specified" %
                                        (inp["id"]))

        _logger.info("Creating job with input: %s", inputobj)

        callback = functools.partial(self.receive_output, step, outputparms)

        if step.tool.get("scatter"):
            method = step.tool.get("scatterMethod")
            if method is None and len(aslist(step.tool["scatter"])) != 1:
                raise WorkflowException(
                    "Must specify scatterMethod when scattering over multiple inputs"
                )

            if method == "dotproduct" or method is None:
                jobs = dotproduct_scatter(step, inputobj, basedir,
                                          aslist(step.tool["scatter"]),
                                          callback, **kwargs)
            elif method == "nested_crossproduct":
                jobs = nested_crossproduct_scatter(
                    step, inputobj, basedir, aslist(step.tool["scatter"]),
                    callback, **kwargs)
            elif method == "flat_crossproduct":
                jobs = flat_crossproduct_scatter(step, inputobj, basedir,
                                                 aslist(step.tool["scatter"]),
                                                 callback, 0, **kwargs)
        else:
            jobs = step.job(inputobj, basedir, callback, **kwargs)

        for j in jobs:
            yield j
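
try_make_job dispatches to one of three scatter strategies. As a reading aid,
here is a simplified, synchronous sketch of what the dotproduct pairing
amounts to (hypothetical helper; the real dotproduct_scatter drives step.job
and an output callback instead of returning input dicts):

def dotproduct_scatter_values(inputobj, scatter_keys):
    # Pair the i-th element of every scattered input; lengths must match.
    lengths = {len(inputobj[k]) for k in scatter_keys}
    if len(lengths) != 1:
        raise ValueError("Length mismatch between scattered inputs")
    for i in range(lengths.pop()):
        job_input = dict(inputobj)
        for k in scatter_keys:
            job_input[k] = inputobj[k][i]
        yield job_input

# Two scattered inputs of length 2 yield two jobs (a crossproduct would yield four).
for jo in dotproduct_scatter_values({"x": [1, 2], "y": ["a", "b"], "z": 9}, ["x", "y"]):
    print(jo)
# {'x': 1, 'y': 'a', 'z': 9}
# {'x': 2, 'y': 'b', 'z': 9}
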
    def job(self, joborder, input_basedir, output_callback, **kwargs):
        builder = self._init_job(joborder, input_basedir, **kwargs)

        if self.tool["baseCommand"]:
            for n, b in enumerate(aslist(self.tool["baseCommand"])):
                builder.bindings.append({
                    "position": [-1000000, n],
                    "valueFrom": b
                })

        if self.tool.get("arguments"):
            for i, a in enumerate(self.tool["arguments"]):
                if isinstance(a, dict):
                    a = copy.copy(a)
                    if a.get("position"):
                        a["position"] = [a["position"], i]
                    else:
                        a["position"] = [0, i]
                    a["do_eval"] = a["valueFrom"]
                    a["valueFrom"] = None
                    builder.bindings.append(a)
                else:
                    builder.bindings.append({
                        "position": [0, i],
                        "valueFrom": a
                    })

        builder.bindings.sort(key=lambda a: a["position"])

        reffiles = set(f["path"] for f in builder.files)

        j = self.makeJobRunner()
        j.joborder = builder.job
        j.stdin = None
        j.stdout = None
        j.successCodes = self.tool.get("successCodes")
        j.temporaryFailCodes = self.tool.get("temporaryFailCodes")
        j.permanentFailCodes = self.tool.get("permanentFailCodes")
        j.requirements = self.requirements
        j.hints = self.hints

        _logger.debug(
            "[job %s] initializing from %s%s",
            id(j),
            self.tool.get("id", ""),
            " as part of %s" % kwargs["part_of"] if "part_of" in kwargs else "",
        )
        _logger.debug("[job %s] %s", id(j), json.dumps(joborder, indent=4))

        builder.pathmapper = None

        if self.tool.get("stdin"):
            j.stdin = builder.do_eval(self.tool["stdin"])
            if isinstance(j.stdin, dict) and "ref" in j.stdin:
                j.stdin = builder.job[j.stdin["ref"][1:]]["path"]
            reffiles.add(j.stdin)

        if self.tool.get("stdout"):
            j.stdout = builder.do_eval(self.tool["stdout"])
            if os.path.isabs(j.stdout) or ".." in j.stdout:
                raise validate.ValidationException(
                    "stdout must be a relative path")

        builder.pathmapper = self.makePathMapper(reffiles, input_basedir,
                                                 **kwargs)
        builder.requirements = j.requirements

        for f in builder.files:
            f["path"] = builder.pathmapper.mapper(f["path"])[1]

        _logger.debug("[job %s] command line bindings is %s", id(j),
                      json.dumps(builder.bindings, indent=4))
        _logger.debug(
            "[job %s] path mappings is %s", id(j),
            json.dumps(
                {
                    p: builder.pathmapper.mapper(p)
                    for p in builder.pathmapper.files()
                },
                indent=4))

        dockerReq, _ = self.get_requirement("DockerRequirement")
        if dockerReq and kwargs.get("use_container"):
            out_prefix = kwargs.get("tmp_outdir_prefix")
            j.outdir = kwargs.get("outdir") or tempfile.mkdtemp(prefix=out_prefix)
            tmpdir_prefix = kwargs.get("tmpdir_prefix")
            j.tmpdir = kwargs.get("tmpdir") or tempfile.mkdtemp(prefix=tmpdir_prefix)
        else:
            j.outdir = builder.outdir
            j.tmpdir = builder.tmpdir

        createFiles, _ = self.get_requirement("CreateFileRequirement")
        j.generatefiles = {}
        if createFiles:
            for t in createFiles["fileDef"]:
                j.generatefiles[builder.do_eval(t["filename"])] = copy.deepcopy(builder.do_eval(t["fileContent"]))

        j.environment = {}
        evr, _ = self.get_requirement("EnvVarRequirement")
        if evr:
            for t in evr["envDef"]:
                j.environment[t["envName"]] = builder.do_eval(t["envValue"])

        j.command_line = flatten(map(builder.generate_arg, builder.bindings))

        j.pathmapper = builder.pathmapper
        j.collect_outputs = functools.partial(self.collect_output_ports,
                                              self.tool["outputs"], builder)
        j.output_callback = output_callback

        yield j
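
All of the job() variants above are generators that yield configured job
objects. A hedged sketch of how a caller might drive one (hypothetical driver;
it assumes the yielded object exposes a run() method that executes
j.command_line and invokes j.output_callback):

def run_tool(tool, joborder, input_basedir, **kwargs):
    results = {}

    def on_output(out, status):
        results["out"] = out
        results["status"] = status

    for j in tool.job(joborder, input_basedir, on_output, **kwargs):
        j.run()  # assumed interface; executes the job and fires the callback
    return results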