Beispiel #1
0
 def validate_hints(self, hints, strict):
     """Validate every hint whose class is registered in ``self.names``.

     Each hint is a mapping with a ``"class"`` key.  When ``self.names``
     has a schema for that class the hint is validated against it
     (``strict`` is forwarded to ``validate.validate_ex``).  Hints with an
     unregistered class are only logged at INFO level and otherwise ignored.

     Raises:
         validate.ValidationException: when a known hint fails validation;
             the message is prefixed with the offending hint class.
     """
     for hint in hints:
         try:
             hint_schema = self.names.get_name(hint["class"], "")
             if hint_schema is None:
                 # Unknown hints are not an error: report and move on.
                 _logger.info(
                     validate.ValidationException("Unknown hint %s" %
                                                  (hint["class"])))
                 continue
             validate.validate_ex(hint_schema, hint, strict=strict)
         except validate.ValidationException as err:
             raise validate.ValidationException("Validating hint `%s`: %s" %
                                                (hint["class"], str(err)))
    def bind_input(self, schema, datum, lead_pos=[], tail_pos=[]):
        """Recursively collect command line bindings for ``datum``.

        ``schema`` is a mapping with a ``"type"`` entry and, optionally, an
        ``"inputBinding"`` entry.  The method walks ``schema`` and ``datum``
        together and returns a flat list of binding dicts.  Every binding it
        creates carries a ``"position"`` list (``lead_pos`` + the binding's
        own position, or ``0`` when absent, + ``tail_pos``) and a
        ``"valueFrom"`` entry holding the bound value; an original
        ``"valueFrom"`` is preserved under ``"do_eval"``.

        Side effects visible in this method:
          * record fields missing from ``datum`` are written into ``datum``
            with the field's ``"default"`` (``None`` when there is none);
          * ``File`` values are appended to ``self.files`` and may gain
            ``"contents"`` and ``"secondaryFiles"`` entries.

        Raises:
            validate.ValidationException: when ``schema["type"]`` is a list
                (a union) and ``datum`` validates against none of its members.

        NOTE(review): ``lead_pos`` and ``tail_pos`` default to shared list
        objects.  Nothing in this method mutates them directly, but whether
        that stays safe depends on ``aslist`` returning without modifying its
        argument -- confirm before appending to either in place.
        """
        bindings = []
        binding = None
        # Only a dict-valued "inputBinding" produces a binding; callers in
        # the map/array branches below may pass None here.
        if "inputBinding" in schema and isinstance(schema["inputBinding"],
                                                   dict):
            # Shallow copy so the schema's own binding is not modified.
            binding = copy.copy(schema["inputBinding"])

            # Build the sort key: enclosing position, own position, then tail.
            if "position" in binding:
                binding["position"] = aslist(lead_pos) + aslist(
                    binding["position"]) + aslist(tail_pos)
            else:
                binding["position"] = aslist(lead_pos) + [0] + aslist(tail_pos)

            # Keep the original "valueFrom" under "do_eval" before replacing
            # it with the actual datum.
            if "valueFrom" in binding:
                binding["do_eval"] = binding["valueFrom"]
            binding["valueFrom"] = datum

        # Union types: bind against the first member type the datum
        # validates against.
        if isinstance(schema["type"], list):
            for t in schema["type"]:
                # Resolve the member to an avro schema object: by name when
                # already registered, otherwise by constructing it.
                if isinstance(t, basestring) and self.names.has_name(t, ""):
                    avsc = self.names.get_name(t, "")
                elif isinstance(t,
                                dict) and "name" in t and self.names.has_name(
                                    t["name"], ""):
                    avsc = self.names.get_name(t["name"], "")
                else:
                    avsc = avro.schema.make_avsc_object(t, self.names)
                if validate.validate(avsc, datum):
                    # Re-run on a copy of the schema narrowed to the matching
                    # member; the original schema is left untouched.
                    schema = copy.deepcopy(schema)
                    schema["type"] = t
                    return self.bind_input(schema,
                                           datum,
                                           lead_pos=lead_pos,
                                           tail_pos=tail_pos)
            raise validate.ValidationException("'%s' is not a valid union %s" %
                                               (datum, schema["type"]))
        elif isinstance(schema["type"], dict):
            # Nested (inline) type definition: recurse into a copy of it.
            st = copy.deepcopy(schema["type"])
            # When this level has a binding without "itemSeparator" and the
            # nested array/map has no binding of its own, give it an empty
            # one so the recursive call creates bindings for it.
            if binding and "inputBinding" not in st and "itemSeparator" not in binding and st[
                    "type"] in ("array", "map"):
                st["inputBinding"] = {}
            bindings.extend(
                self.bind_input(st,
                                datum,
                                lead_pos=lead_pos,
                                tail_pos=tail_pos))
        else:
            # A type name found in self.schemaDefs is replaced by its
            # definition before dispatching on the concrete type.
            if schema["type"] in self.schemaDefs:
                schema = self.schemaDefs[schema["type"]]

            if schema["type"] == "record":
                for f in schema["fields"]:
                    if f["name"] in datum:
                        # The field name is used as the tail of the sort key.
                        bindings.extend(
                            self.bind_input(f,
                                            datum[f["name"]],
                                            lead_pos=lead_pos,
                                            tail_pos=f["name"]))
                    else:
                        # Missing field: store its default (or None) in datum.
                        datum[f["name"]] = f.get("default")

            if schema["type"] == "map":
                for n, item in datum.items():
                    b2 = None
                    if binding:
                        # Each entry gets its own copy of the binding, bound
                        # to the [key, value] pair.
                        b2 = copy.deepcopy(binding)
                        b2["valueFrom"] = [n, item]
                    bindings.extend(
                        self.bind_input(
                            {
                                "type": schema["values"],
                                "inputBinding": b2
                            },
                            item,
                            lead_pos=n,
                            tail_pos=tail_pos))
                # The binding was handed down to the entries; clearing it
                # skips the final re-prefix/append step for the map itself.
                binding = None

            if schema["type"] == "array":
                for n, item in enumerate(datum):
                    b2 = None
                    if binding:
                        # Each element gets its own copy of the binding.
                        b2 = copy.deepcopy(binding)
                        b2["valueFrom"] = item
                    bindings.extend(
                        self.bind_input(
                            {
                                "type": schema["items"],
                                "inputBinding": b2
                            },
                            item,
                            lead_pos=n,
                            tail_pos=tail_pos))
                # Same as for maps: the array itself contributes no binding.
                binding = None

            if schema["type"] == "File":
                # Every File datum is recorded on self.files.
                self.files.append(datum)
                if binding:
                    if binding.get("loadContents"):
                        # Read at most CONTENT_LIMIT from the file into the
                        # datum itself.
                        with self.fs_access.open(datum["path"], "rb") as f:
                            datum["contents"] = f.read(CONTENT_LIMIT)

                    if "secondaryFiles" in binding:
                        if "secondaryFiles" not in datum:
                            datum["secondaryFiles"] = []
                        for sf in aslist(binding["secondaryFiles"]):
                            if isinstance(sf, dict):
                                # Dict entries are evaluated with the primary
                                # file's path as context.
                                sfpath = self.do_eval(sf,
                                                      context=datum["path"])
                            else:
                                # Other entries are passed to substitute()
                                # together with the primary file's path.
                                sfpath = {
                                    "path": substitute(datum["path"], sf),
                                    "class": "File"
                                }
                            # Secondary files are recorded both on the datum
                            # and on self.files.
                            if isinstance(sfpath, list):
                                datum["secondaryFiles"].extend(sfpath)
                                self.files.extend(sfpath)
                            else:
                                datum["secondaryFiles"].append(sfpath)
                                self.files.append(sfpath)

        # Prefix every nested binding's position with this binding's position
        # so children sort within their parent, then add this binding itself.
        if binding:
            for bi in bindings:
                bi["position"] = binding["position"] + bi["position"]
            bindings.append(binding)

        return bindings
    def job(self, joborder, input_basedir, output_callback, **kwargs):
        """Yield one configured job runner for the given job order.

        The builder returned by ``self._init_job`` is extended with bindings
        for the tool's ``baseCommand`` and ``arguments``; the bindings are
        sorted by position and turned into the final command line.  Along the
        way stdin/stdout, path mapping, output/temporary directories,
        generated files and environment variables are set on the runner.

        Recognised ``kwargs`` read here: ``part_of``, ``use_container``,
        ``outdir``, ``tmp_outdir_prefix``, ``tmpdir``, ``tmpdir_prefix``.  All
        ``kwargs`` are also forwarded to ``_init_job`` and ``makePathMapper``.

        Raises:
            validate.ValidationException: when the evaluated ``stdout`` is an
                absolute path or contains ``".."``.
        """
        builder = self._init_job(joborder, input_basedir, **kwargs)

        # Base command words sort ahead of everything via a large negative
        # leading position.
        base_command = self.tool["baseCommand"]
        if base_command:
            for index, word in enumerate(aslist(base_command)):
                builder.bindings.append({
                    "position": [-1000000, index],
                    "valueFrom": word
                })

        extra_arguments = self.tool.get("arguments")
        if extra_arguments:
            for index, argument in enumerate(extra_arguments):
                if not isinstance(argument, dict):
                    # Non-dict arguments are bound as-is.
                    builder.bindings.append({
                        "position": [0, index],
                        "valueFrom": argument
                    })
                    continue

                # Dict arguments are copied, positioned, and have their
                # "valueFrom" moved to "do_eval".
                entry = copy.copy(argument)
                entry["position"] = [entry.get("position") or 0, index]
                entry["do_eval"] = entry["valueFrom"]
                entry["valueFrom"] = None
                builder.bindings.append(entry)

        builder.bindings.sort(key=lambda entry: entry["position"])

        reffiles = {entry["path"] for entry in builder.files}

        j = self.makeJobRunner()
        j.joborder = builder.job
        j.stdin = None
        j.stdout = None
        for code_list in ("successCodes", "temporaryFailCodes",
                          "permanentFailCodes"):
            setattr(j, code_list, self.tool.get(code_list))
        j.requirements = self.requirements
        j.hints = self.hints

        if "part_of" in kwargs:
            origin = " as part of %s" % kwargs["part_of"]
        else:
            origin = ""
        _logger.debug("[job %s] initializing from %s%s", id(j),
                      self.tool.get("id", ""), origin)
        _logger.debug("[job %s] %s", id(j), json.dumps(joborder, indent=4))

        # No path mapper is set while stdin/stdout are evaluated.
        builder.pathmapper = None

        if self.tool.get("stdin"):
            stdin_path = builder.do_eval(self.tool["stdin"])
            if isinstance(stdin_path, dict) and "ref" in stdin_path:
                # A {"ref": "#name"} result is looked up in the job order
                # (leading character stripped) and its path is used.
                stdin_path = builder.job[stdin_path["ref"][1:]]["path"]
            j.stdin = stdin_path
            reffiles.add(stdin_path)

        if self.tool.get("stdout"):
            stdout_name = builder.do_eval(self.tool["stdout"])
            j.stdout = stdout_name
            if os.path.isabs(stdout_name) or ".." in stdout_name:
                raise validate.ValidationException(
                    "stdout must be a relative path")

        builder.pathmapper = self.makePathMapper(reffiles, input_basedir,
                                                 **kwargs)
        builder.requirements = j.requirements

        # Rewrite every referenced file path to the second element of its
        # mapping.
        for file_entry in builder.files:
            file_entry["path"] = builder.pathmapper.mapper(
                file_entry["path"])[1]

        _logger.debug("[job %s] command line bindings is %s", id(j),
                      json.dumps(builder.bindings, indent=4))
        path_mappings = dict((path, builder.pathmapper.mapper(path))
                             for path in builder.pathmapper.files())
        _logger.debug("[job %s] path mappings is %s", id(j),
                      json.dumps(path_mappings, indent=4))

        dockerReq, _ = self.get_requirement("DockerRequirement")
        if dockerReq and kwargs.get("use_container"):
            # Explicit directories win; otherwise fresh temporary directories
            # are created with the configured prefixes.
            outdir = kwargs.get("outdir")
            if not outdir:
                outdir = tempfile.mkdtemp(
                    prefix=kwargs.get("tmp_outdir_prefix"))
            j.outdir = outdir

            tmpdir = kwargs.get("tmpdir")
            if not tmpdir:
                tmpdir = tempfile.mkdtemp(prefix=kwargs.get('tmpdir_prefix'))
            j.tmpdir = tmpdir
        else:
            j.outdir = builder.outdir
            j.tmpdir = builder.tmpdir

        createFiles, _ = self.get_requirement("CreateFileRequirement")
        j.generatefiles = {}
        if createFiles:
            for file_def in createFiles["fileDef"]:
                # Content is evaluated before the file name, as in a plain
                # subscript assignment.
                content = copy.deepcopy(
                    builder.do_eval(file_def["fileContent"]))
                filename = builder.do_eval(file_def["filename"])
                j.generatefiles[filename] = content

        j.environment = {}
        evr, _ = self.get_requirement("EnvVarRequirement")
        if evr:
            for env_def in evr["envDef"]:
                env_value = builder.do_eval(env_def["envValue"])
                j.environment[env_def["envName"]] = env_value

        j.command_line = flatten(map(builder.generate_arg, builder.bindings))

        j.pathmapper = builder.pathmapper
        j.collect_outputs = functools.partial(self.collect_output_ports,
                                              self.tool["outputs"], builder)
        j.output_callback = output_callback

        yield j
Beispiel #4
0
    def __init__(self,
                 toolpath_object,
                 validateAs,
                 do_validate=True,
                 **kwargs):
        """Set up the process from a tool document.

        Args:
            toolpath_object: the tool document (a mapping); stored as
                ``self.tool``.
            validateAs: name of the schema the document is validated against.
            do_validate: when true, validate the document before use.
            **kwargs: ``strict`` is forwarded to validation; ``requirements``
                and ``hints`` are prepended to the tool's own lists.

        Raises:
            validate.ValidationException: when the document does not validate
                as ``validateAs``, or an input/output parameter has no
                ``type``.
        """
        (_, self.names) = get_schema()
        self.tool = toolpath_object
        strict = kwargs.get("strict")

        if do_validate:
            try:
                # Validate the tool document against the requested schema.
                validate.validate_ex(self.names.get_name(validateAs, ""),
                                     self.tool,
                                     strict=strict)
            except validate.ValidationException as err:
                raise validate.ValidationException(
                    "Could not validate %s as %s:\n%s" %
                    (self.tool.get("id"), validateAs,
                     validate.indent(str(err))))

        self.requirements = kwargs.get("requirements", []) + self.tool.get(
            "requirements", [])
        tool_hints = self.tool.get("hints", [])
        self.hints = kwargs.get("hints", []) + tool_hints

        # Only the hints declared by the tool itself are validated.
        self.validate_hints(tool_hints, strict=strict)

        self.schemaDefs = {}

        sd, _ = self.get_requirement("SchemaDefRequirement")

        if sd:
            # Register each user-defined type and index it by name.
            for type_def in sd["types"]:
                avro.schema.make_avsc_object(type_def, self.names)
                self.schemaDefs[type_def["name"]] = type_def

        def as_record_field(param):
            # Turn one input/output parameter into a record field: the
            # fragment of its "id" becomes the field name, and a parameter
            # with a default also accepts "null".
            field = copy.copy(param)
            _doc_url, fragment = urlparse.urldefrag(field['id'])
            field["name"] = fragment
            del field["id"]

            if "type" not in field:
                raise validate.ValidationException(
                    "Missing `type` in parameter `%s`" % field["name"])

            if "default" in field:
                field["type"] = ["null"] + aslist(field["type"])
            return field

        # Build record schema from inputs
        self.inputs_record_schema = {
            "name": "input_record_schema",
            "type": "record",
            "fields": []
        }
        for param in self.tool["inputs"]:
            self.inputs_record_schema["fields"].append(as_record_field(param))

        avro.schema.make_avsc_object(self.inputs_record_schema, self.names)

        # Build record schema from outputs
        self.outputs_record_schema = {
            "name": "outputs_record_schema",
            "type": "record",
            "fields": []
        }
        for param in self.tool["outputs"]:
            self.outputs_record_schema["fields"].append(
                as_record_field(param))

        avro.schema.make_avsc_object(self.outputs_record_schema, self.names)