def _init_job(self, joborder, input_basedir, **kwargs):
        builder = Builder()
        builder.job = copy.deepcopy(joborder)

        for i in self.tool["inputs"]:
            (_, d) = urlparse.urldefrag(i["id"])
            if d not in builder.job and "default" in i:
                builder.job[d] = i["default"]

        # Validate job order
        try:
            validate.validate_ex(self.names.get_name("input_record_schema", ""), builder.job)
        except validate.ValidationException as e:
            raise WorkflowException("Error validating input record, " + str(e))

        for r in self.tool.get("requirements", []):
            if r["class"] not in supportedProcessRequirements:
                raise WorkflowException("Unsupported process requirement %s" % (r["class"]))

        self.requirements = kwargs.get("requirements", []) + self.tool.get("requirements", [])
        self.hints = kwargs.get("hints", []) + self.tool.get("hints", [])

        builder.input_basedir = input_basedir
        builder.files = []
        builder.bindings = []
        builder.schemaDefs = self.schemaDefs
        builder.docpath = self.docpath
        builder.names = self.names

        builder.bindings.extend(builder.bind_input(self.inputs_record_schema, builder.job))

        return builder
 def validate_hints(self, hints, strict):
     for r in hints:
         try:
             if self.names.get_name(r["class"], "") is not None:
                 validate.validate_ex(self.names.get_name(r["class"], ""), r, strict=strict)
             else:
                 _logger.info(validate.ValidationException("Unknown hint %s" % (r["class"])))
         except validate.ValidationException as v:
             raise validate.ValidationException("Validating hint `%s`: %s" % (r["class"], str(v)))
Esempio n. 3
0
    def job(self, joborder, basedir, output_callback, **kwargs):
        # Validate job order
        validate.validate_ex(self.names.get_name("input_record_schema", ""), joborder)

        kwargs["part_of"] = "workflow %s" % (id(self))
        wj = WorkflowJob(self, **kwargs)

        yield wj

        for w in wj.job(joborder, basedir, output_callback, **kwargs):
            yield w
Esempio n. 4
0
    def job(self, joborder, basedir, output_callback, **kwargs):
        # Validate job order
        validate.validate_ex(self.names.get_name("input_record_schema", ""),
                             joborder)

        kwargs["part_of"] = "workflow %s" % (id(self))
        wj = WorkflowJob(self, **kwargs)

        yield wj

        for w in wj.job(joborder, basedir, output_callback, **kwargs):
            yield w
    def job(self, joborder, basedir, output_callback, **kwargs):
        # Validate job order
        validate.validate_ex(self.names.get_name("input_record_schema", ""), joborder)

        requirements = kwargs.get("requirements", []) + self.tool.get("requirements", [])
        hints = kwargs.get("hints", []) + self.tool.get("hints", [])

        steps = [makeTool(step, basedir) for step in self.tool.get("steps", [])]
        random.shuffle(steps)

        self.state = {}
        self.processStatus = "success"
        for i in self.tool["inputs"]:
            (_, iid) = urlparse.urldefrag(i["id"])
            if iid in joborder:
                self.state[i["id"]] = WorkflowStateItem(i, copy.deepcopy(joborder[iid]))
            elif "default" in i:
                self.state[i["id"]] = WorkflowStateItem(i, copy.deepcopy(i["default"]))
            else:
                raise WorkflowException("Input '%s' not in input object and does not have a default value." % (i["id"]))

        for s in steps:
            for out in s.tool["outputs"]:
                self.state[out["id"]] = None
            s.completed = False

        completed = 0
        while completed < len(steps):
            made_progress = False
            completed = 0
            for step in steps:
                if step.completed:
                    completed += 1
                else:
                    for newjob in self.try_make_job(step, basedir, requirements=requirements, hints=hints, **kwargs):
                        if newjob:
                            made_progress = True
                            yield newjob
            if not made_progress and completed < len(steps):
                yield None

        wo = {}
        for i in self.tool["outputs"]:
            if "connect" in i:
                (_, src) = urlparse.urldefrag(i['id'])
                if i["connect"]["source"] not in self.state:
                    raise WorkflowException("Connect source '%s' on parameter '%s' does not exist" % (i["connect"]["source"], inp["id"]))
                wo[src] = self.state[i["connect"]["source"]].value

        output_callback(wo, self.processStatus)
Esempio n. 6
0
 def validate_hints(self, hints, strict):
     for r in hints:
         try:
             if self.names.get_name(r["class"], "") is not None:
                 validate.validate_ex(self.names.get_name(r["class"], ""),
                                      r,
                                      strict=strict)
             else:
                 _logger.info(
                     validate.ValidationException("Unknown hint %s" %
                                                  (r["class"])))
         except validate.ValidationException as v:
             raise validate.ValidationException("Validating hint `%s`: %s" %
                                                (r["class"], str(v)))
    def collect_output_ports(self, ports, builder, outdir):
        try:
            custom_output = os.path.join(outdir, "cwl.output.json")
            if builder.fs_access.exists(custom_output):
                outputdoc = yaml.load(custom_output)
                validate.validate_ex(self.names.get_name("outputs_record_schema", ""), outputdoc)
                return outputdoc

            ret = {}
            for port in ports:
                doc_url, fragment = urlparse.urldefrag(port["id"])
                ret[fragment] = self.collect_output(port, builder, outdir)
            validate.validate_ex(self.names.get_name("outputs_record_schema", ""), ret)
            return ret if ret is not None else {}
        except validate.ValidationException as e:
            raise WorkflowException("Error validating output record, " + str(e) + "\n in " + json.dumps(ret, indent=4))
 def validate_requirements(self, tool, field):
     for r in tool.get(field, []):
         try:
             if self.names.get_name(r["class"], "") is None:
                 raise validate.ValidationException("Unknown requirement %s" % (r["class"]))
             validate.validate_ex(self.names.get_name(r["class"], ""), r)
             if "requirements" in r:
                 self.validate_requirements(r, "requirements")
             if "hints" in r:
                 self.validate_requirements(r, "hints")
         except validate.ValidationException as v:
             err = "While validating %s %s\n%s" % (field, r["class"], validate.indent(str(v)))
             if field == "hints":
                 _logger.warn(err)
             else:
                 raise validate.ValidationException(err)
    def _init_job(self, joborder, input_basedir, **kwargs):
        builder = Builder()
        builder.job = copy.deepcopy(joborder)

        for i in self.tool["inputs"]:
            (_, d) = urlparse.urldefrag(i["id"])
            if d not in builder.job and "default" in i:
                builder.job[d] = i["default"]

        # Validate job order
        try:
            validate.validate_ex(
                self.names.get_name("input_record_schema", ""), builder.job)
        except validate.ValidationException as e:
            raise WorkflowException("Error validating input record, " + str(e))

        for r in self.requirements:
            if r["class"] not in supportedProcessRequirements:
                raise WorkflowException("Unsupported process requirement %s" %
                                        (r["class"]))

        builder.files = []
        builder.bindings = []
        builder.schemaDefs = self.schemaDefs
        builder.names = self.names
        builder.requirements = self.requirements

        dockerReq, _ = self.get_requirement("DockerRequirement")
        if dockerReq and kwargs.get("use_container"):
            builder.outdir = kwargs.get("docker_outdir") or "/tmp/job_output"
            builder.tmpdir = kwargs.get("docker_tmpdir") or "/tmp/job_tmp"
        else:
            builder.outdir = kwargs.get("outdir") or tempfile.mkdtemp()
            builder.tmpdir = kwargs.get("tmpdir") or tempfile.mkdtemp()

        builder.fs_access = kwargs.get("fs_access") or StdFsAccess(
            input_basedir)

        builder.bindings.extend(
            builder.bind_input(self.inputs_record_schema, builder.job))

        return builder
    def collect_output_ports(self, ports, builder, outdir):
        try:
            custom_output = os.path.join(outdir, "cwl.output.json")
            if builder.fs_access.exists(custom_output):
                outputdoc = yaml.load(custom_output)
                validate.validate_ex(
                    self.names.get_name("outputs_record_schema", ""),
                    outputdoc)
                return outputdoc

            ret = {}
            for port in ports:
                doc_url, fragment = urlparse.urldefrag(port['id'])
                ret[fragment] = self.collect_output(port, builder, outdir)
            validate.validate_ex(
                self.names.get_name("outputs_record_schema", ""), ret)
            return ret if ret is not None else {}
        except validate.ValidationException as e:
            raise WorkflowException("Error validating output record, " +
                                    str(e) + "\n in " +
                                    json.dumps(ret, indent=4))
Esempio n. 11
0
    def _init_job(self, joborder, input_basedir, **kwargs):
        builder = Builder()
        builder.job = copy.deepcopy(joborder)

        for i in self.tool["inputs"]:
            (_, d) = urlparse.urldefrag(i["id"])
            if d not in builder.job and "default" in i:
                builder.job[d] = i["default"]

        # Validate job order
        try:
            validate.validate_ex(self.names.get_name("input_record_schema", ""), builder.job)
        except validate.ValidationException as e:
            raise WorkflowException("Error validating input record, " + str(e))

        for r in self.requirements:
            if r["class"] not in supportedProcessRequirements:
                raise WorkflowException("Unsupported process requirement %s" % (r["class"]))

        builder.files = []
        builder.bindings = []
        builder.schemaDefs = self.schemaDefs
        builder.names = self.names
        builder.requirements = self.requirements

        dockerReq, _ = self.get_requirement("DockerRequirement")
        if dockerReq and kwargs.get("use_container"):
            builder.outdir = kwargs.get("docker_outdir") or "/tmp/job_output"
            builder.tmpdir = kwargs.get("docker_tmpdir") or "/tmp/job_tmp"
        else:
            builder.outdir = kwargs.get("outdir") or tempfile.mkdtemp()
            builder.tmpdir = kwargs.get("tmpdir") or tempfile.mkdtemp()

        builder.fs_access = kwargs.get("fs_access") or StdFsAccess(input_basedir)

        builder.bindings.extend(builder.bind_input(self.inputs_record_schema, builder.job))

        return builder
    def job(self, joborder, basedir, output_callback, **kwargs):
        # Validate job order
        validate.validate_ex(self.names.get_name("input_record_schema", ""), joborder)

        self.adjust_for_scatter(self.steps)

        random.shuffle(self.steps)

        self.state = {}
        self.processStatus = "success"
        for i in self.tool["inputs"]:
            (_, iid) = urlparse.urldefrag(i["id"])
            if iid in joborder:
                self.state[i["id"]] = WorkflowStateItem(i, copy.deepcopy(joborder[iid]))
            elif "default" in i:
                self.state[i["id"]] = WorkflowStateItem(i, copy.deepcopy(i["default"]))
            else:
                raise WorkflowException("Input '%s' not in input object and does not have a default value." % (i["id"]))

        for s in self.steps:
            for out in s.tool["outputs"]:
                self.state[out["id"]] = None
            s.submitted = False
            s.completed = False

        if "outdir" in kwargs:
            outdir = kwargs["outdir"]
            del kwargs["outdir"]
        else:
            outdir = tempfile.mkdtemp()

        actual_jobs = []

        completed = 0
        while completed < len(self.steps) and self.processStatus == "success":
            made_progress = False
            completed = 0
            for step in self.steps:
                if step.completed:
                    completed += 1
                else:
                    for newjob in self.try_make_job(step, basedir, **kwargs):
                        if newjob:
                            made_progress = True
                            actual_jobs.append(newjob)
                        yield newjob
            if not made_progress and completed < len(self.steps):
                yield None

        wo = self.object_from_state(self.tool["outputs"], True)

        if kwargs.get("move_outputs", True):
            targets = set()
            conflicts = set()

            for f in findfiles(wo):
                for a in actual_jobs:
                    if a.outdir and f["path"].startswith(a.outdir):
                        src = f["path"]
                        dst = os.path.join(outdir, src[len(a.outdir)+1:])
                        if dst in targets:
                            conflicts.add(dst)
                        else:
                            targets.add(dst)

            for f in findfiles(wo):
                for a in actual_jobs:
                    if a.outdir and f["path"].startswith(a.outdir):
                        src = f["path"]
                        dst = os.path.join(outdir, src[len(a.outdir)+1:])
                        if dst in conflicts:
                            sp = os.path.splitext(dst)
                            dst = "%s-%s%s" % (sp[0], str(random.randint(1, 1000000000)), sp[1])
                        dirname = os.path.dirname(dst)
                        if not os.path.exists(dirname):
                            os.makedirs(dirname)
                        _logger.info("Moving '%s' to '%s'", src, dst)
                        shutil.move(src, dst)
                        f["path"] = dst

            for a in actual_jobs:
                if a.outdir:
                    _logger.info("Removing intermediate output directory %s", a.outdir)
                    shutil.rmtree(a.outdir, True)

        output_callback(wo, self.processStatus)
Esempio n. 13
0
    def job(self, joborder, basedir, output_callback, **kwargs):
        # Validate job order
        validate.validate_ex(self.names.get_name("input_record_schema", ""),
                             joborder)

        self.adjust_for_scatter(self.steps)

        random.shuffle(self.steps)

        self.state = {}
        self.processStatus = "success"
        for i in self.tool["inputs"]:
            (_, iid) = urlparse.urldefrag(i["id"])
            if iid in joborder:
                self.state[i["id"]] = WorkflowStateItem(
                    i, copy.deepcopy(joborder[iid]))
            elif "default" in i:
                self.state[i["id"]] = WorkflowStateItem(
                    i, copy.deepcopy(i["default"]))
            else:
                raise WorkflowException(
                    "Input '%s' not in input object and does not have a default value."
                    % (i["id"]))

        for s in self.steps:
            for out in s.tool["outputs"]:
                self.state[out["id"]] = None
            s.submitted = False
            s.completed = False

        if "outdir" in kwargs:
            outdir = kwargs["outdir"]
            del kwargs["outdir"]
        else:
            outdir = tempfile.mkdtemp()

        actual_jobs = []

        completed = 0
        while completed < len(self.steps) and self.processStatus == "success":
            made_progress = False
            completed = 0
            for step in self.steps:
                if step.completed:
                    completed += 1
                else:
                    for newjob in self.try_make_job(step, basedir, **kwargs):
                        if newjob:
                            made_progress = True
                            actual_jobs.append(newjob)
                        yield newjob
            if not made_progress and completed < len(self.steps):
                yield None

        wo = self.object_from_state(self.tool["outputs"], True)

        if kwargs.get("move_outputs", True):
            targets = set()
            conflicts = set()

            for f in findfiles(wo):
                for a in actual_jobs:
                    if a.outdir and f["path"].startswith(a.outdir):
                        src = f["path"]
                        dst = os.path.join(outdir, src[len(a.outdir) + 1:])
                        if dst in targets:
                            conflicts.add(dst)
                        else:
                            targets.add(dst)

            for f in findfiles(wo):
                for a in actual_jobs:
                    if a.outdir and f["path"].startswith(a.outdir):
                        src = f["path"]
                        dst = os.path.join(outdir, src[len(a.outdir) + 1:])
                        if dst in conflicts:
                            sp = os.path.splitext(dst)
                            dst = "%s-%s%s" % (
                                sp[0], str(random.randint(1,
                                                          1000000000)), sp[1])
                        dirname = os.path.dirname(dst)
                        if not os.path.exists(dirname):
                            os.makedirs(dirname)
                        _logger.info("Moving '%s' to '%s'", src, dst)
                        shutil.move(src, dst)
                        f["path"] = dst

            for a in actual_jobs:
                if a.outdir:
                    _logger.info("Removing intermediate output directory %s",
                                 a.outdir)
                    shutil.rmtree(a.outdir, True)

        output_callback(wo, self.processStatus)
    def __init__(self, toolpath_object, validateAs, do_validate=True, **kwargs):
        (_, self.names) = get_schema()
        self.tool = toolpath_object

        if do_validate:
            try:
                # Validate tool documument
                validate.validate_ex(self.names.get_name(validateAs, ""), self.tool, strict=kwargs.get("strict"))
            except validate.ValidationException as v:
                raise validate.ValidationException("Could not validate %s as %s:\n%s" % (self.tool.get("id"), validateAs, validate.indent(str(v))))

        self.requirements = kwargs.get("requirements", []) + self.tool.get("requirements", [])
        self.hints = kwargs.get("hints", []) + self.tool.get("hints", [])

        self.validate_hints(self.tool.get("hints", []), strict=kwargs.get("strict"))

        self.schemaDefs = {}

        sd, _ = self.get_requirement("SchemaDefRequirement")

        if sd:
            for i in sd["types"]:
                avro.schema.make_avsc_object(i, self.names)
                self.schemaDefs[i["name"]] = i

        # Build record schema from inputs
        self.inputs_record_schema = {"name": "input_record_schema", "type": "record", "fields": []}
        for i in self.tool["inputs"]:
            c = copy.copy(i)
            doc_url, fragment = urlparse.urldefrag(c['id'])
            c["name"] = fragment
            del c["id"]

            if "type" not in c:
                raise validate.ValidationException("Missing `type` in parameter `%s`" % c["name"])

            if "default" in c:
                c["type"] = ["null"] + aslist(c["type"])
            else:
                c["type"] = c["type"]
            self.inputs_record_schema["fields"].append(c)

        avro.schema.make_avsc_object(self.inputs_record_schema, self.names)

        self.outputs_record_schema = {"name": "outputs_record_schema", "type": "record", "fields": []}
        for i in self.tool["outputs"]:
            c = copy.copy(i)
            doc_url, fragment = urlparse.urldefrag(c['id'])
            c["name"] = fragment
            del c["id"]

            if "type" not in c:
                raise validate.ValidationException("Missing `type` in parameter `%s`" % c["name"])

            if "default" in c:
                c["type"] = ["null"] + aslist(c["type"])
            else:
                c["type"] = c["type"]
            self.outputs_record_schema["fields"].append(c)

        avro.schema.make_avsc_object(self.outputs_record_schema, self.names)
Esempio n. 15
0
    def __init__(self,
                 toolpath_object,
                 validateAs,
                 do_validate=True,
                 **kwargs):
        (_, self.names) = get_schema()
        self.tool = toolpath_object

        if do_validate:
            try:
                # Validate tool documument
                validate.validate_ex(self.names.get_name(validateAs, ""),
                                     self.tool,
                                     strict=kwargs.get("strict"))
            except validate.ValidationException as v:
                raise validate.ValidationException(
                    "Could not validate %s as %s:\n%s" %
                    (self.tool.get("id"), validateAs, validate.indent(str(v))))

        self.requirements = kwargs.get("requirements", []) + self.tool.get(
            "requirements", [])
        self.hints = kwargs.get("hints", []) + self.tool.get("hints", [])

        self.validate_hints(self.tool.get("hints", []),
                            strict=kwargs.get("strict"))

        self.schemaDefs = {}

        sd, _ = self.get_requirement("SchemaDefRequirement")

        if sd:
            for i in sd["types"]:
                avro.schema.make_avsc_object(i, self.names)
                self.schemaDefs[i["name"]] = i

        # Build record schema from inputs
        self.inputs_record_schema = {
            "name": "input_record_schema",
            "type": "record",
            "fields": []
        }
        for i in self.tool["inputs"]:
            c = copy.copy(i)
            doc_url, fragment = urlparse.urldefrag(c['id'])
            c["name"] = fragment
            del c["id"]

            if "type" not in c:
                raise validate.ValidationException(
                    "Missing `type` in parameter `%s`" % c["name"])

            if "default" in c:
                c["type"] = ["null"] + aslist(c["type"])
            else:
                c["type"] = c["type"]
            self.inputs_record_schema["fields"].append(c)

        avro.schema.make_avsc_object(self.inputs_record_schema, self.names)

        self.outputs_record_schema = {
            "name": "outputs_record_schema",
            "type": "record",
            "fields": []
        }
        for i in self.tool["outputs"]:
            c = copy.copy(i)
            doc_url, fragment = urlparse.urldefrag(c['id'])
            c["name"] = fragment
            del c["id"]

            if "type" not in c:
                raise validate.ValidationException(
                    "Missing `type` in parameter `%s`" % c["name"])

            if "default" in c:
                c["type"] = ["null"] + aslist(c["type"])
            else:
                c["type"] = c["type"]
            self.outputs_record_schema["fields"].append(c)

        avro.schema.make_avsc_object(self.outputs_record_schema, self.names)
    def __init__(self, toolpath_object, validateAs, docpath, **kwargs):
        (_, self.names) = get_schema()
        self.docpath = docpath

        self.tool = toolpath_object

        try:
            # Validate tool documument
            validate.validate_ex(self.names.get_name(validateAs, ""), self.tool, **kwargs)
        except validate.ValidationException as v:
            raise validate.ValidationException("Could not validate %s:\n%s" % (self.tool.get("id"), validate.indent(str(v))))

        self.validate_requirements(self.tool, "requirements")
        self.validate_requirements(self.tool, "hints")

        for t in self.tool.get("requirements", []):
            t["_docpath"] = docpath

        for t in self.tool.get("hints", []):
            t["_docpath"] = docpath

        avro.schema.make_avsc_object({
            "name": "Any",
            "type": "enum",
            "symbols": ["Any"]
        }, self.names)

        self.schemaDefs = {}

        sd, _ = get_feature("SchemaDefRequirement", requirements=self.tool.get("requirements"), hints=self.tool.get("hints"))
        if sd:
            for i in sd["types"]:
                avro.schema.make_avsc_object(i, self.names)
                self.schemaDefs[i["name"]] = i

        # Build record schema from inputs
        self.inputs_record_schema = {"name": "input_record_schema", "type": "record", "fields": []}
        for i in self.tool["inputs"]:
            c = copy.copy(i)
            doc_url, fragment = urlparse.urldefrag(c['id'])
            c["name"] = fragment
            del c["id"]

            if "type" not in c:
                raise validate.ValidationException("Missing `type` in parameter `%s`" % c["name"])

            if "default" in c:
                c["type"] = ["null"] + aslist(c["type"])
            else:
                c["type"] = c["type"]
            self.inputs_record_schema["fields"].append(c)

        avro.schema.make_avsc_object(self.inputs_record_schema, self.names)

        self.outputs_record_schema = {"name": "outputs_record_schema", "type": "record", "fields": []}
        for i in self.tool["outputs"]:
            c = copy.copy(i)
            doc_url, fragment = urlparse.urldefrag(c['id'])
            c["name"] = fragment
            del c["id"]

            if "type" not in c:
                raise validate.ValidationException("Missing `type` in parameter `%s`" % c["name"])

            if "default" in c:
                c["type"] = ["null"] + aslist(c["type"])
            else:
                c["type"] = c["type"]
            self.outputs_record_schema["fields"].append(c)

        avro.schema.make_avsc_object(self.outputs_record_schema, self.names)