Example #1
    def collect_output_ports(self, ports, builder, outdir):
        # type: (Set[Dict[str,Any]], Builder, str) -> Dict[str,Union[str,List[Any],Dict[str,Any]]]
        try:
            ret = {}  # type: Dict[str,Union[str,List[Any],Dict[str,Any]]]
            custom_output = os.path.join(outdir, "cwl.output.json")
            if builder.fs_access.exists(custom_output):
                with builder.fs_access.open(custom_output, "r") as f:
                    ret = json.load(f)
                _logger.debug(u"Raw output from %s: %s", custom_output, json.dumps(ret, indent=4))
                adjustFileObjs(ret, remove_hostfs)
                adjustFileObjs(ret,
                        cast(Callable[[Any], Any],  # known bug in mypy
                            # https://github.com/python/mypy/issues/797
                            partial(revmap_file, builder, outdir)))
                adjustFileObjs(ret, remove_hostfs)
                validate.validate_ex(self.names.get_name("outputs_record_schema", ""), ret)
                return ret

            for port in ports:
                fragment = shortname(port["id"])
                try:
                    ret[fragment] = self.collect_output(port, builder, outdir)
                except Exception as e:
                    raise WorkflowException(u"Error collecting output for parameter '%s': %s" % (shortname(port["id"]), e))
            if ret:
                adjustFileObjs(ret, remove_hostfs)
            validate.validate_ex(self.names.get_name("outputs_record_schema", ""), ret)
            return ret if ret is not None else {}
        except validate.ValidationException as e:
            raise WorkflowException("Error validating output record, " + str(e) + "\n in " + json.dumps(ret, indent=4))
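Example #1 first honors a tool-supplied cwl.output.json, then falls back to collecting each port; File objects are afterwards rewritten through functools.partial(revmap_file, builder, outdir). The following standalone sketch illustrates that visitor-plus-partial pattern with hypothetical stand-ins (adjust_file_objs and revmap_file here are simplified, not cwltool's implementations):

import functools
from typing import Any, Callable

def adjust_file_objs(rec: Any, op: Callable[[dict], None]) -> None:
    # Recursively apply ``op`` to every mapping whose "class" is "File".
    if isinstance(rec, dict):
        if rec.get("class") == "File":
            op(rec)
        for value in rec.values():
            adjust_file_objs(value, op)
    elif isinstance(rec, list):
        for item in rec:
            adjust_file_objs(item, op)

def revmap_file(builder: Any, outdir: str, f_obj: dict) -> None:
    # Hypothetical stand-in: map a container-side path back under the
    # host output directory, keeping only the basename.
    f_obj["path"] = outdir + "/" + f_obj["path"].rsplit("/", 1)[-1]

outputs = {"out": {"class": "File", "path": "/var/spool/cwl/result.txt"}}
adjust_file_objs(outputs, functools.partial(revmap_file, None, "/tmp/job1"))
assert outputs["out"]["path"] == "/tmp/job1/result.txt"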
Example #2
 def validate_hints(self, avsc_names: Names, hints: List[CWLObjectType],
                    strict: bool) -> None:
     if self.doc_loader is None:
         return
     debug = _logger.isEnabledFor(logging.DEBUG)
     for i, r in enumerate(hints):
         sl = SourceLine(hints, i, ValidationException, debug)
         with sl:
             classname = cast(str, r["class"])
             avroname = classname
             if classname in self.doc_loader.vocab:
                 avroname = avro_type_name(self.doc_loader.vocab[classname])
             if avsc_names.get_name(avroname, None) is not None:
                 plain_hint = {
                     key: r[key]
                     for key in r if key not in self.doc_loader.identifiers
                 }  # strip identifiers
                 validate_ex(
                     cast(
                         Schema,
                         avsc_names.get_name(avroname, None),
                     ),
                     plain_hint,
                     strict=strict,
                     vocab=self.doc_loader.vocab,
                 )
             elif r["class"] in ("NetworkAccess", "LoadListingRequirement"):
                 pass
             else:
                 _logger.info(
                     str(sl.makeError("Unknown hint %s" % (r["class"]))))
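Example #2 strips JSON-LD identifier keys from each hint before Avro validation, because fields such as "id" are not declared in the hint's schema and would fail a strict check. A minimal sketch of that step, assuming a document loader whose identifiers set names the keys to drop:

identifiers = {"id"}  # assumed identifier keys from the document loader
hint = {"class": "ResourceRequirement", "coresMin": 2, "id": "#main/req"}
plain_hint = {key: hint[key] for key in hint if key not in identifiers}
assert plain_hint == {"class": "ResourceRequirement", "coresMin": 2}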
Example #3
    def collect_output_ports(self, ports, builder, outdir):
        try:
            ret = {}
            custom_output = os.path.join(outdir, "cwl.output.json")
            if builder.fs_access.exists(custom_output):
                with builder.fs_access.open(custom_output, "r") as f:
                    ret = yaml.load(f)
                _logger.debug("Raw output from %s: %s", custom_output, json.dumps(ret, indent=4))
                adjustFileObjs(ret, remove_hostfs)
                adjustFileObjs(ret, functools.partial(revmap_file, builder, outdir))
                adjustFileObjs(ret, remove_hostfs)
                validate.validate_ex(self.names.get_name("outputs_record_schema", ""), ret)
                return ret

            for port in ports:
                fragment = shortname(port["id"])
                try:
                    ret[fragment] = self.collect_output(port, builder, outdir)
                except Exception as e:
                    raise WorkflowException("Error collecting output for parameter '%s': %s" % (shortname(port["id"]), e))
            if ret:
                adjustFileObjs(ret, remove_hostfs)
            validate.validate_ex(self.names.get_name("outputs_record_schema", ""), ret)
            return ret if ret is not None else {}
        except validate.ValidationException as e:
            raise WorkflowException("Error validating output record, " + str(e) + "\n in " + json.dumps(ret, indent=4))
Example #4
 def validate_hints(
     self, avsc_names: Names, hints: List[CWLObjectType], strict: bool
 ) -> None:
     for i, r in enumerate(hints):
         sl = SourceLine(hints, i, ValidationException)
         with sl:
             if (
                 avsc_names.get_name(cast(str, r["class"]), None) is not None
                 and self.doc_loader is not None
             ):
                 plain_hint = dict(
                     (key, r[key])
                     for key in r
                     if key not in self.doc_loader.identifiers
                 )  # strip identifiers
                 validate_ex(
                     cast(
                         Schema,
                         avsc_names.get_name(cast(str, plain_hint["class"]), None),
                     ),
                     plain_hint,
                     strict=strict,
                 )
             elif r["class"] in ("NetworkAccess", "LoadListingRequirement"):
                 pass
             else:
                 _logger.info(str(sl.makeError("Unknown hint %s" % (r["class"]))))
Example #5
    def collect_output_ports(self, ports, builder, outdir):
        try:
            ret = {}
            custom_output = os.path.join(outdir, "cwl.output.json")
            if builder.fs_access.exists(custom_output):
                with builder.fs_access.open(custom_output, "r") as f:
                    ret = yaml.load(f)
                _logger.debug("Raw output from %s: %s", custom_output, json.dumps(ret, indent=4))
                adjustFileObjs(ret, remove_hostfs)
                adjustFileObjs(ret, functools.partial(revmap_file, builder, outdir))
                adjustFileObjs(ret, remove_hostfs)
                validate.validate_ex(self.names.get_name("outputs_record_schema", ""), ret)
                return ret

            for port in ports:
                fragment = shortname(port["id"])
                try:
                    ret[fragment] = self.collect_output(port, builder, outdir)
                except Exception as e:
                    raise WorkflowException("Error collecting output for parameter '%s': %s" % (shortname(port["id"]), e))
            if ret:
                adjustFileObjs(ret, remove_hostfs)
            validate.validate_ex(self.names.get_name("outputs_record_schema", ""), ret)
            return ret if ret is not None else {}
        except validate.ValidationException as e:
            raise WorkflowException("Error validating output record, " + str(e) + "\n in " + json.dumps(ret, indent=4))
Example #6
    def collect_output_ports(self,
                             ports,                  # type: Set[Dict[Text, Any]]
                             builder,                # type: Builder
                             outdir,                 # type: Text
                             rcode,                  # type: int
                             compute_checksum=True,  # type: bool
                             jobname="",             # type: Text
                             readers=None            # type: Dict[Text, Any]
                            ):  # type: (...) -> OutputPorts
        ret = {}  # type: OutputPorts
        debug = _logger.isEnabledFor(logging.DEBUG)
        cwl_version = self.metadata.get(
            "http://commonwl.org/cwltool#original_cwlVersion", None)
        if cwl_version != "v1.0":
            builder.resources["exitCode"] = rcode
        try:
            fs_access = builder.make_fs_access(outdir)
            custom_output = fs_access.join(outdir, "cwl.output.json")
            if fs_access.exists(custom_output):
                with fs_access.open(custom_output, "r") as f:
                    ret = json.load(f)
                if debug:
                    _logger.debug(u"Raw output from %s: %s", custom_output,
                                  json_dumps(ret, indent=4))
            else:
                for i, port in enumerate(ports):
                    def makeWorkflowException(msg):
                        return WorkflowException(
                            u"Error collecting output for parameter '%s':\n%s"
                            % (shortname(port["id"]), msg))
                    with SourceLine(ports, i, makeWorkflowException, debug):
                        fragment = shortname(port["id"])
                        ret[fragment] = self.collect_output(port, builder, outdir, fs_access,
                                                            compute_checksum=compute_checksum)
            if ret:
                revmap = partial(revmap_file, builder, outdir)
                adjustDirObjs(ret, trim_listing)
                visit_class(ret, ("File", "Directory"), cast(Callable[[Any], Any], revmap))
                visit_class(ret, ("File", "Directory"), remove_path)
                normalizeFilesDirs(ret)
                visit_class(ret, ("File", "Directory"), partial(check_valid_locations, fs_access))

                if compute_checksum:
                    adjustFileObjs(ret, partial(compute_checksums, fs_access))
            expected_schema = cast(Schema, self.names.get_name(
                "outputs_record_schema", ""))
            validate.validate_ex(expected_schema, ret,
                strict=False, logger=_logger_validation_warnings)
            if ret is not None and builder.mutation_manager is not None:
                adjustFileObjs(ret, builder.mutation_manager.set_generation)
            return ret if ret is not None else {}
        except validate.ValidationException as e:
            raise WorkflowException(
                "Error validating output record. " + Text(e) + "\n in "
                + json_dumps(ret, indent=4))
        finally:
            if builder.mutation_manager and readers:
                for r in readers.values():
                    builder.mutation_manager.release_reader(jobname, r)
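Example #6 wraps each port in a SourceLine context manager so that any failure is re-raised as a WorkflowException carrying the port's position, and its finally block releases mutation-manager readers even on error. A rough, self-contained sketch of the rewrapping idea (SimpleSourceLine is hypothetical; cwltool's SourceLine also carries line/column information from the parsed YAML):

from typing import Any, Callable, List

class SimpleSourceLine:
    # Re-raise anything raised in the body as exc_type, with the item's
    # position prepended for context.
    def __init__(self, items: List[Any], index: int,
                 exc_type: Callable[[str], Exception]) -> None:
        self.items = items
        self.index = index
        self.exc_type = exc_type

    def __enter__(self) -> "SimpleSourceLine":
        return self

    def __exit__(self, etype, evalue, tb) -> bool:
        if evalue is not None:
            raise self.exc_type("item %d: %s" % (self.index, evalue)) from evalue
        return False

ports = [{"id": "#main/out"}]
try:
    with SimpleSourceLine(ports, 0, RuntimeError):
        raise ValueError("no such file")
except RuntimeError as err:
    assert "item 0" in str(err)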
Example #7
    def collect_output_ports(self,
                             ports,
                             builder,
                             outdir,
                             compute_checksum=True):
        # type: (Set[Dict[Text, Any]], Builder, Text, bool) -> Dict[Text, Union[Text, List[Any], Dict[Text, Any]]]
        ret = {}  # type: Dict[Text, Union[Text, List[Any], Dict[Text, Any]]]
        try:

            fs_access = builder.make_fs_access(outdir)
            custom_output = fs_access.join(outdir, "cwl.output.json")
            if fs_access.exists(custom_output):
                with fs_access.open(custom_output, "r") as f:
                    ret = json.load(f)
                if _logger.isEnabledFor(logging.DEBUG):
                    _logger.debug(u"Raw output from %s: %s", custom_output,
                                  json.dumps(ret, indent=4))
            else:
                for i, port in enumerate(ports):
                    with SourceLine(ports, i, WorkflowException):
                        fragment = shortname(port["id"])
                        try:
                            ret[fragment] = self.collect_output(
                                port,
                                builder,
                                outdir,
                                fs_access,
                                compute_checksum=compute_checksum)
                        except Exception as e:
                            _logger.debug(
                                u"Error collecting output for parameter '%s'" %
                                shortname(port["id"]),
                                exc_info=True)
                            raise WorkflowException(
                                u"Error collecting output for parameter '%s':\n%s"
                                % (shortname(port["id"]), indent(u(str(e)))))

            if ret:
                adjustDirObjs(ret, trim_listing)
                adjustFileObjs(
                    ret,
                    cast(
                        Callable[[Any], Any],  # known bug in mypy
                        # https://github.com/python/mypy/issues/797
                        partial(revmap_file, builder, outdir)))
                adjustFileObjs(ret, remove_path)
                adjustDirObjs(ret, remove_path)
                normalizeFilesDirs(ret)
                if compute_checksum:
                    adjustFileObjs(ret, partial(compute_checksums, fs_access))

            validate.validate_ex(
                self.names.get_name("outputs_record_schema", ""), ret)
            return ret if ret is not None else {}
        except validate.ValidationException as e:
            raise WorkflowException("Error validating output record, " +
                                    Text(e) + "\n in " +
                                    json.dumps(ret, indent=4))
Example #8
 def validate_hints(self, hints, strict):
     for r in hints:
         try:
             if self.names.get_name(r["class"], "") is not None:
                 validate.validate_ex(self.names.get_name(r["class"], ""), r, strict=strict)
             else:
                 _logger.info(validate.ValidationException("Unknown hint %s" % (r["class"])))
         except validate.ValidationException as v:
             raise validate.ValidationException("Validating hint `%s`: %s" % (r["class"], str(v)))
Example #9
    def collect_output_ports(self,
                             ports,                  # type: Set[Dict[Text, Any]]
                             builder,                # type: Builder
                             outdir,                 # type: Text
                             compute_checksum=True,  # type: bool
                             jobname="",             # type: Text
                             readers=None            # type: Dict[Text, Any]
                             ):                      # type: (...) -> OutputPorts
        ret = {}  # type: OutputPorts
        debug = LOGGER.isEnabledFor(logging.DEBUG)
        try:
            fs_access = builder.make_fs_access(outdir)
            custom_output = fs_access.join(outdir, "cwl.output.json")
            if fs_access.exists(custom_output):
                with fs_access.open(custom_output, "r") as f:
                    ret = json.load(f)
                if debug:
                    LOGGER.debug(u"Raw output from %s: %s", custom_output, json.dumps(ret, indent=4))
            else:
                for i, port in enumerate(ports):
                    def make_workflow_exception(msg):
                        return WorkflowException(
                            u"Error collecting output for parameter '%s':\n%s"
                            % (shortname(port["id"]), msg))
                    with SourceLine(ports, i, make_workflow_exception, debug):
                        fragment = shortname(port["id"])
                        ret[fragment] = self.collect_output(port, builder, outdir, fs_access,
                                                            compute_checksum=compute_checksum)
            if ret:
                # revmap = partial(command_line_tool.revmap_file, builder, outdir)
                adjustDirObjs(ret, trim_listing)

                # TODO: Attempt to avoid a crash because the revmap fct is not functional
                #       (intend for a docker usage only?)
                # visit_class(ret, ("File", "Directory"), cast(Callable[[Any], Any], revmap))
                visit_class(ret, ("File", "Directory"), command_line_tool.remove_path)
                normalizeFilesDirs(ret)
                visit_class(ret, ("File", "Directory"), partial(command_line_tool.check_valid_locations, fs_access))

                if compute_checksum:
                    adjustFileObjs(ret, partial(compute_checksums, fs_access))

            validate.validate_ex(
                self.names.get_name("outputs_record_schema", ""), ret,
                strict=False, logger=LOGGER)
            if ret is not None and builder.mutation_manager is not None:
                adjustFileObjs(ret, builder.mutation_manager.set_generation)
            return ret if ret is not None else {}
        except validate.ValidationException as exc:
            raise WorkflowException("Error validating output record: {!s}\nIn:\n{}"
                                    .format(exc, json.dumps(ret, indent=4)))
        finally:
            if builder.mutation_manager and readers:
                for reader in readers.values():
                    builder.mutation_manager.release_reader(jobname, reader)
Example #10
    def collect_output_ports(self,
                             ports,                  # type: Set[Dict[Text, Any]]
                             builder,                # type: Builder
                             outdir,                 # type: Text
                             compute_checksum=True,  # type: bool
                             jobname="",             # type: Text
                             readers=None            # type: Dict[Text, Any]
                            ):  # type: (...) -> OutputPorts
        ret = {}  # type: OutputPorts
        debug = _logger.isEnabledFor(logging.DEBUG)
        try:
            fs_access = builder.make_fs_access(outdir)
            custom_output = fs_access.join(outdir, "cwl.output.json")
            if fs_access.exists(custom_output):
                with fs_access.open(custom_output, "r") as f:
                    ret = json.load(f)
                if debug:
                    _logger.debug(u"Raw output from %s: %s", custom_output,
                                  json_dumps(ret, indent=4))
            else:
                for i, port in enumerate(ports):
                    def makeWorkflowException(msg):
                        return WorkflowException(
                            u"Error collecting output for parameter '%s':\n%s"
                            % (shortname(port["id"]), msg))
                    with SourceLine(ports, i, makeWorkflowException, debug):
                        fragment = shortname(port["id"])
                        ret[fragment] = self.collect_output(port, builder, outdir, fs_access,
                                                            compute_checksum=compute_checksum)
            if ret:
                revmap = partial(revmap_file, builder, outdir)
                adjustDirObjs(ret, trim_listing)
                visit_class(ret, ("File", "Directory"), cast(Callable[[Any], Any], revmap))
                visit_class(ret, ("File", "Directory"), remove_path)
                normalizeFilesDirs(ret)
                visit_class(ret, ("File", "Directory"), partial(check_valid_locations, fs_access))

                if compute_checksum:
                    adjustFileObjs(ret, partial(compute_checksums, fs_access))
            expected_schema = cast(Schema, self.names.get_name(
                "outputs_record_schema", ""))
            validate.validate_ex(expected_schema, ret,
                strict=False, logger=_logger_validation_warnings)
            if ret is not None and builder.mutation_manager is not None:
                adjustFileObjs(ret, builder.mutation_manager.set_generation)
            return ret if ret is not None else {}
        except validate.ValidationException as e:
            raise WorkflowException(
                "Error validating output record. " + Text(e) + "\n in "
                + json_dumps(ret, indent=4))
        finally:
            if builder.mutation_manager and readers:
                for r in readers.values():
                    builder.mutation_manager.release_reader(jobname, r)
Example #11
 def validate_hints(self, avsc_names, hints, strict):
     # type: (Any, List[Dict[str, Any]], bool) -> None
     for r in hints:
         try:
             if avsc_names.get_name(r["class"], "") is not None:
                 validate.validate_ex(avsc_names.get_name(r["class"], ""), r, strict=strict)
             else:
                 _logger.info(str(validate.ValidationException(
                     u"Unknown hint %s" % (r["class"]))))
         except validate.ValidationException as v:
             raise validate.ValidationException(u"Validating hint `%s`: %s" % (r["class"], str(v)))
Example #12
    def _init_job(self, joborder, input_basedir, **kwargs):
        builder = Builder()
        builder.job = copy.deepcopy(joborder)

        for i in self.tool["inputs"]:
            d = shortname(i["id"])
            if d not in builder.job and "default" in i:
                builder.job[d] = i["default"]

        for r in self.requirements:
            if r["class"] not in supportedProcessRequirements:
                raise WorkflowException("Unsupported process requirement %s" %
                                        (r["class"]))

        # Validate job order
        try:
            validate.validate_ex(
                self.names.get_name("input_record_schema", ""), builder.job)
        except validate.ValidationException as e:
            raise WorkflowException("Error validating input record, " + str(e))

        builder.files = []
        builder.bindings = []
        builder.schemaDefs = self.schemaDefs
        builder.names = self.names
        builder.requirements = self.requirements
        builder.resources = {}
        builder.timeout = kwargs.get("eval_timeout")

        dockerReq, _ = self.get_requirement("DockerRequirement")
        if dockerReq and kwargs.get("use_container"):
            builder.outdir = kwargs.get("docker_outdir") or "/var/spool/cwl"
            builder.tmpdir = kwargs.get("docker_tmpdir") or "/tmp"
        else:
            builder.outdir = kwargs.get("outdir") or tempfile.mkdtemp()
            builder.tmpdir = kwargs.get("tmpdir") or tempfile.mkdtemp()

        builder.fs_access = kwargs.get("fs_access") or StdFsAccess(
            input_basedir)

        if self.formatgraph:
            for i in self.tool["inputs"]:
                d = shortname(i["id"])
                if d in builder.job and i.get("format"):
                    checkFormat(builder.job[d], builder.do_eval(i["format"]),
                                self.requirements, self.formatgraph)

        builder.bindings.extend(
            builder.bind_input(self.inputs_record_schema, builder.job))

        builder.resources = self.evalResources(builder, kwargs)

        return builder
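The first loop in Example #12 fills unset job inputs from each parameter's "default", keyed by the short name of its "id". The same step in miniature (this shortname is simplified; the real one also resolves full URIs):

def shortname(ident: str) -> str:
    # Simplified: keep only the last path segment after any fragment.
    return ident.split("#")[-1].split("/")[-1]

inputs = [{"id": "#main/threads", "default": 1}, {"id": "#main/infile"}]
job = {"infile": {"class": "File", "path": "x.txt"}}
for inp in inputs:
    name = shortname(inp["id"])
    if name not in job and "default" in inp:
        job[name] = inp["default"]
assert job["threads"] == 1 and "infile" in job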
Example #13
 def validate_hints(self, avsc_names, hints, strict):
     # type: (Any, List[Dict[Text, Any]], bool) -> None
     for i, r in enumerate(hints):
         sl = SourceLine(hints, i, validate.ValidationException)
         with sl:
             if avsc_names.get_name(r["class"], "") is not None:
                 plain_hint = dict((key, r[key]) for key in r if key not in
                                   self.doc_loader.identifiers)  # strip identifiers
                 validate.validate_ex(
                     avsc_names.get_name(plain_hint["class"], ""),
                     plain_hint, strict=strict)
             else:
                 _logger.info(sl.makeError(u"Unknown hint %s" % (r["class"])))
Example #14
 def validate_hints(self, avsc_names, hints, strict):
     # type: (Any, List[Dict[Text, Any]], bool) -> None
     for i, r in enumerate(hints):
         sl = SourceLine(hints, i, validate.ValidationException)
         with sl:
             if avsc_names.get_name(r["class"], "") is not None:
                 plain_hint = dict((key, r[key]) for key in r if key not in
                                   self.doc_loader.identifiers)  # strip identifiers
                 validate.validate_ex(
                     avsc_names.get_name(plain_hint["class"], ""),
                     plain_hint, strict=strict)
             else:
                 _logger.info(sl.makeError(u"Unknown hint %s" % (r["class"])))
Example #15
 def validate_hints(self, hints, strict):
     for r in hints:
         try:
             if self.names.get_name(r["class"], "") is not None:
                 validate.validate_ex(self.names.get_name(r["class"], ""),
                                      r,
                                      strict=strict)
             else:
                 _logger.info(
                     validate.ValidationException("Unknown hint %s" %
                                                  (r["class"])))
         except validate.ValidationException as v:
             raise validate.ValidationException("Validating hint `%s`: %s" %
                                                (r["class"], str(v)))
Example #16
    def _init_job(self, joborder, input_basedir, **kwargs):
        builder = Builder()
        builder.job = copy.deepcopy(joborder)

        for i in self.tool["inputs"]:
            d = shortname(i["id"])
            if d not in builder.job and "default" in i:
                builder.job[d] = i["default"]

        for r in self.requirements:
            if r["class"] not in supportedProcessRequirements:
                raise WorkflowException("Unsupported process requirement %s" % (r["class"]))

        # Validate job order
        try:
            validate.validate_ex(self.names.get_name("input_record_schema", ""), builder.job)
        except validate.ValidationException as e:
            raise WorkflowException("Error validating input record, " + str(e))

        builder.files = []
        builder.bindings = []
        builder.schemaDefs = self.schemaDefs
        builder.names = self.names
        builder.requirements = self.requirements
        builder.resources = {}
        builder.timeout = kwargs.get("eval_timeout")

        dockerReq, _ = self.get_requirement("DockerRequirement")
        if dockerReq and kwargs.get("use_container"):
            builder.outdir = kwargs.get("docker_outdir") or "/var/spool/cwl"
            builder.tmpdir = kwargs.get("docker_tmpdir") or "/tmp"
        else:
            builder.outdir = kwargs.get("outdir") or tempfile.mkdtemp()
            builder.tmpdir = kwargs.get("tmpdir") or tempfile.mkdtemp()

        builder.fs_access = kwargs.get("fs_access") or StdFsAccess(input_basedir)

        if self.formatgraph:
            for i in self.tool["inputs"]:
                d = shortname(i["id"])
                if d in builder.job and i.get("format"):
                    checkFormat(builder.job[d], builder.do_eval(i["format"]), self.requirements, self.formatgraph)

        builder.bindings.extend(builder.bind_input(self.inputs_record_schema, builder.job))

        builder.resources = self.evalResources(builder, kwargs)

        return builder
Example #17
    def collect_output_ports(self, ports, builder, outdir, compute_checksum=True, jobname="", readers=None):
        # type: (Set[Dict[Text, Any]], Builder, Text, bool, Text, Dict[Text, Any]) -> Dict[Text, Union[Text, List[Any], Dict[Text, Any]]]
        ret = {}  # type: Dict[Text, Union[Text, List[Any], Dict[Text, Any]]]
        try:
            fs_access = builder.make_fs_access(outdir)
            custom_output = fs_access.join(outdir, "cwl.output.json")
            if fs_access.exists(custom_output):
                with fs_access.open(custom_output, "r") as f:
                    ret = json.load(f)
                if _logger.isEnabledFor(logging.DEBUG):
                    _logger.debug(u"Raw output from %s: %s", custom_output, json.dumps(ret, indent=4))
            else:
                for i, port in enumerate(ports):
                    with SourceLine(ports, i, WorkflowException):
                        fragment = shortname(port["id"])
                        try:
                            ret[fragment] = self.collect_output(port, builder, outdir, fs_access,
                                                                compute_checksum=compute_checksum)
                        except Exception as e:
                            _logger.debug(
                                u"Error collecting output for parameter '%s'"
                                % shortname(port["id"]), exc_info=True)
                            raise WorkflowException(
                                u"Error collecting output for parameter '%s':\n%s"
                                % (shortname(port["id"]), indent(u(str(e)))))

            if ret:
                revmap = partial(revmap_file, builder, outdir)
                adjustDirObjs(ret, trim_listing)
                visit_class(ret, ("File", "Directory"), cast(Callable[[Any], Any], revmap))
                visit_class(ret, ("File", "Directory"), remove_path)
                normalizeFilesDirs(ret)
                visit_class(ret, ("File", "Directory"), partial(check_valid_locations, fs_access))

                if compute_checksum:
                    adjustFileObjs(ret, partial(compute_checksums, fs_access))

            validate.validate_ex(self.names.get_name("outputs_record_schema", ""), ret,
                                 strict=False, logger=_logger_validation_warnings)
            if ret is not None and builder.mutation_manager is not None:
                adjustFileObjs(ret, builder.mutation_manager.set_generation)
            return ret if ret is not None else {}
        except validate.ValidationException as e:
            raise WorkflowException("Error validating output record. " + Text(e) + "\n in " + json.dumps(ret, indent=4))
        finally:
            if builder.mutation_manager and readers:
                for r in readers.values():
                    builder.mutation_manager.release_reader(jobname, r)
Example #18
    def collect_output_ports(self, ports, builder, outdir):
        try:
            ret = {}
            custom_output = os.path.join(outdir, "cwl.output.json")
            if builder.fs_access.exists(custom_output):
                # read the file contents rather than parsing the path string
                with builder.fs_access.open(custom_output, "r") as f:
                    outputdoc = yaml.load(f)
                validate.validate_ex(self.names.get_name("outputs_record_schema", ""), outputdoc)
                return outputdoc

            for port in ports:
                fragment = shortname(port["id"])
                ret[fragment] = self.collect_output(port, builder, outdir)
            validate.validate_ex(self.names.get_name("outputs_record_schema", ""), ret)
            return ret if ret is not None else {}
        except validate.ValidationException as e:
            raise WorkflowException("Error validating output record, " + str(e) + "\n in " + json.dumps(ret, indent=4))
Example #19
 def validate_hints(self, avsc_names, hints, strict):
     # type: (Any, List[Dict[str, Any]], bool) -> None
     for r in hints:
         try:
             if avsc_names.get_name(r["class"], "") is not None:
                 validate.validate_ex(avsc_names.get_name(r["class"], ""),
                                      r,
                                      strict=strict)
             else:
                 _logger.info(
                     str(
                         validate.ValidationException(u"Unknown hint %s" %
                                                      (r["class"]))))
         except validate.ValidationException as v:
             raise validate.ValidationException(
                 u"Validating hint `%s`: %s" % (r["class"], str(v)))
Example #20
def get_expressions(
    tool: Union[CommentedMap, str],
    schema: Optional[Union[Schema, ArraySchema]],
    source_line: Optional[SourceLine] = None,
) -> List[Tuple[str, Optional[SourceLine]]]:
    if is_expression(tool, schema):
        return [(cast(str, tool), source_line)]
    elif isinstance(schema, UnionSchema):
        valid_schema = None

        for possible_schema in schema.schemas:
            if is_expression(tool, possible_schema):
                return [(cast(str, tool), source_line)]
            elif validate_ex(
                possible_schema,
                tool,
                raise_ex=False,
                logger=_logger_validation_warnings,
            ):
                valid_schema = possible_schema

        return get_expressions(tool, valid_schema, source_line)
    elif isinstance(schema, ArraySchema):
        if not isinstance(tool, MutableSequence):
            return []

        return list(
            itertools.chain(
                *map(
                    lambda x: get_expressions(
                        x[1], schema.items, SourceLine(tool, x[0])  # type: ignore
                    ),
                    enumerate(tool),
                )
            )
        )

    elif isinstance(schema, RecordSchema):
        if not isinstance(tool, MutableMapping):
            return []

        expression_nodes = []

        for schema_field in schema.fields:
            if schema_field.name in tool:
                expression_nodes.extend(
                    get_expressions(
                        tool[schema_field.name],
                        schema_field.type,
                        SourceLine(tool, schema_field.name),
                    )
                )

        return expression_nodes
    else:
        return []
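The ArraySchema branch above recurses into every element and flattens the per-element result lists with itertools.chain. The same pattern in standalone form (a toy collector over nested lists and dicts, not cwltool's get_expressions):

import itertools
from typing import Any, List, Tuple

def collect_strings(node: Any, path: str = "$") -> List[Tuple[str, str]]:
    # Gather every string in a nested list/dict structure with its path.
    if isinstance(node, str):
        return [(node, path)]
    if isinstance(node, list):
        return list(itertools.chain(*(
            collect_strings(item, "%s[%d]" % (path, i))
            for i, item in enumerate(node))))
    if isinstance(node, dict):
        return list(itertools.chain(*(
            collect_strings(value, "%s.%s" % (path, key))
            for key, value in node.items())))
    return []

assert collect_strings({"a": ["$(x)", {"b": "${y}"}]}) == [
    ("$(x)", "$.a[0]"), ("${y}", "$.a[1].b")]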
Example #21
 def validate_hints(self, avsc_names, hints, strict):
     # type: (Any, List[Dict[str, Any]], bool) -> None
     for i, r in enumerate(hints):
         sl = SourceLine(hints, i, validate.ValidationException)
         with sl:
             if (avsc_names.get_name(r["class"], "") is not None
                     and self.doc_loader is not None):
                 plain_hint = dict(
                     (key, r[key]) for key in r if key not in
                     self.doc_loader.identifiers)  # strip identifiers
                 validate.validate_ex(
                     avsc_names.get_name(plain_hint["class"], ""),
                     plain_hint,
                     strict=strict,
                 )
             elif r["class"] in ("NetworkAccess", "LoadListingRequirement"):
                 pass
             else:
                 _logger.info(
                     str(sl.makeError("Unknown hint %s" % (r["class"]))))
Example #22
    def collect_output_ports(self, ports, builder, outdir, compute_checksum=True):
        # type: (Set[Dict[Text, Any]], Builder, Text, bool) -> Dict[Text, Union[Text, List[Any], Dict[Text, Any]]]
        try:
            ret = {}  # type: Dict[Text, Union[Text, List[Any], Dict[Text, Any]]]
            fs_access = builder.make_fs_access(outdir)
            custom_output = fs_access.join(outdir, "cwl.output.json")
            if fs_access.exists(custom_output):
                with fs_access.open(custom_output, "r") as f:
                    ret = json.load(f)
                if _logger.isEnabledFor(logging.DEBUG):
                    _logger.debug(u"Raw output from %s: %s", custom_output, json.dumps(ret, indent=4))
            else:
                for i, port in enumerate(ports):
                    with SourceLine(ports, i, WorkflowException):
                        fragment = shortname(port["id"])
                        try:
                            ret[fragment] = self.collect_output(port, builder, outdir, fs_access, compute_checksum=compute_checksum)
                        except Exception as e:
                            _logger.debug(
                                u"Error collecting output for parameter '%s'"
                                % shortname(port["id"]), exc_info=True)
                            raise WorkflowException(
                                u"Error collecting output for parameter '%s':\n%s"
                                % (shortname(port["id"]), indent(unicode(e))))

            if ret:
                adjustFileObjs(ret,
                        cast(Callable[[Any], Any],  # known bug in mypy
                            # https://github.com/python/mypy/issues/797
                            partial(revmap_file, builder, outdir)))
                adjustFileObjs(ret, remove_path)
                adjustDirObjs(ret, remove_path)
                normalizeFilesDirs(ret)
                if compute_checksum:
                    adjustFileObjs(ret, partial(compute_checksums, fs_access))

            validate.validate_ex(self.names.get_name("outputs_record_schema", ""), ret)
            return ret if ret is not None else {}
        except validate.ValidationException as e:
            raise WorkflowException("Error validating output record, " + Text(e) + "\n in " + json.dumps(ret, indent=4))
Example #23
    def collect_output_ports(self, ports, builder, outdir):
        try:
            ret = {}
            custom_output = os.path.join(outdir, "cwl.output.json")
            if builder.fs_access.exists(custom_output):
                # read the file contents rather than parsing the path string
                with builder.fs_access.open(custom_output, "r") as f:
                    outputdoc = yaml.load(f)
                validate.validate_ex(
                    self.names.get_name("outputs_record_schema", ""),
                    outputdoc)
                return outputdoc

            for port in ports:
                fragment = shortname(port["id"])
                ret[fragment] = self.collect_output(port, builder, outdir)
            validate.validate_ex(
                self.names.get_name("outputs_record_schema", ""), ret)
            return ret if ret is not None else {}
        except validate.ValidationException as e:
            raise WorkflowException("Error validating output record, " +
                                    str(e) + "\n in " +
                                    json.dumps(ret, indent=4))
Example #24
def get_expressions(
    tool,  # type: Union[CommentedMap, Any]
    schema,  # type: Optional[avro.schema.Schema]
    source_line=None  # type: Optional[SourceLine]
):  # type: (...) -> List[Tuple[Text, Optional[SourceLine]]]
    if is_expression(tool, schema):
        return [(tool, source_line)]
    elif isinstance(schema, avro.schema.UnionSchema):
        valid_schema = None

        for possible_schema in schema.schemas:
            if is_expression(tool, possible_schema):
                return [(tool, source_line)]
            elif validate_ex(possible_schema,
                             tool,
                             strict=True,
                             raise_ex=False,
                             logger=_logger_validation_warnings):
                valid_schema = possible_schema

        return get_expressions(tool, valid_schema, source_line)
    elif isinstance(schema, avro.schema.ArraySchema):
        if not isinstance(tool, list):
            return []

        return list(
            itertools.chain(*map(
                lambda x: get_expressions(x[1], schema.items,
                                          SourceLine(tool, x[0])),
                enumerate(tool))))  # type: ignore # https://github.com/python/mypy/issues/4679

    elif isinstance(schema, avro.schema.RecordSchema):
        if not isinstance(tool, Dict):
            return []

        expression_nodes = []

        for schema_field in schema.fields:
            if schema_field.name in tool:
                expression_nodes.extend(
                    get_expressions(tool[schema_field.name], schema_field.type,
                                    SourceLine(tool, schema_field.name)))

        return expression_nodes
    else:
        return []
Example #25
def get_expressions(
    tool,  # type: Union[CommentedMap, Text]
    schema,  # type: Optional[avro.schema.Schema]
    source_line=None  # type: Optional[SourceLine]
):  # type: (...) -> List[Tuple[Text, Optional[SourceLine]]]
    if is_expression(tool, schema):
        return [(cast(Text, tool), source_line)]
    elif isinstance(schema, avro.schema.UnionSchema):
        valid_schema = None

        for possible_schema in schema.schemas:
            if is_expression(tool, possible_schema):
                return [(cast(Text, tool), source_line)]
            elif validate_ex(possible_schema,
                             tool,
                             raise_ex=False,
                             logger=_logger_validation_warnings):
                valid_schema = possible_schema

        return get_expressions(tool, valid_schema, source_line)
    elif isinstance(schema, avro.schema.ArraySchema):
        if not isinstance(tool, MutableSequence):
            return []

        return list(
            itertools.chain(*map(
                lambda x: get_expressions(x[1], schema.items,
                                          SourceLine(tool, x[0])),
                enumerate(tool))))

    elif isinstance(schema, avro.schema.RecordSchema):
        if not isinstance(tool, MutableMapping):
            return []

        expression_nodes = []

        for schema_field in schema.fields:
            if schema_field.name in tool:
                expression_nodes.extend(
                    get_expressions(tool[schema_field.name], schema_field.type,
                                    SourceLine(tool, schema_field.name)))

        return expression_nodes
    else:
        return []
Example #26
def get_expressions(tool,             # type: Union[CommentedMap, Any]
                    schema,           # type: Optional[avro.schema.Schema]
                    source_line=None  # type: Optional[SourceLine]
                   ):  # type: (...) -> List[Tuple[Text, Optional[SourceLine]]]
    if is_expression(tool, schema):
        return [(tool, source_line)]
    elif isinstance(schema, avro.schema.UnionSchema):
        valid_schema = None

        for possible_schema in schema.schemas:
            if is_expression(tool, possible_schema):
                return [(tool, source_line)]
            elif validate_ex(possible_schema, tool, raise_ex=False,
                             logger=_logger_validation_warnings):
                valid_schema = possible_schema

        return get_expressions(tool, valid_schema, source_line)
    elif isinstance(schema, avro.schema.ArraySchema):
        if not isinstance(tool, MutableSequence):
            return []

        return list(itertools.chain(
            *map(lambda x: get_expressions(x[1], schema.items, SourceLine(tool, x[0])), enumerate(tool))  # type: ignore # https://github.com/python/mypy/issues/4679
        ))

    elif isinstance(schema, avro.schema.RecordSchema):
        if not isinstance(tool, MutableMapping):
            return []

        expression_nodes = []

        for schema_field in schema.fields:
            if schema_field.name in tool:
                expression_nodes.extend(get_expressions(
                    tool[schema_field.name],
                    schema_field.type,
                    SourceLine(tool, schema_field.name)
                ))

        return expression_nodes
    else:
        return []
Example #27
    def _init_job(self, joborder, **kwargs):
        # type: (Dict[Text, Text], **Any) -> Builder
        """
        kwargs:

        eval_timeout: javascript evaluation timeout
        use_container: do/don't use Docker when DockerRequirement hint provided
        make_fs_access: make an FsAccess() object with given basedir
        basedir: basedir for FsAccess
        docker_outdir: output directory inside docker for this job
        docker_tmpdir: tmpdir inside docker for this job
        docker_stagedir: stagedir inside docker for this job
        outdir: outdir on host for this job
        tmpdir: tmpdir on host for this job
        stagedir: stagedir on host for this job
        select_resources: callback to select compute resources
        debug: enable debugging output
        js_console: enable javascript console output
        """

        builder = Builder()
        builder.job = cast(Dict[Text, Union[Dict[Text, Any], List,
                                            Text]], copy.deepcopy(joborder))

        # Validate job order
        try:
            fillInDefaults(self.tool[u"inputs"], builder.job)
            normalizeFilesDirs(builder.job)
            validate.validate_ex(self.names.get_name("input_record_schema", ""), builder.job,
                                 strict=False, logger=_logger_validation_warnings)
        except (validate.ValidationException, WorkflowException) as e:
            raise WorkflowException("Invalid job input record:\n" + Text(e))

        builder.files = []
        builder.bindings = CommentedSeq()
        builder.schemaDefs = self.schemaDefs
        builder.names = self.names
        builder.requirements = self.requirements
        builder.hints = self.hints
        builder.resources = {}
        builder.timeout = kwargs.get("eval_timeout")
        builder.debug = kwargs.get("debug")
        builder.js_console = kwargs.get("js_console")
        builder.mutation_manager = kwargs.get("mutation_manager")

        builder.make_fs_access = kwargs.get("make_fs_access") or StdFsAccess
        builder.fs_access = builder.make_fs_access(kwargs["basedir"])
        builder.force_docker_pull = kwargs.get("force_docker_pull")

        loadListingReq, _ = self.get_requirement("http://commonwl.org/cwltool#LoadListingRequirement")
        if loadListingReq:
            builder.loadListing = loadListingReq.get("loadListing")

        dockerReq, is_req = self.get_requirement("DockerRequirement")
        defaultDocker = None

        if dockerReq is None and "default_container" in kwargs:
            defaultDocker = kwargs["default_container"]

        if (dockerReq or defaultDocker) and kwargs.get("use_container"):
            if dockerReq:
                # Check if docker output directory is absolute
                if dockerReq.get("dockerOutputDirectory") and dockerReq.get("dockerOutputDirectory").startswith('/'):
                    builder.outdir = dockerReq.get("dockerOutputDirectory")
                else:
                    builder.outdir = builder.fs_access.docker_compatible_realpath(
                        dockerReq.get("dockerOutputDirectory") or kwargs.get("docker_outdir") or "/var/spool/cwl")
            elif defaultDocker:
                builder.outdir = builder.fs_access.docker_compatible_realpath(
                    kwargs.get("docker_outdir") or "/var/spool/cwl")
            builder.tmpdir = builder.fs_access.docker_compatible_realpath(kwargs.get("docker_tmpdir") or "/tmp")
            builder.stagedir = builder.fs_access.docker_compatible_realpath(kwargs.get("docker_stagedir") or "/var/lib/cwl")
        else:
            builder.outdir = builder.fs_access.realpath(kwargs.get("outdir") or tempfile.mkdtemp())
            if self.tool[u"class"] != 'Workflow':
                builder.tmpdir = builder.fs_access.realpath(kwargs.get("tmpdir") or tempfile.mkdtemp())
                builder.stagedir = builder.fs_access.realpath(kwargs.get("stagedir") or tempfile.mkdtemp())

        if self.formatgraph:
            for i in self.tool["inputs"]:
                d = shortname(i["id"])
                if d in builder.job and i.get("format"):
                    checkFormat(builder.job[d], builder.do_eval(i["format"]), self.formatgraph)

        builder.bindings.extend(builder.bind_input(self.inputs_record_schema, builder.job))

        if self.tool.get("baseCommand"):
            for n, b in enumerate(aslist(self.tool["baseCommand"])):
                builder.bindings.append({
                    "position": [-1000000, n],
                    "datum": b
                })

        if self.tool.get("arguments"):
            for i, a in enumerate(self.tool["arguments"]):
                lc = self.tool["arguments"].lc.data[i]
                fn = self.tool["arguments"].lc.filename
                builder.bindings.lc.add_kv_line_col(len(builder.bindings), lc)
                if isinstance(a, dict):
                    a = copy.copy(a)
                    if a.get("position"):
                        a["position"] = [a["position"], i]
                    else:
                        a["position"] = [0, i]
                    builder.bindings.append(a)
                elif ("$(" in a) or ("${" in a):
                    cm = CommentedMap((
                        ("position", [0, i]),
                        ("valueFrom", a)
                    ))
                    cm.lc.add_kv_line_col("valueFrom", lc)
                    cm.lc.filename = fn
                    builder.bindings.append(cm)
                else:
                    cm = CommentedMap((
                        ("position", [0, i]),
                        ("datum", a)
                    ))
                    cm.lc.add_kv_line_col("datum", lc)
                    cm.lc.filename = fn
                    builder.bindings.append(cm)

        # use python2 like sorting of heterogeneous lists
        # (containing str and int types),
        # TODO: unify for both runtime
        if six.PY3:
            key = cmp_to_key(cmp_like_py2)
        else:  # PY2
            key = lambda dict: dict["position"]
        builder.bindings.sort(key=key)
        builder.resources = self.evalResources(builder, kwargs)
        builder.job_script_provider = kwargs.get("job_script_provider", None)
        return builder
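Example #27 sorts builder.bindings with a Python-2-style comparator via cmp_to_key so that "position" lists mixing ints and strings do not raise TypeError on Python 3. A simplified comparator in the same spirit (cmp_like_py2 is cwltool's own; this stand-in compares only "position"):

from functools import cmp_to_key

def mixed_cmp(a, b):
    # Py2-like rule: values of different types order by type name,
    # values of the same type order by value.
    for x, y in zip(a["position"], b["position"]):
        if type(x) is not type(y):
            return -1 if type(x).__name__ < type(y).__name__ else 1
        if x != y:
            return -1 if x < y else 1
    return 0

bindings = [{"position": [0, "zz"]}, {"position": [-1000000, 0]}]
bindings.sort(key=cmp_to_key(mixed_cmp))
assert bindings[0]["position"] == [-1000000, 0]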
Example #28
    def _init_job(self, joborder, runtime_context):
        # type: (Mapping[str, str], RuntimeContext) -> Builder

        if self.metadata.get("cwlVersion") != INTERNAL_VERSION:
            raise WorkflowException(
                "Process object loaded with version '%s', must update to '%s' in order to execute."
                % (self.metadata.get("cwlVersion"), INTERNAL_VERSION))

        job = cast(Dict[str, expression.JSON], copy.deepcopy(joborder))

        make_fs_access = getdefault(runtime_context.make_fs_access,
                                    StdFsAccess)
        fs_access = make_fs_access(runtime_context.basedir)

        load_listing_req, _ = self.get_requirement("LoadListingRequirement")

        if load_listing_req is not None:
            load_listing = load_listing_req.get("loadListing")
        else:
            load_listing = "no_listing"

        # Validate job order
        try:
            fill_in_defaults(self.tool["inputs"], job, fs_access)

            normalizeFilesDirs(job)
            schema = self.names.get_name("input_record_schema", "")
            if schema is None:
                raise WorkflowException("Missing input record schema: "
                                        "{}".format(self.names))
            validate.validate_ex(schema,
                                 job,
                                 strict=False,
                                 logger=_logger_validation_warnings)

            if load_listing and load_listing != "no_listing":
                get_listing(fs_access,
                            job,
                            recursive=(load_listing == "deep_listing"))

            visit_class(job, ("File", ),
                        functools.partial(add_sizes, fs_access))

            if load_listing == "deep_listing":
                for i, inparm in enumerate(self.tool["inputs"]):
                    k = shortname(inparm["id"])
                    if k not in job:
                        continue
                    v = job[k]
                    dircount = [0]

                    def inc(d):  # type: (List[int]) -> None
                        d[0] += 1

                    visit_class(v, ("Directory", ), lambda x: inc(dircount))
                    if dircount[0] == 0:
                        continue
                    filecount = [0]
                    visit_class(v, ("File", ), lambda x: inc(filecount))
                    if filecount[0] > FILE_COUNT_WARNING:
                        # Long lines in this message are okay, will be reflowed based on terminal columns.
                        _logger.warning(
                            strip_dup_lineno(
                                SourceLine(self.tool["inputs"], i, str).
                                makeError(
                                    """Recursive directory listing has resulted in a large number of File objects (%s) passed to the input parameter '%s'.  This may negatively affect workflow performance and memory use.

If this is a problem, use the hint 'cwltool:LoadListingRequirement' with "shallow_listing" or "no_listing" to change the directory listing behavior:

$namespaces:
  cwltool: "http://commonwl.org/cwltool#"
hints:
  cwltool:LoadListingRequirement:
    loadListing: shallow_listing

""" % (filecount[0], k))))

        except (validate.ValidationException, WorkflowException) as err:
            raise WorkflowException("Invalid job input record:\n" +
                                    str(err)) from err

        files = []  # type: List[Dict[str, str]]
        bindings = CommentedSeq()
        tmpdir = ""
        stagedir = ""

        docker_req, _ = self.get_requirement("DockerRequirement")
        default_docker = None

        if docker_req is None and runtime_context.default_container:
            default_docker = runtime_context.default_container

        if (docker_req or default_docker) and runtime_context.use_container:
            if docker_req is not None:
                # Check if docker output directory is absolute
                if docker_req.get("dockerOutputDirectory") and docker_req.get(
                        "dockerOutputDirectory").startswith("/"):
                    outdir = docker_req.get("dockerOutputDirectory")
                else:
                    outdir = (docker_req.get("dockerOutputDirectory")
                              or runtime_context.docker_outdir
                              or random_outdir())
            elif default_docker is not None:
                outdir = runtime_context.docker_outdir or random_outdir()
            tmpdir = runtime_context.docker_tmpdir or "/tmp"  # nosec
            stagedir = runtime_context.docker_stagedir or "/var/lib/cwl"
        else:
            outdir = fs_access.realpath(
                runtime_context.outdir or tempfile.mkdtemp(prefix=getdefault(
                    runtime_context.tmp_outdir_prefix, DEFAULT_TMP_PREFIX)))
            if self.tool["class"] != "Workflow":
                tmpdir = fs_access.realpath(runtime_context.tmpdir
                                            or tempfile.mkdtemp())
                stagedir = fs_access.realpath(runtime_context.stagedir
                                              or tempfile.mkdtemp())

        builder = Builder(
            job,
            files,
            bindings,
            self.schemaDefs,
            self.names,
            self.requirements,
            self.hints,
            {},
            runtime_context.mutation_manager,
            self.formatgraph,
            make_fs_access,
            fs_access,
            runtime_context.job_script_provider,
            runtime_context.eval_timeout,
            runtime_context.debug,
            runtime_context.js_console,
            runtime_context.force_docker_pull,
            load_listing,
            outdir,
            tmpdir,
            stagedir,
        )

        bindings.extend(
            builder.bind_input(
                self.inputs_record_schema,
                job,
                discover_secondaryFiles=getdefault(runtime_context.toplevel,
                                                   False),
            ))

        if self.tool.get("baseCommand"):
            for index, command in enumerate(aslist(self.tool["baseCommand"])):
                bindings.append({
                    "position": [-1000000, index],
                    "datum": command
                })

        if self.tool.get("arguments"):
            for i, arg in enumerate(self.tool["arguments"]):
                lc = self.tool["arguments"].lc.data[i]
                filename = self.tool["arguments"].lc.filename
                bindings.lc.add_kv_line_col(len(bindings), lc)
                if isinstance(arg, MutableMapping):
                    arg = copy.deepcopy(arg)
                    if arg.get("position"):
                        position = arg.get("position")
                        if isinstance(position, str):  # no need to test the
                            # CWLVersion as the v1.0
                            # schema only allows ints
                            position = builder.do_eval(position)
                            if position is None:
                                position = 0
                        arg["position"] = [position, i]
                    else:
                        arg["position"] = [0, i]
                    bindings.append(arg)
                elif ("$(" in arg) or ("${" in arg):
                    cm = CommentedMap((("position", [0,
                                                     i]), ("valueFrom", arg)))
                    cm.lc.add_kv_line_col("valueFrom", lc)
                    cm.lc.filename = filename
                    bindings.append(cm)
                else:
                    cm = CommentedMap((("position", [0, i]), ("datum", arg)))
                    cm.lc.add_kv_line_col("datum", lc)
                    cm.lc.filename = filename
                    bindings.append(cm)

        # Use Python 2-style sorting of heterogeneous lists
        # (containing str and int types).
        key = functools.cmp_to_key(cmp_like_py2)

        # This awkward construction replaces the contents of
        # "bindings" in place (because Builder expects it to be
        # mutated in place, sigh, I'm sorry) with its contents sorted,
        # supporting different versions of Python and ruamel.yaml with
        # different behaviors/bugs in CommentedSeq.
        bindings_copy = copy.deepcopy(bindings)
        del bindings[:]
        bindings.extend(sorted(bindings_copy, key=key))

        if self.tool["class"] != "Workflow":
            builder.resources = self.evalResources(builder, runtime_context)
        return builder
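A note on the two quirks above: cmp_like_py2 (imported from cwltool's utilities) restores Python 2's ordering for position lists that mix ints and strs, and the deepcopy/del/extend dance sorts bindings in place because Builder already holds a reference to that CommentedSeq. A minimal, hypothetical re-implementation of the comparator idea (assuming ints order before strs when types differ, as CPython 2 did):

import functools

def cmp_like_py2_sketch(a, b):
    # Compare "position" lists element-wise; when an int meets a str,
    # fall back to comparing type names ("int" < "str"), which matches
    # Python 2's behavior for these two types.
    for x, y in zip(a["position"], b["position"]):
        if type(x) is type(y):
            if x != y:
                return -1 if x < y else 1
        else:
            return -1 if type(x).__name__ < type(y).__name__ else 1
    return 0

bindings = [
    {"position": [0, 1], "datum": "-v"},
    {"position": ["zzz", 0], "valueFrom": "$(inputs.x)"},
    {"position": [-1000000, 0], "datum": "echo"},
]
bindings.sort(key=functools.cmp_to_key(cmp_like_py2_sketch))
print([b["position"] for b in bindings])
# [[-1000000, 0], [0, 1], ['zzz', 0]]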
Exemple #29
0
def get_expressions(
    tool: Union[CommentedMap, str, CommentedSeq],
    schema: Optional[Union[Schema, ArraySchema]],
    source_line: Optional[SourceLine] = None,
) -> List[Tuple[str, Optional[SourceLine]]]:
    debug = _logger.isEnabledFor(logging.DEBUG)
    if is_expression(tool, schema):
        return [(cast(str, tool), source_line)]
    elif isinstance(schema, UnionSchema):
        valid_schema = None

        for possible_schema in schema.schemas:
            if is_expression(tool, possible_schema):
                return [(cast(str, tool), source_line)]
            elif validate_ex(
                    possible_schema,
                    tool,
                    raise_ex=False,
                    logger=_logger_validation_warnings,
                    vocab={},
            ):
                valid_schema = possible_schema

        return get_expressions(tool, valid_schema, source_line)
    elif isinstance(schema, ArraySchema):
        if not isinstance(tool, MutableSequence):
            return []

        def tmp_expr(
            x: Tuple[int, Union[CommentedMap, str, CommentedSeq]]
        ) -> List[Tuple[str, Optional[SourceLine]]]:
            # using a lambda for this broke mypyc v0.910 and before
            return get_expressions(
                x[1],
                cast(ArraySchema, schema).items,
                SourceLine(tool, x[0], include_traceback=debug),
            )

        return list(itertools.chain(*map(
            tmp_expr,
            enumerate(tool),
        )))

    elif isinstance(schema, RecordSchema):
        if not isinstance(tool, MutableMapping):
            return []

        expression_nodes = []

        for schema_field in schema.fields:
            if schema_field.name in tool:
                expression_nodes.extend(
                    get_expressions(
                        tool[schema_field.name],
                        schema_field.type,
                        SourceLine(tool,
                                   schema_field.name,
                                   include_traceback=debug),
                    ))

        return expression_nodes
    else:
        return []
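get_expressions thus dispatches on the schema kind: a direct expression hit returns immediately, a union tries each alternative, an array recurses per element, and a record recurses per matching field. The same shape of recursion, sketched over plain Python data with an assumed substring-based expression detector (no schema_salad types involved):

def looks_like_expression(value):
    # Assumed detector: CWL expressions are strings containing $() or ${}.
    return isinstance(value, str) and ("$(" in value or "${" in value)

def collect_expressions(value, path=()):
    # Recursively gather expression strings together with the path at
    # which each one occurs (a stand-in for SourceLine bookkeeping).
    if looks_like_expression(value):
        return [(value, path)]
    if isinstance(value, list):      # array-schema case
        found = []
        for i, item in enumerate(value):
            found.extend(collect_expressions(item, path + (i,)))
        return found
    if isinstance(value, dict):      # record-schema case
        found = []
        for key, item in value.items():
            found.extend(collect_expressions(item, path + (key,)))
        return found
    return []

print(collect_expressions({"arguments": ["-n", "$(inputs.count)"]}))
# [('$(inputs.count)', ('arguments', 1))]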
Exemple #30
0
    def _init_job(self, joborder, input_basedir, **kwargs):
        # type: (Dict[str, str], str, **Any) -> Builder
        builder = Builder()
        builder.job = copy.deepcopy(joborder)

        fillInDefaults(self.tool["inputs"], builder.job)

        # Validate job order
        try:
            validate.validate_ex(self.names.get_name("input_record_schema", ""), builder.job)
        except validate.ValidationException as e:
            raise WorkflowException("Error validating input record, " + str(e))

        builder.files = []
        builder.bindings = []
        builder.schemaDefs = self.schemaDefs
        builder.names = self.names
        builder.requirements = self.requirements
        builder.resources = {}
        builder.timeout = kwargs.get("eval_timeout")

        dockerReq, _ = self.get_requirement("DockerRequirement")
        if dockerReq and kwargs.get("use_container"):
            builder.outdir = kwargs.get("docker_outdir") or "/var/spool/cwl"
            builder.tmpdir = kwargs.get("docker_tmpdir") or "/tmp"
        else:
            if kwargs.get('tmp_outdir_prefix'):
                builder.outdir = tempfile.mkdtemp(prefix=kwargs.get('tmp_outdir_prefix'))
            else:
                builder.outdir = kwargs.get("outdir") or tempfile.mkdtemp()
            if kwargs.get('tmpdir_prefix'):
                builder.tmpdir = tempfile.mkdtemp(prefix=kwargs.get('tmpdir_prefix'))
            else:
                builder.tmpdir = kwargs.get("tmpdir") or tempfile.mkdtemp()

        builder.fs_access = kwargs.get("fs_access") or StdFsAccess(input_basedir)

        if self.formatgraph:
            for i in self.tool["inputs"]:
                d = shortname(i["id"])
                if d in builder.job and i.get("format"):
                    checkFormat(builder.job[d], builder.do_eval(i["format"]), self.formatgraph)

        builder.bindings.extend(builder.bind_input(self.inputs_record_schema, builder.job))

        if self.tool.get("baseCommand"):
            for n, b in enumerate(aslist(self.tool["baseCommand"])):
                builder.bindings.append({
                    "position": [-1000000, n],
                    "valueFrom": b
                })

        if self.tool.get("arguments"):
            for i, a in enumerate(self.tool["arguments"]):
                if isinstance(a, dict):
                    a = copy.copy(a)
                    if a.get("position"):
                        a["position"] = [a["position"], i]
                    else:
                        a["position"] = [0, i]
                    a["do_eval"] = a["valueFrom"]
                    a["valueFrom"] = None
                    builder.bindings.append(a)
                else:
                    builder.bindings.append({
                        "position": [0, i],
                        "valueFrom": a
                    })

        builder.bindings.sort(key=lambda a: a["position"])

        builder.resources = self.evalResources(builder, kwargs)

        return builder
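fillInDefaults, imported from cwltool's process module, injects each input parameter's declared default into the job order when the caller supplied nothing. A minimal sketch under that assumption (the id-splitting is a crude stand-in for shortname()):

import copy

def fill_in_defaults_sketch(inputs, job):
    # For every declared input, copy its "default" into the job order
    # when that parameter is absent or explicitly None.
    for inp in inputs:
        name = inp["id"].split("/")[-1]   # crude shortname()
        if job.get(name) is None and "default" in inp:
            job[name] = copy.deepcopy(inp["default"])

job = {"threads": 4}
fill_in_defaults_sketch(
    [{"id": "#main/threads", "type": "int", "default": 1},
     {"id": "#main/mem", "type": "int", "default": 1024}],
    job)
print(job)  # {'threads': 4, 'mem': 1024}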
Exemple #31
0
    def _init_job(self, joborder, runtimeContext):
        # type: (MutableMapping[Text, Text], RuntimeContext) -> Builder

        job = cast(Dict[Text, Union[Dict[Text, Any], List[Any], Text, None]],
                   copy.deepcopy(joborder))

        make_fs_access = getdefault(runtimeContext.make_fs_access, StdFsAccess)
        fs_access = make_fs_access(runtimeContext.basedir)

        # Validate job order
        try:
            fill_in_defaults(self.tool[u"inputs"], job, fs_access)
            normalizeFilesDirs(job)
            validate.validate_ex(self.names.get_name("input_record_schema", ""),
                                 job, strict=False, logger=_logger_validation_warnings)
        except (validate.ValidationException, WorkflowException) as e:
            raise WorkflowException("Invalid job input record:\n" + Text(e))

        files = []  # type: List[Dict[Text, Text]]
        bindings = CommentedSeq()
        tmpdir = u""
        stagedir = u""

        loadListingReq, _ = self.get_requirement("http://commonwl.org/cwltool#LoadListingRequirement")
        if loadListingReq is not None:
            loadListing = loadListingReq.get("loadListing")
        else:
            loadListing = "deep_listing"   # will default to "no_listing" in CWL v1.1

        dockerReq, _ = self.get_requirement("DockerRequirement")
        defaultDocker = None

        if dockerReq is None and runtimeContext.default_container:
            defaultDocker = runtimeContext.default_container

        if (dockerReq or defaultDocker) and runtimeContext.use_container:
            if dockerReq is not None:
                # Check if docker output directory is absolute
                if dockerReq.get("dockerOutputDirectory") and \
                        dockerReq.get("dockerOutputDirectory").startswith('/'):
                    outdir = dockerReq.get("dockerOutputDirectory")
                else:
                    outdir = dockerReq.get("dockerOutputDirectory") or \
                        runtimeContext.docker_outdir or random_outdir()
            elif defaultDocker is not None:
                outdir = runtimeContext.docker_outdir or random_outdir()
            tmpdir = runtimeContext.docker_tmpdir or "/tmp"
            stagedir = runtimeContext.docker_stagedir or "/var/lib/cwl"
        else:
            outdir = fs_access.realpath(
                runtimeContext.outdir or tempfile.mkdtemp(
                    prefix=getdefault(runtimeContext.tmp_outdir_prefix, DEFAULT_TMP_PREFIX)))
            if self.tool[u"class"] != 'Workflow':
                tmpdir = fs_access.realpath(runtimeContext.tmpdir or tempfile.mkdtemp())
                stagedir = fs_access.realpath(runtimeContext.stagedir or tempfile.mkdtemp())

        builder = Builder(job,
                          files,
                          bindings,
                          self.schemaDefs,
                          self.names,
                          self.requirements,
                          self.hints,
                          {},
                          runtimeContext.mutation_manager,
                          self.formatgraph,
                          make_fs_access,
                          fs_access,
                          runtimeContext.job_script_provider,
                          runtimeContext.eval_timeout,
                          runtimeContext.debug,
                          runtimeContext.js_console,
                          runtimeContext.force_docker_pull,
                          loadListing,
                          outdir,
                          tmpdir,
                          stagedir)

        bindings.extend(builder.bind_input(
            self.inputs_record_schema, job,
            discover_secondaryFiles=getdefault(runtimeContext.toplevel, False)))

        if self.tool.get("baseCommand"):
            for n, b in enumerate(aslist(self.tool["baseCommand"])):
                bindings.append({
                    "position": [-1000000, n],
                    "datum": b
                })

        if self.tool.get("arguments"):
            for i, a in enumerate(self.tool["arguments"]):
                lc = self.tool["arguments"].lc.data[i]
                fn = self.tool["arguments"].lc.filename
                bindings.lc.add_kv_line_col(len(bindings), lc)
                if isinstance(a, MutableMapping):
                    a = copy.deepcopy(a)
                    if a.get("position"):
                        a["position"] = [a["position"], i]
                    else:
                        a["position"] = [0, i]
                    bindings.append(a)
                elif ("$(" in a) or ("${" in a):
                    cm = CommentedMap((
                        ("position", [0, i]),
                        ("valueFrom", a)
                    ))
                    cm.lc.add_kv_line_col("valueFrom", lc)
                    cm.lc.filename = fn
                    bindings.append(cm)
                else:
                    cm = CommentedMap((
                        ("position", [0, i]),
                        ("datum", a)
                    ))
                    cm.lc.add_kv_line_col("datum", lc)
                    cm.lc.filename = fn
                    bindings.append(cm)

        # Use Python 2-style sorting of heterogeneous lists
        # (containing str and int types).
        # TODO: unify for both runtimes
        if PY3:
            key = functools.cmp_to_key(cmp_like_py2)
        else:  # PY2
            key = lambda d: d["position"]

        # This awkward construction replaces the contents of
        # "bindings" in place (because Builder expects it to be
        # mutated in place, sigh, I'm sorry) with its contents sorted,
        # supporting different versions of Python and ruamel.yaml with
        # different behaviors/bugs in CommentedSeq.
        bd = copy.deepcopy(bindings)
        del bindings[:]
        bindings.extend(sorted(bd, key=key))

        if self.tool[u"class"] != 'Workflow':
            builder.resources = self.evalResources(builder, runtimeContext)
        return builder
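The position pairs are what ultimately order the command line: baseCommand entries get [-1000000, n] so they sort ahead of everything, arguments get [0, i], and input bindings carry their own positions. A toy illustration of how sorting those pairs yields argv order (in the real code the datum text flows through the Builder rather than being used directly):

bindings = []
for n, b in enumerate(["tar", "xf"]):            # baseCommand
    bindings.append({"position": [-1000000, n], "datum": b})
for i, a in enumerate(["--verbose"]):            # arguments
    bindings.append({"position": [0, i], "datum": a})
bindings.append({"position": [5, 0], "datum": "archive.tar"})  # an input binding

bindings.sort(key=lambda b: b["position"])
print([b["datum"] for b in bindings])
# ['tar', 'xf', '--verbose', 'archive.tar']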
Exemple #32
0
    def _init_job(self, joborder, **kwargs):
        # type: (Dict[Text, Text], **Any) -> Builder
        """
        kwargs:

        eval_timeout: javascript evaluation timeout
        use_container: do/don't use Docker when DockerRequirement hint provided
        make_fs_access: make an FsAccess() object with given basedir
        basedir: basedir for FsAccess
        docker_outdir: output directory inside docker for this job
        docker_tmpdir: tmpdir inside docker for this job
        docker_stagedir: stagedir inside docker for this job
        outdir: outdir on host for this job
        tmpdir: tmpdir on host for this job
        stagedir: stagedir on host for this job
        select_resources: callback to select compute resources
        """

        builder = Builder()
        builder.job = cast(Dict[Text, Union[Dict[Text, Any], List, Text]],
                           copy.deepcopy(joborder))

        # Validate job order
        try:
            fillInDefaults(self.tool[u"inputs"], builder.job)
            normalizeFilesDirs(builder.job)
            validate.validate_ex(
                self.names.get_name("input_record_schema", ""), builder.job)
        except (validate.ValidationException, WorkflowException) as e:
            raise WorkflowException("Invalid job input record:\n" + Text(e))

        builder.files = []
        builder.bindings = CommentedSeq()
        builder.schemaDefs = self.schemaDefs
        builder.names = self.names
        builder.requirements = self.requirements
        builder.hints = self.hints
        builder.resources = {}
        builder.timeout = kwargs.get("eval_timeout")
        builder.debug = kwargs.get("debug")

        dockerReq, is_req = self.get_requirement("DockerRequirement")

        if dockerReq and is_req and not kwargs.get("use_container"):
            raise WorkflowException(
                "Document has DockerRequirement under 'requirements' but use_container is false.  DockerRequirement must be under 'hints' or use_container must be true."
            )

        builder.make_fs_access = kwargs.get("make_fs_access") or StdFsAccess
        builder.fs_access = builder.make_fs_access(kwargs["basedir"])

        if dockerReq and kwargs.get("use_container"):
            builder.outdir = builder.fs_access.realpath(
                dockerReq.get("dockerOutputDirectory")
                or kwargs.get("docker_outdir") or "/var/spool/cwl")
            builder.tmpdir = builder.fs_access.realpath(
                kwargs.get("docker_tmpdir") or "/tmp")
            builder.stagedir = builder.fs_access.realpath(
                kwargs.get("docker_stagedir") or "/var/lib/cwl")
        else:
            builder.outdir = builder.fs_access.realpath(
                kwargs.get("outdir") or tempfile.mkdtemp())
            builder.tmpdir = builder.fs_access.realpath(
                kwargs.get("tmpdir") or tempfile.mkdtemp())
            builder.stagedir = builder.fs_access.realpath(
                kwargs.get("stagedir") or tempfile.mkdtemp())

        if self.formatgraph:
            for i in self.tool["inputs"]:
                d = shortname(i["id"])
                if d in builder.job and i.get("format"):
                    checkFormat(builder.job[d], builder.do_eval(i["format"]),
                                self.formatgraph)

        builder.bindings.extend(
            builder.bind_input(self.inputs_record_schema, builder.job))

        if self.tool.get("baseCommand"):
            for n, b in enumerate(aslist(self.tool["baseCommand"])):
                builder.bindings.append({
                    "position": [-1000000, n],
                    "datum": b
                })

        if self.tool.get("arguments"):
            for i, a in enumerate(self.tool["arguments"]):
                lc = self.tool["arguments"].lc.data[i]
                fn = self.tool["arguments"].lc.filename
                builder.bindings.lc.add_kv_line_col(len(builder.bindings), lc)
                if isinstance(a, dict):
                    a = copy.copy(a)
                    if a.get("position"):
                        a["position"] = [a["position"], i]
                    else:
                        a["position"] = [0, i]
                    builder.bindings.append(a)
                elif ("$(" in a) or ("${" in a):
                    cm = CommentedMap((("position", [0, i]), ("valueFrom", a)))
                    cm.lc.add_kv_line_col("valueFrom", lc)
                    cm.lc.filename = fn
                    builder.bindings.append(cm)
                else:
                    cm = CommentedMap((("position", [0, i]), ("datum", a)))
                    cm.lc.add_kv_line_col("datum", lc)
                    cm.lc.filename = fn
                    builder.bindings.append(cm)

        builder.bindings.sort(key=lambda a: a["position"])

        builder.resources = self.evalResources(builder, kwargs)

        return builder
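Across the variants shown here, directory selection is a precedence chain: in a container, dockerOutputDirectory wins when given (verbatim when absolute), then the configured docker_outdir, then a default or randomly generated in-container path; outside a container, fresh host temp directories are created. Condensed into one function (illustrative only; the random path stands in for cwltool's random_outdir):

import tempfile
import uuid

def pick_outdir(docker_req, docker_outdir, use_container):
    if docker_req is not None and use_container:
        requested = docker_req.get("dockerOutputDirectory")
        if requested and requested.startswith("/"):
            return requested                       # absolute: use verbatim
        # fall back to the configured or a randomly generated container path
        return requested or docker_outdir or "/" + uuid.uuid4().hex[:6]
    return tempfile.mkdtemp(prefix="out")          # host-side fallback

print(pick_outdir({"dockerOutputDirectory": "/data/out"}, None, True))  # /data/out
print(pick_outdir({}, "/var/spool/cwl", True))                          # /var/spool/cwl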
Exemple #33
0
    def _init_job(self, joborder, **kwargs):
        # type: (Dict[unicode, unicode], **Any) -> Builder
        builder = Builder()
        builder.job = cast(Dict[unicode, Union[Dict[unicode, Any], List,
            unicode]], copy.deepcopy(joborder))

        fillInDefaults(self.tool[u"inputs"], builder.job)
        normalizeFilesDirs(builder.job)

        # Validate job order
        try:
            validate.validate_ex(self.names.get_name("input_record_schema", ""), builder.job)
        except validate.ValidationException as e:
            raise WorkflowException("Error validating input record, " + str(e))

        builder.files = []
        builder.bindings = []
        builder.schemaDefs = self.schemaDefs
        builder.names = self.names
        builder.requirements = self.requirements
        builder.resources = {}
        builder.timeout = kwargs.get("eval_timeout")

        dockerReq, is_req = self.get_requirement("DockerRequirement")

        if dockerReq and is_req and not kwargs.get("use_container"):
            raise WorkflowException("Document has DockerRequirement under 'requirements' but use_container is false.  DockerRequirement must be under 'hints' or use_container must be true.")

        if dockerReq and kwargs.get("use_container"):
            builder.outdir = kwargs.get("docker_outdir") or "/var/spool/cwl"
            builder.tmpdir = kwargs.get("docker_tmpdir") or "/tmp"
            builder.stagedir = kwargs.get("docker_stagedir") or "/var/lib/cwl"
        else:
            builder.outdir = kwargs.get("outdir") or tempfile.mkdtemp()
            builder.tmpdir = kwargs.get("tmpdir") or tempfile.mkdtemp()
            builder.stagedir = kwargs.get("stagedir") or tempfile.mkdtemp()

        builder.fs_access = kwargs.get("fs_access") or StdFsAccess(kwargs["basedir"])

        if self.formatgraph:
            for i in self.tool["inputs"]:
                d = shortname(i["id"])
                if d in builder.job and i.get("format"):
                    checkFormat(builder.job[d], builder.do_eval(i["format"]), self.formatgraph)

        builder.bindings.extend(builder.bind_input(self.inputs_record_schema, builder.job))

        if self.tool.get("baseCommand"):
            for n, b in enumerate(aslist(self.tool["baseCommand"])):
                builder.bindings.append({
                    "position": [-1000000, n],
                    "valueFrom": b
                })

        if self.tool.get("arguments"):
            for i, a in enumerate(self.tool["arguments"]):
                if isinstance(a, dict):
                    a = copy.copy(a)
                    if a.get("position"):
                        a["position"] = [a["position"], i]
                    else:
                        a["position"] = [0, i]
                    a["do_eval"] = a["valueFrom"]
                    a["valueFrom"] = None
                    builder.bindings.append(a)
                elif ("$(" in a) or ("${" in a):
                    builder.bindings.append({
                        "position": [0, i],
                        "do_eval": a,
                        "valueFrom": None
                    })
                else:
                    builder.bindings.append({
                        "position": [0, i],
                        "valueFrom": a
                    })

        builder.bindings.sort(key=lambda a: a["position"])

        builder.resources = self.evalResources(builder, kwargs)

        return builder
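In this variant, whether a bare string argument becomes a literal or a deferred expression rests on a plain substring test for the two CWL expression markers, with expressions parked under "do_eval" until evaluation. The classification in isolation (the dict branch's valueFrom-to-do_eval swap is omitted for brevity):

def classify_argument(a, i):
    # Mirrors the branches above: dicts pass through with a normalized
    # position; expression-looking strings are marked for later
    # evaluation; anything else is a literal value.
    if isinstance(a, dict):
        return dict(a, position=[a.get("position") or 0, i])
    if ("$(" in a) or ("${" in a):
        return {"position": [0, i], "do_eval": a, "valueFrom": None}
    return {"position": [0, i], "valueFrom": a}

print(classify_argument("$(inputs.n)", 0))
# {'position': [0, 0], 'do_eval': '$(inputs.n)', 'valueFrom': None}
print(classify_argument("-v", 1))
# {'position': [0, 1], 'valueFrom': '-v'}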
Exemple #34
0
    def _init_job(self, joborder, **kwargs):
        # type: (Dict[Text, Text], **Any) -> Builder
        """
        kwargs:

        eval_timeout: javascript evaluation timeout
        use_container: do/don't use Docker when DockerRequirement hint provided
        make_fs_access: make an FsAccess() object with given basedir
        basedir: basedir for FsAccess
        docker_outdir: output directory inside docker for this job
        docker_tmpdir: tmpdir inside docker for this job
        docker_stagedir: stagedir inside docker for this job
        outdir: outdir on host for this job
        tmpdir: tmpdir on host for this job
        stagedir: stagedir on host for this job
        select_resources: callback to select compute resources
        debug: enable debugging output
        js_console: enable javascript console output
        """

        builder = Builder()
        builder.job = cast(Dict[Text, Union[Dict[Text, Any], List,
                                            Text]], copy.deepcopy(joborder))

        # Validate job order
        try:
            fillInDefaults(self.tool[u"inputs"], builder.job)
            normalizeFilesDirs(builder.job)
            validate.validate_ex(self.names.get_name("input_record_schema", ""), builder.job,
                                 strict=False, logger=_logger_validation_warnings)
        except (validate.ValidationException, WorkflowException) as e:
            raise WorkflowException("Invalid job input record:\n" + Text(e))

        builder.files = []
        builder.bindings = CommentedSeq()
        builder.schemaDefs = self.schemaDefs
        builder.names = self.names
        builder.requirements = self.requirements
        builder.hints = self.hints
        builder.resources = {}
        builder.timeout = kwargs.get("eval_timeout")
        builder.debug = kwargs.get("debug")
        builder.js_console = kwargs.get("js_console")
        builder.mutation_manager = kwargs.get("mutation_manager")

        builder.make_fs_access = kwargs.get("make_fs_access") or StdFsAccess
        builder.fs_access = builder.make_fs_access(kwargs["basedir"])
        builder.force_docker_pull = kwargs.get("force_docker_pull")

        loadListingReq, _ = self.get_requirement("http://commonwl.org/cwltool#LoadListingRequirement")
        if loadListingReq:
            builder.loadListing = loadListingReq.get("loadListing")

        dockerReq, is_req = self.get_requirement("DockerRequirement")
        defaultDocker = None

        if dockerReq is None and "default_container" in kwargs:
            defaultDocker = kwargs["default_container"]

        if (dockerReq or defaultDocker) and kwargs.get("use_container"):
            if dockerReq:
                # Check if docker output directory is absolute
                if dockerReq.get("dockerOutputDirectory") and dockerReq.get("dockerOutputDirectory").startswith('/'):
                    builder.outdir = dockerReq.get("dockerOutputDirectory")
                else:
                    builder.outdir = builder.fs_access.docker_compatible_realpath(
                        dockerReq.get("dockerOutputDirectory") or kwargs.get("docker_outdir") or "/var/spool/cwl")
            elif defaultDocker:
                builder.outdir = builder.fs_access.docker_compatible_realpath(
                    kwargs.get("docker_outdir") or "/var/spool/cwl")
            builder.tmpdir = builder.fs_access.docker_compatible_realpath(kwargs.get("docker_tmpdir") or "/tmp")
            builder.stagedir = builder.fs_access.docker_compatible_realpath(kwargs.get("docker_stagedir") or "/var/lib/cwl")
        else:
            builder.outdir = builder.fs_access.realpath(kwargs.get("outdir") or tempfile.mkdtemp())
            builder.tmpdir = builder.fs_access.realpath(kwargs.get("tmpdir") or tempfile.mkdtemp())
            builder.stagedir = builder.fs_access.realpath(kwargs.get("stagedir") or tempfile.mkdtemp())

        if self.formatgraph:
            for i in self.tool["inputs"]:
                d = shortname(i["id"])
                if d in builder.job and i.get("format"):
                    checkFormat(builder.job[d], builder.do_eval(i["format"]), self.formatgraph)

        builder.bindings.extend(builder.bind_input(self.inputs_record_schema, builder.job))

        if self.tool.get("baseCommand"):
            for n, b in enumerate(aslist(self.tool["baseCommand"])):
                builder.bindings.append({
                    "position": [-1000000, n],
                    "datum": b
                })

        if self.tool.get("arguments"):
            for i, a in enumerate(self.tool["arguments"]):
                lc = self.tool["arguments"].lc.data[i]
                fn = self.tool["arguments"].lc.filename
                builder.bindings.lc.add_kv_line_col(len(builder.bindings), lc)
                if isinstance(a, dict):
                    a = copy.copy(a)
                    if a.get("position"):
                        a["position"] = [a["position"], i]
                    else:
                        a["position"] = [0, i]
                    builder.bindings.append(a)
                elif ("$(" in a) or ("${" in a):
                    cm = CommentedMap((
                        ("position", [0, i]),
                        ("valueFrom", a)
                    ))
                    cm.lc.add_kv_line_col("valueFrom", lc)
                    cm.lc.filename = fn
                    builder.bindings.append(cm)
                else:
                    cm = CommentedMap((
                        ("position", [0, i]),
                        ("datum", a)
                    ))
                    cm.lc.add_kv_line_col("datum", lc)
                    cm.lc.filename = fn
                    builder.bindings.append(cm)

        # Use Python 2-style sorting of heterogeneous lists
        # (containing str and int types).
        # TODO: unify for both runtimes
        if six.PY3:
            key = cmp_to_key(cmp_like_py2)
        else:  # PY2
            key = lambda dict: dict["position"]
        builder.bindings.sort(key=key)
        builder.resources = self.evalResources(builder, kwargs)
        builder.job_script_provider = kwargs.get("job_script_provider", None)
        return builder
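The lc bookkeeping above preserves the YAML source position of every synthesized binding so that later validation errors can point back at the original file. A minimal sketch of the same ruamel.yaml mechanics (the position numbers are invented for illustration):

from ruamel.yaml.comments import CommentedMap

cm = CommentedMap((("position", [0, 0]), ("valueFrom", "$(inputs.x)")))
# Record that "valueFrom" came from line 12, column 4 of the source
# document; the four ints are key line/col followed by value line/col.
cm.lc.add_kv_line_col("valueFrom", [12, 4, 12, 15])
cm.lc.filename = "tool.cwl"

print(cm.lc.data["valueFrom"])  # [12, 4, 12, 15]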
Exemple #35
0
    def _init_job(self, joborder, input_basedir, **kwargs):
        # type: (Dict[str, str], str, **Any) -> Builder
        builder = Builder()
        builder.job = copy.deepcopy(joborder)

        fillInDefaults(self.tool["inputs"], builder.job)

        # Validate job order
        try:
            validate.validate_ex(
                self.names.get_name("input_record_schema", ""), builder.job)
        except validate.ValidationException as e:
            raise WorkflowException("Error validating input record, " + str(e))

        builder.files = []
        builder.bindings = []
        builder.schemaDefs = self.schemaDefs
        builder.names = self.names
        builder.requirements = self.requirements
        builder.resources = {}
        builder.timeout = kwargs.get("eval_timeout")

        dockerReq, _ = self.get_requirement("DockerRequirement")
        if dockerReq and kwargs.get("use_container"):
            builder.outdir = kwargs.get("docker_outdir") or "/var/spool/cwl"
            builder.tmpdir = kwargs.get("docker_tmpdir") or "/tmp"
        else:
            if kwargs.get('tmp_outdir_prefix'):
                builder.outdir = tempfile.mkdtemp(
                    prefix=kwargs.get('tmp_outdir_prefix'))
            else:
                builder.outdir = kwargs.get("outdir") or tempfile.mkdtemp()
            if kwargs.get('tmpdir_prefix'):
                builder.tmpdir = tempfile.mkdtemp(
                    prefix=kwargs.get('tmpdir_prefix'))
            else:
                builder.tmpdir = kwargs.get("tmpdir") or tempfile.mkdtemp()

        builder.fs_access = kwargs.get("fs_access") or StdFsAccess(
            input_basedir)

        if self.formatgraph:
            for i in self.tool["inputs"]:
                d = shortname(i["id"])
                if d in builder.job and i.get("format"):
                    checkFormat(builder.job[d], builder.do_eval(i["format"]),
                                self.formatgraph)

        builder.bindings.extend(
            builder.bind_input(self.inputs_record_schema, builder.job))

        if self.tool.get("baseCommand"):
            for n, b in enumerate(aslist(self.tool["baseCommand"])):
                builder.bindings.append({
                    "position": [-1000000, n],
                    "valueFrom": b
                })

        if self.tool.get("arguments"):
            for i, a in enumerate(self.tool["arguments"]):
                if isinstance(a, dict):
                    a = copy.copy(a)
                    if a.get("position"):
                        a["position"] = [a["position"], i]
                    else:
                        a["position"] = [0, i]
                    a["do_eval"] = a["valueFrom"]
                    a["valueFrom"] = None
                    builder.bindings.append(a)
                else:
                    builder.bindings.append({
                        "position": [0, i],
                        "valueFrom": a
                    })

        builder.bindings.sort(key=lambda a: a["position"])

        builder.resources = self.evalResources(builder, kwargs)

        return builder
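Note that tmp_outdir_prefix above is handed directly to tempfile.mkdtemp, which treats the prefix as a path prefix rather than a directory: with a trailing separator it behaves like a parent directory, otherwise it becomes the start of the created directory's name. A quick check:

import os
import tempfile

parent = tempfile.mkdtemp()  # stand-in for a configured scratch area
d = tempfile.mkdtemp(prefix=os.path.join(parent, "cwl-out-"))
print(d.startswith(os.path.join(parent, "cwl-out-")))  # True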
Exemple #36
0
    def _init_job(self, joborder, runtime_context):
        # type: (Mapping[Text, Text], RuntimeContext) -> Builder

        job = cast(Dict[Text, Union[Dict[Text, Any], List[Any], Text, None]],
                   copy.deepcopy(joborder))

        make_fs_access = getdefault(runtime_context.make_fs_access, StdFsAccess)
        fs_access = make_fs_access(runtime_context.basedir)

        load_listing_req, _ = self.get_requirement(
            "LoadListingRequirement")

        if load_listing_req is not None:
            load_listing = load_listing_req.get("loadListing")
        else:
            load_listing = "no_listing"

        # Validate job order
        try:
            fill_in_defaults(self.tool[u"inputs"], job, fs_access)

            normalizeFilesDirs(job)
            schema = self.names.get_name("input_record_schema", "")
            if schema is None:
                raise WorkflowException("Missing input record schema: "
                    "{}".format(self.names))
            validate.validate_ex(schema, job, strict=False,
                                 logger=_logger_validation_warnings)

            if load_listing and load_listing != "no_listing":
                get_listing(fs_access, job, recursive=(load_listing == "deep_listing"))

            visit_class(job, ("File",), functools.partial(add_sizes, fs_access))

            if load_listing == "deep_listing":
                for i, inparm in enumerate(self.tool["inputs"]):
                    k = shortname(inparm["id"])
                    if k not in job:
                        continue
                    v = job[k]
                    dircount = [0]
                    def inc(d):  # type: (List[int]) -> None
                        d[0] += 1
                    visit_class(v, ("Directory",), lambda x: inc(dircount))
                    if dircount[0] == 0:
                        continue
                    filecount = [0]
                    visit_class(v, ("File",), lambda x: inc(filecount))
                    if filecount[0] > FILE_COUNT_WARNING:
                        # Long lines in this message are okay, will be reflowed based on terminal columns.
                        _logger.warning(strip_dup_lineno(SourceLine(self.tool["inputs"], i, Text).makeError(
                    """Recursive directory listing has resulted in a large number of File objects (%s) passed to the input parameter '%s'.  This may negatively affect workflow performance and memory use.

If this is a problem, use the hint 'cwltool:LoadListingRequirement' with "shallow_listing" or "no_listing" to change the directory listing behavior:

$namespaces:
  cwltool: "http://commonwl.org/cwltool#"
hints:
  cwltool:LoadListingRequirement:
    loadListing: shallow_listing

""" % (filecount[0], k))))

        except (validate.ValidationException, WorkflowException) as err:
            raise WorkflowException("Invalid job input record:\n" + Text(err))

        files = []  # type: List[Dict[Text, Text]]
        bindings = CommentedSeq()
        tmpdir = u""
        stagedir = u""

        docker_req, _ = self.get_requirement("DockerRequirement")
        default_docker = None

        if docker_req is None and runtime_context.default_container:
            default_docker = runtime_context.default_container

        if (docker_req or default_docker) and runtime_context.use_container:
            if docker_req is not None:
                # Check if docker output directory is absolute
                if docker_req.get("dockerOutputDirectory") and \
                        docker_req.get("dockerOutputDirectory").startswith('/'):
                    outdir = docker_req.get("dockerOutputDirectory")
                else:
                    outdir = docker_req.get("dockerOutputDirectory") or \
                        runtime_context.docker_outdir or random_outdir()
            elif default_docker is not None:
                outdir = runtime_context.docker_outdir or random_outdir()
            tmpdir = runtime_context.docker_tmpdir or "/tmp"
            stagedir = runtime_context.docker_stagedir or "/var/lib/cwl"
        else:
            outdir = fs_access.realpath(
                runtime_context.outdir or tempfile.mkdtemp(
                    prefix=getdefault(runtime_context.tmp_outdir_prefix,
                                      DEFAULT_TMP_PREFIX)))
            if self.tool[u"class"] != 'Workflow':
                tmpdir = fs_access.realpath(runtime_context.tmpdir
                                            or tempfile.mkdtemp())
                stagedir = fs_access.realpath(runtime_context.stagedir
                                              or tempfile.mkdtemp())

        builder = Builder(job,
                          files,
                          bindings,
                          self.schemaDefs,
                          self.names,
                          self.requirements,
                          self.hints,
                          {},
                          runtime_context.mutation_manager,
                          self.formatgraph,
                          make_fs_access,
                          fs_access,
                          runtime_context.job_script_provider,
                          runtime_context.eval_timeout,
                          runtime_context.debug,
                          runtime_context.js_console,
                          runtime_context.force_docker_pull,
                          load_listing,
                          outdir,
                          tmpdir,
                          stagedir)

        bindings.extend(builder.bind_input(
            self.inputs_record_schema, job,
            discover_secondaryFiles=getdefault(runtime_context.toplevel, False)))

        if self.tool.get("baseCommand"):
            for index, command in enumerate(aslist(self.tool["baseCommand"])):
                bindings.append({
                    "position": [-1000000, index],
                    "datum": command
                })

        if self.tool.get("arguments"):
            for i, arg in enumerate(self.tool["arguments"]):
                lc = self.tool["arguments"].lc.data[i]
                filename = self.tool["arguments"].lc.filename
                bindings.lc.add_kv_line_col(len(bindings), lc)
                if isinstance(arg, MutableMapping):
                    arg = copy.deepcopy(arg)
                    if arg.get("position"):
                        arg["position"] = [arg["position"], i]
                    else:
                        arg["position"] = [0, i]
                    bindings.append(arg)
                elif ("$(" in arg) or ("${" in arg):
                    cm = CommentedMap((
                        ("position", [0, i]),
                        ("valueFrom", arg)
                    ))
                    cm.lc.add_kv_line_col("valueFrom", lc)
                    cm.lc.filename = filename
                    bindings.append(cm)
                else:
                    cm = CommentedMap((
                        ("position", [0, i]),
                        ("datum", arg)
                    ))
                    cm.lc.add_kv_line_col("datum", lc)
                    cm.lc.filename = filename
                    bindings.append(cm)

        # Use Python 2-style sorting of heterogeneous lists
        # (containing str and int types).
        if PY3:
            key = functools.cmp_to_key(cmp_like_py2)
        else:  # PY2
            key = lambda d: d["position"]

        # This awkward construction replaces the contents of
        # "bindings" in place (because Builder expects it to be
        # mutated in place, sigh, I'm sorry) with its contents sorted,
        # supporting different versions of Python and ruamel.yaml with
        # different behaviors/bugs in CommentedSeq.
        bindings_copy = copy.deepcopy(bindings)
        del bindings[:]
        bindings.extend(sorted(bindings_copy, key=key))

        if self.tool[u"class"] != 'Workflow':
            builder.resources = self.evalResources(builder, runtime_context)
        return builder
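The deep-listing warning in this variant leans on visit_class, which walks an arbitrarily nested job structure and applies a callback to every mapping whose "class" matches. A hypothetical re-implementation of that traversal, used the same way to count File objects (cwltool imports the real visit_class from its utilities):

def visit_class_sketch(rec, classes, op):
    # Apply op to every dict whose "class" is in classes, recursing
    # through dict values and list items.
    if isinstance(rec, dict):
        if rec.get("class") in classes:
            op(rec)
        for value in rec.values():
            visit_class_sketch(value, classes, op)
    elif isinstance(rec, list):
        for item in rec:
            visit_class_sketch(item, classes, op)

tree = {"class": "Directory", "listing": [
    {"class": "File", "basename": "a.txt"},
    {"class": "Directory", "listing": [{"class": "File", "basename": "b.txt"}]},
]}
filecount = [0]
def inc(_):  # same single-element-list trick as the code above
    filecount[0] += 1
visit_class_sketch(tree, ("File",), inc)
print(filecount[0])  # 2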
Exemple #37
0
    def __init__(self, toolpath_object, validateAs, do_validate=True, **kwargs):
        (_, self.names, _) = get_schema()
        self.tool = toolpath_object

        if do_validate:
            try:
                # Validate tool document
                validate.validate_ex(self.names.get_name(validateAs, ""), self.tool, strict=kwargs.get("strict"))
            except validate.ValidationException as v:
                raise validate.ValidationException("Could not validate %s as %s:\n%s" % (self.tool.get("id"), validateAs, validate.indent(str(v))))

        self.requirements = kwargs.get("requirements", []) + self.tool.get("requirements", [])
        self.hints = kwargs.get("hints", []) + self.tool.get("hints", [])

        self.validate_hints(self.tool.get("hints", []), strict=kwargs.get("strict"))

        self.schemaDefs = {}

        sd, _ = self.get_requirement("SchemaDefRequirement")

        if sd:
            sdtypes = sd["types"]
            av = schema_salad.schema.make_valid_avro(sdtypes, {t["name"]: t for t in sdtypes}, set())
            for i in av:
                self.schemaDefs[i["name"]] = i
            avro.schema.make_avsc_object(av, self.names)

        # Build record schema from inputs
        self.inputs_record_schema = {"name": "input_record_schema", "type": "record", "fields": []}
        self.outputs_record_schema = {"name": "outputs_record_schema", "type": "record", "fields": []}

        for key in ("inputs", "outputs"):
            for i in self.tool[key]:
                c = copy.copy(i)
                doc_url, _ = urlparse.urldefrag(c['id'])
                c["name"] = shortname(c["id"])
                del c["id"]

                if "type" not in c:
                    raise validate.ValidationException("Missing `type` in parameter `%s`" % c["name"])

                if "default" in c and "null" not in aslist(c["type"]):
                    c["type"] = ["null"] + aslist(c["type"])
                else:
                    c["type"] = c["type"]

                if key == "inputs":
                    self.inputs_record_schema["fields"].append(c)
                elif key == "outputs":
                    self.outputs_record_schema["fields"].append(c)

        try:
            self.inputs_record_schema = schema_salad.schema.make_valid_avro(self.inputs_record_schema, {}, set())
            avro.schema.make_avsc_object(self.inputs_record_schema, self.names)
        except avro.schema.SchemaParseException as e:
            raise validate.ValidationException("Got error `%s` while prcoessing inputs of %s:\n%s" % (str(e), self.tool["id"], json.dumps(self.inputs_record_schema, indent=4)))

        try:
            self.outputs_record_schema = schema_salad.schema.make_valid_avro(self.outputs_record_schema, {}, set())
            avro.schema.make_avsc_object(self.outputs_record_schema, self.names)
        except avro.schema.SchemaParseException as e:
            raise validate.ValidationException("Got error `%s` while prcoessing outputs of %s:\n%s" % (str(e), self.tool["id"], json.dumps(self.outputs_record_schema, indent=4)))
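One detail worth isolating from the schema construction above: any parameter that carries a default is made optional by prepending "null" to its type union before the record is handed to Avro. In isolation (aslist, as elsewhere in cwltool, wraps scalars into lists):

def aslist(x):
    return x if isinstance(x, list) else [x]

def normalize_type(param):
    # A parameter with a default must accept null, i.e. be omittable.
    if "default" in param and "null" not in aslist(param["type"]):
        param["type"] = ["null"] + aslist(param["type"])
    return param

print(normalize_type({"name": "threads", "type": "int", "default": 1}))
# {'name': 'threads', 'type': ['null', 'int'], 'default': 1}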
Exemple #38
0
    def _init_job(self, joborder, **kwargs):
        # type: (Dict[unicode, unicode], **Any) -> Builder
        builder = Builder()
        builder.job = cast(
            Dict[unicode, Union[Dict[unicode, Any], List, unicode]],
            copy.deepcopy(joborder))

        fillInDefaults(self.tool[u"inputs"], builder.job)
        normalizeFilesDirs(builder.job)

        # Validate job order
        try:
            validate.validate_ex(
                self.names.get_name("input_record_schema", ""), builder.job)
        except validate.ValidationException as e:
            raise WorkflowException("Error validating input record, " + str(e))

        builder.files = []
        builder.bindings = []
        builder.schemaDefs = self.schemaDefs
        builder.names = self.names
        builder.requirements = self.requirements
        builder.resources = {}
        builder.timeout = kwargs.get("eval_timeout")

        dockerReq, is_req = self.get_requirement("DockerRequirement")

        if dockerReq and is_req and not kwargs.get("use_container"):
            raise WorkflowException(
                "Document has DockerRequirement under 'requirements' but use_container is false.  DockerRequirement must be under 'hints' or use_container must be true."
            )

        if dockerReq and kwargs.get("use_container"):
            builder.outdir = kwargs.get("docker_outdir") or "/var/spool/cwl"
            builder.tmpdir = kwargs.get("docker_tmpdir") or "/tmp"
            builder.stagedir = kwargs.get("docker_stagedir") or "/var/lib/cwl"
        else:
            builder.outdir = kwargs.get("outdir") or tempfile.mkdtemp()
            builder.tmpdir = kwargs.get("tmpdir") or tempfile.mkdtemp()
            builder.stagedir = kwargs.get("stagedir") or tempfile.mkdtemp()

        builder.fs_access = kwargs.get("fs_access") or StdFsAccess(
            kwargs["basedir"])

        if self.formatgraph:
            for i in self.tool["inputs"]:
                d = shortname(i["id"])
                if d in builder.job and i.get("format"):
                    checkFormat(builder.job[d], builder.do_eval(i["format"]),
                                self.formatgraph)

        builder.bindings.extend(
            builder.bind_input(self.inputs_record_schema, builder.job))

        if self.tool.get("baseCommand"):
            for n, b in enumerate(aslist(self.tool["baseCommand"])):
                builder.bindings.append({
                    "position": [-1000000, n],
                    "valueFrom": b
                })

        if self.tool.get("arguments"):
            for i, a in enumerate(self.tool["arguments"]):
                if isinstance(a, dict):
                    a = copy.copy(a)
                    if a.get("position"):
                        a["position"] = [a["position"], i]
                    else:
                        a["position"] = [0, i]
                    a["do_eval"] = a["valueFrom"]
                    a["valueFrom"] = None
                    builder.bindings.append(a)
                elif ("$(" in a) or ("${" in a):
                    builder.bindings.append({
                        "position": [0, i],
                        "do_eval": a,
                        "valueFrom": None
                    })
                else:
                    builder.bindings.append({
                        "position": [0, i],
                        "valueFrom": a
                    })

        builder.bindings.sort(key=lambda a: a["position"])

        builder.resources = self.evalResources(builder, kwargs)

        return builder
Exemple #39
0
    def _init_job(self, joborder, runtimeContext):
        # type: (Dict[Text, Text], RuntimeContext) -> Builder
        """
        kwargs:

        use_container: do/don't use Docker when DockerRequirement hint provided
        make_fs_access: make an FsAccess() object with given basedir
        docker_outdir: output directory inside docker for this job
        docker_tmpdir: tmpdir inside docker for this job
        docker_stagedir: stagedir inside docker for this job
        outdir: outdir on host for this job
        tmpdir: tmpdir on host for this job
        stagedir: stagedir on host for this job
        select_resources: callback to select compute resources
        tmp_outdir_prefix: Path prefix for intermediate output directories
        """

        job = cast(Dict[Text, Union[Dict[Text, Any], List, Text]],
                   copy.deepcopy(joborder))
        # Validate job order
        try:
            fillInDefaults(self.tool[u"inputs"], job)
            normalizeFilesDirs(job)
            validate.validate_ex(self.names.get_name("input_record_schema",
                                                     ""),
                                 job,
                                 strict=False,
                                 logger=_logger_validation_warnings)
        except (validate.ValidationException, WorkflowException) as e:
            raise WorkflowException("Invalid job input record:\n" + Text(e))

        files = []  # type: List[Dict[Text, Text]]
        bindings = CommentedSeq()
        make_fs_access = getdefault(runtimeContext.make_fs_access, StdFsAccess)
        fs_access = make_fs_access(runtimeContext.basedir)
        tmpdir = u""
        stagedir = u""

        loadListingReq, _ = self.get_requirement(
            "http://commonwl.org/cwltool#LoadListingRequirement")
        if loadListingReq:
            loadListing = loadListingReq.get("loadListing")
        else:
            loadListing = "deep_listing"  # will default to "no_listing" in CWL v1.1

        dockerReq, _ = self.get_requirement("DockerRequirement")
        defaultDocker = None

        if dockerReq is None and runtimeContext.default_container:
            defaultDocker = runtimeContext.default_container

        if (dockerReq or defaultDocker) and runtimeContext.use_container:
            if dockerReq:
                # Check if docker output directory is absolute
                if dockerReq.get("dockerOutputDirectory") and \
                        dockerReq.get("dockerOutputDirectory").startswith('/'):
                    outdir = dockerReq.get("dockerOutputDirectory")
                else:
                    outdir = fs_access.docker_compatible_realpath(
                        dockerReq.get("dockerOutputDirectory")
                        or runtimeContext.docker_outdir or "/var/spool/cwl")
            elif defaultDocker:
                outdir = fs_access.docker_compatible_realpath(
                    runtimeContext.docker_outdir or "/var/spool/cwl")
            tmpdir = fs_access.docker_compatible_realpath(
                runtimeContext.docker_tmpdir or "/tmp")
            stagedir = fs_access.docker_compatible_realpath(
                runtimeContext.docker_stagedir or "/var/lib/cwl")
        else:
            outdir = fs_access.realpath(
                runtimeContext.outdir or tempfile.mkdtemp(prefix=getdefault(
                    runtimeContext.tmp_outdir_prefix, DEFAULT_TMP_PREFIX)))
            if self.tool[u"class"] != 'Workflow':
                tmpdir = fs_access.realpath(runtimeContext.tmpdir
                                            or tempfile.mkdtemp())
                stagedir = fs_access.realpath(runtimeContext.stagedir
                                              or tempfile.mkdtemp())

        builder = Builder(
            job, files, bindings, self.schemaDefs, self.names,
            self.requirements, self.hints, runtimeContext.eval_timeout,
            runtimeContext.debug, {}, runtimeContext.js_console,
            runtimeContext.mutation_manager, self.formatgraph, make_fs_access,
            fs_access, runtimeContext.force_docker_pull, loadListing, outdir,
            tmpdir, stagedir, runtimeContext.job_script_provider)

        bindings.extend(
            builder.bind_input(self.inputs_record_schema,
                               job,
                               discover_secondaryFiles=getdefault(
                                   runtimeContext.toplevel, False)))

        if self.tool.get("baseCommand"):
            for n, b in enumerate(aslist(self.tool["baseCommand"])):
                bindings.append({"position": [-1000000, n], "datum": b})

        if self.tool.get("arguments"):
            for i, a in enumerate(self.tool["arguments"]):
                lc = self.tool["arguments"].lc.data[i]
                fn = self.tool["arguments"].lc.filename
                bindings.lc.add_kv_line_col(len(bindings), lc)
                if isinstance(a, dict):
                    a = copy.copy(a)
                    if a.get("position"):
                        a["position"] = [a["position"], i]
                    else:
                        a["position"] = [0, i]
                    bindings.append(a)
                elif ("$(" in a) or ("${" in a):
                    cm = CommentedMap((("position", [0, i]), ("valueFrom", a)))
                    cm.lc.add_kv_line_col("valueFrom", lc)
                    cm.lc.filename = fn
                    bindings.append(cm)
                else:
                    cm = CommentedMap((("position", [0, i]), ("datum", a)))
                    cm.lc.add_kv_line_col("datum", lc)
                    cm.lc.filename = fn
                    bindings.append(cm)

        # Use Python 2-style sorting of heterogeneous lists
        # (containing str and int types).
        # TODO: unify for both runtimes
        if six.PY3:
            key = cmp_to_key(cmp_like_py2)
        else:  # PY2
            key = lambda dict: dict["position"]
        bindings.sort(key=key)
        builder.resources = self.evalResources(builder, runtimeContext)
        return builder
Exemple #40
0
    def _init_job(self, joborder, runtimeContext):
        # type: (Dict[Text, Text], RuntimeContext) -> Builder

        job = cast(Dict[Text, Union[Dict[Text, Any], List[Any], Text, None]],
                   copy.deepcopy(joborder))

        make_fs_access = getdefault(runtimeContext.make_fs_access, StdFsAccess)
        fs_access = make_fs_access(runtimeContext.basedir)

        # Validate job order
        try:
            fill_in_defaults(self.tool[u"inputs"], job, fs_access)
            normalizeFilesDirs(job)
            validate.validate_ex(self.names.get_name("input_record_schema", ""),
                                 job, strict=False, logger=_logger_validation_warnings)
        except (validate.ValidationException, WorkflowException) as e:
            raise WorkflowException("Invalid job input record:\n" + Text(e))

        files = []  # type: List[Dict[Text, Text]]
        bindings = CommentedSeq()
        tmpdir = u""
        stagedir = u""

        loadListingReq, _ = self.get_requirement("http://commonwl.org/cwltool#LoadListingRequirement")
        if loadListingReq:
            loadListing = loadListingReq.get("loadListing")
        else:
            loadListing = "deep_listing"   # will default to "no_listing" in CWL v1.1

        dockerReq, _ = self.get_requirement("DockerRequirement")
        defaultDocker = None

        if dockerReq is None and runtimeContext.default_container:
            defaultDocker = runtimeContext.default_container

        if (dockerReq or defaultDocker) and runtimeContext.use_container:
            if dockerReq:
                # Check if docker output directory is absolute
                if dockerReq.get("dockerOutputDirectory") and \
                        dockerReq.get("dockerOutputDirectory").startswith('/'):
                    outdir = dockerReq.get("dockerOutputDirectory")
                else:
                    outdir = dockerReq.get("dockerOutputDirectory") or \
                        runtimeContext.docker_outdir or "/var/spool/cwl"
            elif defaultDocker:
                outdir = runtimeContext.docker_outdir or "/var/spool/cwl"
            tmpdir = runtimeContext.docker_tmpdir or "/tmp"
            stagedir = runtimeContext.docker_stagedir or "/var/lib/cwl"
        else:
            outdir = fs_access.realpath(
                runtimeContext.outdir or tempfile.mkdtemp(
                    prefix=getdefault(runtimeContext.tmp_outdir_prefix, DEFAULT_TMP_PREFIX)))
            if self.tool[u"class"] != 'Workflow':
                tmpdir = fs_access.realpath(runtimeContext.tmpdir or tempfile.mkdtemp())
                stagedir = fs_access.realpath(runtimeContext.stagedir or tempfile.mkdtemp())

        builder = Builder(job,
                          files,
                          bindings,
                          self.schemaDefs,
                          self.names,
                          self.requirements,
                          self.hints,
                          runtimeContext.eval_timeout,
                          runtimeContext.debug,
                          {},
                          runtimeContext.js_console,
                          runtimeContext.mutation_manager,
                          self.formatgraph,
                          make_fs_access,
                          fs_access,
                          runtimeContext.force_docker_pull,
                          loadListing,
                          outdir,
                          tmpdir,
                          stagedir,
                          runtimeContext.job_script_provider)

        bindings.extend(builder.bind_input(
            self.inputs_record_schema, job,
            discover_secondaryFiles=getdefault(runtimeContext.toplevel, False)))

        if self.tool.get("baseCommand"):
            for n, b in enumerate(aslist(self.tool["baseCommand"])):
                bindings.append({
                    "position": [-1000000, n],
                    "datum": b
                })

        if self.tool.get("arguments"):
            for i, a in enumerate(self.tool["arguments"]):
                lc = self.tool["arguments"].lc.data[i]
                fn = self.tool["arguments"].lc.filename
                bindings.lc.add_kv_line_col(len(bindings), lc)
                if isinstance(a, dict):
                    a = copy.copy(a)
                    if a.get("position"):
                        a["position"] = [a["position"], i]
                    else:
                        a["position"] = [0, i]
                    bindings.append(a)
                elif ("$(" in a) or ("${" in a):
                    cm = CommentedMap((
                        ("position", [0, i]),
                        ("valueFrom", a)
                    ))
                    cm.lc.add_kv_line_col("valueFrom", lc)
                    cm.lc.filename = fn
                    bindings.append(cm)
                else:
                    cm = CommentedMap((
                        ("position", [0, i]),
                        ("datum", a)
                    ))
                    cm.lc.add_kv_line_col("datum", lc)
                    cm.lc.filename = fn
                    bindings.append(cm)

        # Use Python 2-style sorting of heterogeneous lists
        # (containing str and int types).
        # TODO: unify for both runtimes
        if six.PY3:
            key = cmp_to_key(cmp_like_py2)
        else:  # PY2
            key = lambda dict: dict["position"]
        bindings.sort(key=key)

        if self.tool["class"] != "Workflow":
            builder.resources = self.evalResources(builder, runtimeContext)
        return builder
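For reference, `cmp_like_py2` comes from cwltool's utilities; the following is only a minimal sketch of a Python 2-style comparator for these mixed str/int "position" lists, illustrating the ordering the sort above relies on rather than the project's actual implementation:

from functools import cmp_to_key

def cmp_like_py2_sketch(a, b):
    # Compare bindings by their "position" lists, element by element.
    for x, y in zip(a["position"], b["position"]):
        if type(x) is type(y):
            if x != y:
                return -1 if x < y else 1
        else:
            # CPython 2 ordered numbers before strings when types differed.
            return -1 if isinstance(x, int) else 1
    return len(a["position"]) - len(b["position"])

bindings = [{"position": [0, "inp"]}, {"position": [-1000000, 0]}, {"position": [0, 1]}]
bindings.sort(key=cmp_to_key(cmp_like_py2_sketch))
# -> baseCommand binding first, then int-positioned, then str-positioned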
Example #41
    def _init_job(self, joborder, **kwargs):
        # type: (Dict[Text, Text], **Any) -> Builder
        """
        kwargs:

        eval_timeout: javascript evaluation timeout
        use_container: do/don't use Docker when DockerRequirement hint provided
        make_fs_access: make an FsAccess() object with given basedir
        basedir: basedir for FsAccess
        docker_outdir: output directory inside docker for this job
        docker_tmpdir: tmpdir inside docker for this job
        docker_stagedir: stagedir inside docker for this job
        outdir: outdir on host for this job
        tmpdir: tmpdir on host for this job
        stagedir: stagedir on host for this job
        select_resources: callback to select compute resources
        """

        builder = Builder()
        builder.job = cast(Dict[Text, Union[Dict[Text, Any], List,
            Text]], copy.deepcopy(joborder))

        fillInDefaults(self.tool[u"inputs"], builder.job)
        normalizeFilesDirs(builder.job)

        # Validate job order
        try:
            validate.validate_ex(self.names.get_name("input_record_schema", ""), builder.job)
        except validate.ValidationException as e:
            raise WorkflowException("Error validating input record, " + Text(e))

        builder.files = []
        builder.bindings = []
        builder.schemaDefs = self.schemaDefs
        builder.names = self.names
        builder.requirements = self.requirements
        builder.resources = {}
        builder.timeout = kwargs.get("eval_timeout")

        dockerReq, is_req = self.get_requirement("DockerRequirement")

        if dockerReq and is_req and not kwargs.get("use_container"):
            raise WorkflowException("Document has DockerRequirement under 'requirements' but use_container is false.  DockerRequirement must be under 'hints' or use_container must be true.")

        builder.make_fs_access = kwargs.get("make_fs_access") or StdFsAccess
        builder.fs_access = builder.make_fs_access(kwargs["basedir"])

        if dockerReq and kwargs.get("use_container"):
            builder.outdir = builder.fs_access.realpath(kwargs.get("docker_outdir") or "/var/spool/cwl")
            builder.tmpdir = builder.fs_access.realpath(kwargs.get("docker_tmpdir") or "/tmp")
            builder.stagedir = builder.fs_access.realpath(kwargs.get("docker_stagedir") or "/var/lib/cwl")
        else:
            builder.outdir = builder.fs_access.realpath(kwargs.get("outdir") or tempfile.mkdtemp())
            builder.tmpdir = builder.fs_access.realpath(kwargs.get("tmpdir") or tempfile.mkdtemp())
            builder.stagedir = builder.fs_access.realpath(kwargs.get("stagedir") or tempfile.mkdtemp())

        if self.formatgraph:
            for i in self.tool["inputs"]:
                d = shortname(i["id"])
                if d in builder.job and i.get("format"):
                    checkFormat(builder.job[d], builder.do_eval(i["format"]), self.formatgraph)

        builder.bindings.extend(builder.bind_input(self.inputs_record_schema, builder.job))

        if self.tool.get("baseCommand"):
            for n, b in enumerate(aslist(self.tool["baseCommand"])):
                builder.bindings.append({
                    "position": [-1000000, n],
                    "datum": b
                })

        if self.tool.get("arguments"):
            for i, a in enumerate(self.tool["arguments"]):
                if isinstance(a, dict):
                    a = copy.copy(a)
                    if a.get("position"):
                        a["position"] = [a["position"], i]
                    else:
                        a["position"] = [0, i]
                    builder.bindings.append(a)
                elif ("$(" in a) or ("${" in a):
                    builder.bindings.append({
                        "position": [0, i],
                        "valueFrom": a
                    })
                else:
                    builder.bindings.append({
                        "position": [0, i],
                        "datum": a
                    })

        builder.bindings.sort(key=lambda a: a["position"])

        builder.resources = self.evalResources(builder, kwargs)

        return builder
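The `aslist` helper used for `baseCommand` in both examples normalizes a scalar or a list into a list. A minimal sketch of that behavior (this mirrors what the calling code assumes, not necessarily the exact cwltool source):

def aslist(thing):
    # Wrap a bare value in a list; pass lists through unchanged, so both
    # `baseCommand: echo` and `baseCommand: [echo, -n]` enumerate cleanly.
    if isinstance(thing, list):
        return thing
    return [thing]

assert aslist("echo") == ["echo"]
assert aslist(["echo", "-n"]) == ["echo", "-n"]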
Example #42
    def collect_output_ports(
        self,
        ports: Union[CommentedSeq, Set[CWLObjectType]],
        builder: Builder,
        outdir: str,
        rcode: int,
        compute_checksum: bool = True,
        jobname: str = "",
        readers: Optional[MutableMapping[str, CWLObjectType]] = None,
    ) -> OutputPortsType:
        ret = {}  # type: OutputPortsType
        debug = _logger.isEnabledFor(logging.DEBUG)
        cwl_version = self.metadata.get(
            "http://commonwl.org/cwltool#original_cwlVersion", None)
        if cwl_version != "v1.0":
            builder.resources["exitCode"] = rcode
        try:
            fs_access = builder.make_fs_access(outdir)
            custom_output = fs_access.join(outdir, "cwl.output.json")
            if fs_access.exists(custom_output):
                with fs_access.open(custom_output, "r") as f:
                    ret = json.load(f)
                if debug:
                    _logger.debug(
                        "Raw output from %s: %s",
                        custom_output,
                        json_dumps(ret, indent=4),
                    )
            else:
                for i, port in enumerate(ports):
                    with SourceLine(
                            ports,
                            i,
                            partial(ParameterOutputWorkflowException, port=port),
                            debug,
                    ):
                        fragment = shortname(port["id"])
                        ret[fragment] = self.collect_output(
                            port,
                            builder,
                            outdir,
                            fs_access,
                            compute_checksum=compute_checksum,
                        )
            if ret:
                revmap = partial(revmap_file, builder, outdir)
                adjustDirObjs(ret, trim_listing)
                visit_class(ret, ("File", "Directory"), revmap)
                visit_class(ret, ("File", "Directory"), remove_path)
                normalizeFilesDirs(ret)
                visit_class(
                    ret,
                    ("File", "Directory"),
                    partial(check_valid_locations, fs_access),
                )

                if compute_checksum:
                    adjustFileObjs(ret, partial(compute_checksums, fs_access))
            expected_schema = cast(
                Schema, self.names.get_name("outputs_record_schema", None))
            validate_ex(expected_schema,
                        ret,
                        strict=False,
                        logger=_logger_validation_warnings)
            if ret is not None and builder.mutation_manager is not None:
                adjustFileObjs(ret, builder.mutation_manager.set_generation)
            return ret if ret is not None else {}
        except ValidationException as e:
            raise WorkflowException("Error validating output record. " +
                                    str(e) + "\n in " +
                                    json_dumps(ret, indent=4)) from e
        finally:
            if builder.mutation_manager and readers:
                for r in readers.values():
                    builder.mutation_manager.release_reader(jobname, r)
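The output post-processing above leans on `visit_class` to walk the nested output record. As a rough sketch of that traversal (an assumption about its behavior rather than a copy of the cwltool helper), it applies an operation to every mapping whose `class` field matches:

def visit_class_sketch(rec, cls, op):
    # Recursively apply `op` to every dict whose "class" is in `cls`.
    if isinstance(rec, dict):
        if rec.get("class") in cls:
            op(rec)
        for value in rec.values():
            visit_class_sketch(value, cls, op)
    elif isinstance(rec, list):
        for item in rec:
            visit_class_sketch(item, cls, op)

ret = {"out": {"class": "File", "location": "file:///tmp/x", "path": "/tmp/x"}}
visit_class_sketch(ret, ("File", "Directory"), lambda f: f.pop("path", None))
assert "path" not in ret["out"]  # mirrors the remove_path pass above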