Esempio n. 1
0
class Config:
    """Anonymization settings held in a YAML-style mapping.

    The settings live in ``config_dict``. ``secret_key`` holds the stripped
    content of the optional key file, or ``None`` when no key file was given.
    """

    def __init__(self, yaml_path=None, key_file_path=None, **kwargs):
        """Build the configuration from keyword arguments or from a YAML file.

        Args:
            yaml_path: Optional path of a YAML file. When given, the loaded
                document replaces the mapping built from ``kwargs`` entirely.
            key_file_path: Optional path of a file containing the secret key;
                surrounding whitespace is stripped.
            **kwargs: Initial configuration entries. They are only kept when
                no ``yaml_path`` is given, and ``columns_to_anonymize`` always
                starts out empty in that case.
        """
        self.config_dict = CommentedMap()
        for keyword in kwargs:
            self.config_dict[keyword] = kwargs[keyword]
        # Define the attribute unconditionally so that reading it without a
        # key file yields None instead of raising AttributeError.
        self.secret_key = None
        if yaml_path is not None:
            with open(yaml_path) as config_file:
                # NOTE(security): yaml.Loader can construct arbitrary Python
                # objects; only load configuration files from trusted sources.
                loaded = yaml.load(config_file, Loader=yaml.Loader)
            # An empty YAML document parses to None; fall back to an empty
            # mapping so the accessors below keep working.
            self.config_dict = loaded if loaded is not None else CommentedMap()
        else:
            self.config_dict['columns_to_anonymize'] = {}
        if key_file_path is not None:
            with open(key_file_path) as key_file:
                self.secret_key = key_file.read().strip()

    @property
    def columns_to_anonymize(self):
        """Return the per-column configuration mapping, or None if unset."""
        return self.config_dict.get('columns_to_anonymize')

    @property
    def delimiter(self):
        """Return the configured delimiter, or None if unset."""
        return self.config_dict.get('delimiter')

    def add_column_config(self, column_name, column_config_dict):
        """Register (or overwrite) the configuration of one column.

        The ``columns_to_anonymize`` section is created on demand, because a
        configuration loaded from YAML is not guaranteed to contain it.
        """
        columns = self.config_dict.setdefault('columns_to_anonymize', {})
        columns[column_name] = column_config_dict

    def save_config(self, save_name=None):
        """Write the configuration to a YAML file.

        Args:
            save_name: Target path. Defaults to
                ``<today>_generated_config.yml`` in the working directory.
        """
        if save_name is None:
            from datetime import date
            save_name = date.today().strftime('%Y-%m-%d_generated_config.yml')
        with open(save_name, 'w') as save_file:
            yaml.round_trip_dump(self.config_dict,
                                 save_file,
                                 default_flow_style=False)
Esempio n. 2
0
    def examples(self):
        """Generate the EXAMPLES doc string from a YAML file in ``./examples``.

        The file is looked up next to this module under ``examples/`` using
        ``self.module_file`` as its name. Every entry of its ``tasks`` list is
        rewritten so that ``name`` comes first and the remaining keys are
        stored under the module name. When the module's argspec has a
        ``state`` parameter, one is injected after ``namespace`` (or after
        ``name`` if there is no namespace), derived from the task key.

        Returns:
            The examples dumped as YAML with a blank line between tasks, or
            an empty string when the file is missing or has no named task.
        """
        result = []
        result_str = ''
        # check if an example file exists
        file_name = self.module_file
        example_path = os.path.join(os.path.dirname(__file__), 'examples',
                                    file_name)
        if os.path.exists(example_path):
            logger.debug('parsing {}'.format(example_path))
            # Read inside a context manager so the handle is always closed,
            # even when the YAML parser raises.
            with open(example_path, 'r') as example_file:
                yaml_examples = ruamel.yaml.load(
                    example_file, Loader=ruamel.yaml.RoundTripLoader)
            for ex in yaml_examples['tasks']:
                new_example = CommentedMap()
                if 'name' in ex:
                    # Add name as the first key in new_example
                    new_example['name'] = ex['name']

                for key, value in ex.items():
                    if key == 'name':
                        continue
                    # Add the module name as the second key
                    module_name = self.module_file
                    new_example[module_name] = value
                    if 'state' not in self.helper.argspec:
                        continue
                    # Add a state parameter to the example, placing it after
                    # the namespace parameter, or name, if no namespace.
                    new_example[module_name] = CommentedMap()
                    if not isinstance(value, CommentedMap):
                        continue
                    add_after = 'namespace' if 'namespace' in value else 'name'
                    if key in ('create', 'patch'):
                        state = 'present'
                    elif key == 'replace':
                        state = 'replaced'
                    else:
                        state = 'absent'

                    for param, param_value in value.items():
                        new_example[module_name][param] = param_value
                        if param == add_after:
                            new_example[module_name]['state'] = state

                # Tasks without a (non-empty) name are dropped.
                if new_example.get('name'):
                    result.append(new_example)

        if result:
            result_str = ruamel.yaml.dump(result,
                                          Dumper=ruamel.yaml.RoundTripDumper,
                                          width=80)
            # Separate consecutive tasks with an empty line.
            result_str = re.sub('\n- name:', '\n\n- name:', result_str)

        return result_str
Esempio n. 3
0
 def _recursive_set(self, data: CommentedMap, key: str, value: Any) -> None:
     """Store ``value`` in ``data`` under a possibly nested ``key``.

     ``self._parse_key`` splits the key into its first component and the
     remainder; intermediate mappings are created on demand.
     """
     head, remainder = self._parse_key(key)
     if remainder is None:
         # Last component reached: plain assignment.
         data[head] = value
         return None
     if head not in data:
         data[head] = CommentedMap()
     child = data.get(head, CommentedMap())
     return self._recursive_set(child, remainder, value)
Esempio n. 4
0
 def _recursive_set(self, data: CommentedMap, key: str, value: Any) -> None:
     """Store ``value`` in ``data`` under a dot-separated ``key``.

     Everything before the first dot names a nested mapping (created when
     absent); the rest of the key is resolved recursively inside it.
     """
     head, dot, tail = key.partition('.')
     if not dot:
         # No dot in the key: this is the final component.
         data[key] = value
         return
     if head not in data:
         data[head] = CommentedMap()
     child = data.get(head, CommentedMap())
     self._recursive_set(child, tail, value)
Esempio n. 5
0
def validate_js_expressions(
    tool: CommentedMap,
    schema: Schema,
    jshint_options: Optional[Dict[str, Union[List[str], str, int]]] = None,
    container_engine: str = "docker",
) -> None:
    """Run JSHint over the JavaScript found in a tool description.

    Nothing is checked unless the tool has an ``InlineJavascriptRequirement``
    among its requirements (the last one listed is used). Each line of its
    ``expressionLib`` is linted first, and the globals it reports are made
    available to the following lines and to the expressions. Then every
    ``$``-prefixed fragment of every expression returned by
    ``get_expressions`` is linted. Findings go to ``print_js_hint_messages``.

    A ``SubstitutionError`` raised by the first scan of an expression is
    re-raised as a ``WorkflowException`` built from the expression's source
    line when one is available, and unchanged otherwise.
    """
    requirements = tool.get("requirements")
    if requirements is None:
        return
    debug = _logger.isEnabledFor(logging.DEBUG)

    # Search from the end so that the last matching requirement is picked.
    inline_js = next(
        (
            req
            for req in reversed(requirements)
            if req["class"] == "InlineJavascriptRequirement"
        ),
        None,
    )
    if inline_js is None:
        return
    expression_lib = inline_js.get("expressionLib", [])

    # Fresh list per call: it grows with the globals found in expressionLib.
    js_globals = ["self", "inputs", "runtime", "console"]

    for index, lib_line in enumerate(expression_lib):
        lib_errors, lib_globals = jshint_js(
            lib_line, js_globals, jshint_options, container_engine
        )
        js_globals.extend(lib_globals)
        print_js_hint_messages(
            lib_errors,
            SourceLine(expression_lib, index, include_traceback=debug),
        )

    for expression, source_line in get_expressions(tool, schema):
        remaining = expression.strip()
        try:
            span = scan_expression(remaining)
        except SubstitutionError as se:
            if source_line:
                source_line.raise_type = WorkflowException
                raise source_line.makeError(str(se))
            else:
                raise se

        # Consume the string one scanned slice at a time.
        while span:
            begin = span[0]
            end = span[1]
            if remaining[begin] == "$":
                # Drop the leading "$" before converting the fragment to JS.
                fragment_js = code_fragment_to_js(remaining[begin + 1:end], "")
                fragment_errors, _ = jshint_js(
                    fragment_js, js_globals, jshint_options, container_engine
                )
                print_js_hint_messages(fragment_errors, source_line)

            remaining = remaining[end:]
            span = scan_expression(remaining)
Esempio n. 6
0
def checkversion(
    doc: Union[CommentedSeq, CommentedMap],
    metadata: CommentedMap,
    enable_dev: bool,
) -> Tuple[CommentedMap, str]:
    """Check the validity of the version of the given CWL document.

    A sequence document is wrapped as the ``$graph`` of a copy of
    ``metadata`` (line/column data and filename are carried over to the
    copy); a mapping document is used as is. The ``cwlVersion`` found in
    ``metadata`` is written into the resulting document.

    Returns the document and the validated version string.

    Raises ``Exception`` for unexpected argument types and
    ``ValidationException`` when the version is unknown, is a development
    or deprecated version while ``enable_dev`` is false, or differs from
    the internal version although an original version is recorded.
    """
    if isinstance(doc, CommentedSeq):
        if not isinstance(metadata, CommentedMap):
            raise Exception("Expected metadata to be CommentedMap")
        source_lc = metadata.lc
        metadata = copy.deepcopy(metadata)
        # Restore the position information on the deep copy.
        metadata.lc.data = copy.copy(source_lc.data)
        metadata.lc.filename = source_lc.filename
        metadata["$graph"] = doc
        cdoc = metadata  # type: CommentedMap
    elif isinstance(doc, CommentedMap):
        cdoc = doc
    else:
        raise Exception("Expected CommentedMap or CommentedSeq")

    version = metadata["cwlVersion"]
    cdoc["cwlVersion"] = version

    updated_from = metadata.get(ORIGINAL_CWLVERSION) or cdoc.get(
        ORIGINAL_CWLVERSION)

    if updated_from:
        # An already updated document must carry the internal version.
        if version != INTERNAL_VERSION:
            raise ValidationException(
                "original_cwlVersion is set (%s) but cwlVersion is '%s', expected '%s' "
                % (updated_from, version, INTERNAL_VERSION))
    elif version not in UPDATES:
        if version not in DEVUPDATES:
            raise ValidationException("Unrecognized version %s" % version)
        if not enable_dev:
            stable_versions = sorted(UPDATES.keys())
            raise ValidationException(
                u"Version '%s' is a development or deprecated version.\n "
                "Update your document to a stable version (%s) or use "
                "--enable-dev to enable support for development and "
                "deprecated versions." % (version, ", ".join(stable_versions)))

    return (cdoc, version)
Esempio n. 7
0
def format_node(cwl: Union[dict, list, str], node_path=None):
    """Recursively convert a parsed CWL node into its formatted form.

    Args:
        cwl: The node to format: a mapping, a list, a string or any scalar.
        node_path: List of the mapping keys leading from the document root
            to this node. ``None`` (the default) means the root, i.e. an
            empty path.

    Returns:
        * strings longer than 80 characters wrapped in ``Literal``, shorter
          ones unchanged;
        * mappings as a ``CommentedMap`` whose items come in the order given
          by ``reorder_node``, with the values formatted recursively;
        * lists as new lists of formatted items (list items keep the path of
          the list itself);
        * anything else unchanged.
    """
    # Treat a missing path as the root. Without this, the default None made
    # ``node_path + [k]`` below raise TypeError for any non-empty mapping.
    if node_path is None:
        node_path = []

    if isinstance(cwl, str):
        if len(cwl) > 80:
            return Literal(cwl)
        return cwl

    if isinstance(cwl, dict):
        _fmt_cwl = CommentedMap([
            (k, format_node(v, node_path + [k]))
            for k, v in reorder_node(cwl, node_path)])
        if _fmt_cwl.get("class") in ["CommandLineTool", "ExpressionTool", "Workflow"]:
            add_space_between_main_sections(_fmt_cwl)
        return _fmt_cwl

    if isinstance(cwl, list):
        return [format_node(v, node_path) for v in cwl]

    return cwl
Esempio n. 8
0
def _check_properties(
    check: str, file: str, path: str, properties: CommentedMap, reference: Dict[str, Any]
) -> bool:
    """Check that ``properties`` contains every value required by ``reference``.

    Nested dictionaries in ``reference`` are checked recursively. Each
    problem is reported through ``c2cciutils.error`` at the position of the
    ``properties`` mapping being inspected.

    Args:
        check: Name of the running check, forwarded to ``c2cciutils.error``.
        file: Name of the inspected file, forwarded to ``c2cciutils.error``.
        path: Dotted path of ``properties`` in the document; empty at the root.
        properties: The mapping loaded from the file.
        reference: The expected keys and values.

    Returns:
        True when no problem was found, False otherwise.
    """
    if path:
        path += "."

    success = True

    for key, value in reference.items():
        if key not in properties:
            c2cciutils.error(
                check,
                f"The property '{path}{key}' should be defined",
                file,
                properties.lc.line + 1,
                properties.lc.col + 1,
            )
            success = False
            # Nothing else can be verified for a missing key. Going on would
            # raise KeyError on ``properties[key]`` for dictionary references
            # and report a second, redundant error for scalar ones.
            continue
        if isinstance(value, dict):
            if not isinstance(properties[key], dict):
                c2cciutils.error(
                    check,
                    f"The property '{path}{key}' should be a dictionary",
                    file,
                    properties.lc.line + 1,
                    properties.lc.col + 1,
                )
                success = False
            else:
                success &= _check_properties(check, file, path + key, properties[key], value)
        else:
            if properties.get(key) != value:
                c2cciutils.error(
                    check,
                    f"The property '{path}{key}' should have the value, '{value}', "
                    f"but is '{properties.get(key)}'",
                    file,
                    properties.lc.line + 1,
                    properties.lc.col + 1,
                )
                success = False
    return success
Esempio n. 9
0
    def bind_input(self,
                   schema,                   # type: MutableMapping[Text, Any]
                   datum,                    # type: Any
                   discover_secondaryFiles,  # type: bool
                   lead_pos=None,            # type: Optional[Union[int, List[int]]]
                   tail_pos=None,            # type: Optional[List[int]]
                  ):  # type: (...) -> List[MutableMapping[Text, Any]]
        """Collect the input bindings for ``datum`` as described by ``schema``.

        The schema is walked recursively (union types, nested type mappings,
        records, arrays). Along the way File and Directory objects are
        appended to ``self.files``, file contents are loaded when
        ``loadContents`` is set, secondary files are resolved and the file
        format is checked. ``datum`` may be modified in place: missing record
        fields receive their default, Files may gain ``contents`` and
        ``secondaryFiles``.

        Args:
            schema: Parameter schema; must contain ``type`` and may contain
                ``inputBinding``, ``secondaryFiles``, ``format``, etc.
            datum: The value bound to the parameter.
            discover_secondaryFiles: Whether secondary files that are not
                listed in ``datum`` may be looked up via ``self.fs_access``.
            lead_pos: Position prefix placed before the binding's own position.
            tail_pos: Position suffix placed after the binding's own position.

        Returns:
            The list of binding mappings, each with a ``position`` sort key
            and the bound ``datum``.

        Raises:
            validate.ValidationException: ``datum`` matches no member of a
                union type.
            WorkflowException: A required secondary file is missing, or the
                file format check fails.
        """

        if tail_pos is None:
            tail_pos = []
        if lead_pos is None:
            lead_pos = []

        bindings = []  # type: List[MutableMapping[Text, Text]]
        binding = None  # type: Optional[MutableMapping[Text,Any]]
        value_from_expression = False
        if "inputBinding" in schema and isinstance(schema["inputBinding"], MutableMapping):
            # Work on a copy so the schema's own inputBinding stays untouched.
            binding = CommentedMap(schema["inputBinding"].items())
            assert binding is not None

            # Sort key: lead position, own position (0 if absent), tail position.
            bp = list(aslist(lead_pos))
            if "position" in binding:
                bp.extend(aslist(binding["position"]))
            else:
                bp.append(0)
            bp.extend(aslist(tail_pos))
            binding["position"] = bp

            binding["datum"] = datum
            if "valueFrom" in binding:
                value_from_expression = True

        # Handle union types
        if isinstance(schema["type"], MutableSequence):
            bound_input = False
            for t in schema["type"]:
                avsc = None  # type: Optional[Schema]
                if isinstance(t, string_types) and self.names.has_name(t, ""):
                    avsc = self.names.get_name(t, "")
                elif isinstance(t, MutableMapping) and "name" in t and self.names.has_name(t["name"], ""):
                    avsc = self.names.get_name(t["name"], "")
                if not avsc:
                    avsc = make_avsc_object(convert_to_dict(t), self.names)
                assert avsc is not None
                if validate.validate(avsc, datum):
                    schema = copy.deepcopy(schema)
                    schema["type"] = t
                    if not value_from_expression:
                        return self.bind_input(schema, datum, lead_pos=lead_pos, tail_pos=tail_pos, discover_secondaryFiles=discover_secondaryFiles)
                    else:
                        # With valueFrom the nested bindings are discarded;
                        # the call is made for its side effects only.
                        self.bind_input(schema, datum, lead_pos=lead_pos, tail_pos=tail_pos, discover_secondaryFiles=discover_secondaryFiles)
                        bound_input = True
            if not bound_input:
                raise validate.ValidationException(u"'%s' is not a valid union %s" % (datum, schema["type"]))
        elif isinstance(schema["type"], MutableMapping):
            st = copy.deepcopy(schema["type"])
            if binding is not None\
                    and "inputBinding" not in st\
                    and "type" in st\
                    and st["type"] == "array"\
                    and "itemSeparator" not in binding:
                st["inputBinding"] = {}
            for k in ("secondaryFiles", "format", "streamable"):
                if k in schema:
                    st[k] = schema[k]
            if value_from_expression:
                self.bind_input(st, datum, lead_pos=lead_pos, tail_pos=tail_pos, discover_secondaryFiles=discover_secondaryFiles)
            else:
                bindings.extend(self.bind_input(st, datum, lead_pos=lead_pos, tail_pos=tail_pos, discover_secondaryFiles=discover_secondaryFiles))
        else:
            # Named types are replaced by their definition from schemaDefs.
            if schema["type"] in self.schemaDefs:
                schema = self.schemaDefs[schema["type"]]

            if schema["type"] == "record":
                for f in schema["fields"]:
                    if f["name"] in datum and datum[f["name"]] is not None:
                        bindings.extend(self.bind_input(f, datum[f["name"]], lead_pos=lead_pos, tail_pos=f["name"], discover_secondaryFiles=discover_secondaryFiles))
                    else:
                        datum[f["name"]] = f.get("default")

            if schema["type"] == "array":
                for n, item in enumerate(datum):
                    b2 = None
                    if binding is not None:
                        b2 = copy.deepcopy(binding)
                        b2["datum"] = item
                    itemschema = {
                        u"type": schema["items"],
                        u"inputBinding": b2
                    }
                    for k in ("secondaryFiles", "format", "streamable"):
                        if k in schema:
                            itemschema[k] = schema[k]
                    bindings.extend(
                        self.bind_input(itemschema, item, lead_pos=n, tail_pos=tail_pos, discover_secondaryFiles=discover_secondaryFiles))
                # The array's binding was copied onto every item above.
                binding = None

            def _capture_files(f):
                self.files.append(f)
                return f

            if schema["type"] == "File":
                self.files.append(datum)
                if (binding and binding.get("loadContents")) or schema.get("loadContents"):
                    with self.fs_access.open(datum["location"], "rb") as f:
                        datum["contents"] = f.read(CONTENT_LIMIT).decode("utf-8")

                if "secondaryFiles" in schema:
                    if "secondaryFiles" not in datum:
                        datum["secondaryFiles"] = []
                    for sf in aslist(schema["secondaryFiles"]):
                        if 'required' in sf:
                            sf_required = self.do_eval(sf['required'], context=datum)
                        else:
                            sf_required = True

                        if "$(" in sf["pattern"] or "${" in sf["pattern"]:
                            sfpath = self.do_eval(sf["pattern"], context=datum)
                        else:
                            sfpath = substitute(datum["basename"], sf["pattern"])

                        for sfname in aslist(sfpath):
                            if not sfname:
                                continue
                            found = False
                            for d in datum["secondaryFiles"]:
                                if not d.get("basename"):
                                    d["basename"] = d["location"][d["location"].rindex("/")+1:]
                                if d["basename"] == sfname:
                                    found = True
                            if not found:
                                if isinstance(sfname, MutableMapping):
                                    # Already a file object: take it as is.
                                    # The location must not be derived here,
                                    # since concatenating a string with a
                                    # mapping raises TypeError.
                                    datum["secondaryFiles"].append(sfname)
                                else:
                                    # Sibling of the primary file, in the
                                    # same directory.
                                    sf_location = datum["location"][0:datum["location"].rindex("/")+1]+sfname
                                    if discover_secondaryFiles and self.fs_access.exists(sf_location):
                                        datum["secondaryFiles"].append({
                                            "location": sf_location,
                                            "basename": sfname,
                                            "class": "File"})
                                    elif sf_required:
                                        raise WorkflowException("Missing required secondary file '%s' from file object: %s" % (
                                            sfname, json_dumps(datum, indent=4)))

                    normalizeFilesDirs(datum["secondaryFiles"])

                if "format" in schema:
                    try:
                        check_format(datum, self.do_eval(schema["format"]),
                                     self.formatgraph)
                    except validate.ValidationException as ve:
                        raise WorkflowException(
                            "Expected value of '%s' to have format %s but\n "
                            " %s" % (schema["name"], schema["format"], ve))

                visit_class(datum.get("secondaryFiles", []), ("File", "Directory"), _capture_files)

            if schema["type"] == "Directory":
                ll = schema.get("loadListing") or self.loadListing
                if ll and ll != "no_listing":
                    get_listing(self.fs_access, datum, (ll == "deep_listing"))
                self.files.append(datum)

            if schema["type"] == "Any":
                visit_class(datum, ("File", "Directory"), _capture_files)

        # Position to front of the sort key
        if binding is not None:
            for bi in bindings:
                bi["position"] = binding["position"] + bi["position"]
            bindings.append(binding)

        return bindings
Esempio n. 10
0
    def __init__(self, toolpath_object: CommentedMap,
                 loadingContext: LoadingContext) -> None:
        """Build a Process object from the provided dictionary.

        Merges requirements and hints from the loading context, the tool
        document and the overrides list, registers the types of any
        SchemaDefRequirement, builds the Avro record schemas for the tool's
        inputs and outputs, optionally lints the JavaScript expressions and
        finally runs the ``/var/spool/cwl`` checks.

        ``toolpath_object`` is kept as ``self.tool`` without copying; it
        receives a generated ``id`` when it has none.

        Raises:
            ValidationException: An input or output parameter has no ``type``.
            OSError, ValueError: The JSHint options file cannot be read or
                parsed (logged, then re-raised).
        """
        super(Process, self).__init__()
        self.metadata = getdefault(loadingContext.metadata,
                                   {})  # type: CWLObjectType
        self.provenance_object = None  # type: Optional[ProvenanceProfile]
        self.parent_wf = None  # type: Optional[ProvenanceProfile]
        # The base File/Directory/Any schemas are module-level globals that
        # are filled in once, the first time a Process is constructed.
        global SCHEMA_FILE, SCHEMA_DIR, SCHEMA_ANY  # pylint: disable=global-statement
        if SCHEMA_FILE is None or SCHEMA_ANY is None or SCHEMA_DIR is None:
            # NOTE(review): get_schema presumably populates
            # SCHEMA_CACHE["v1.0"], which is read right below - confirm.
            get_schema("v1.0")
            SCHEMA_ANY = cast(
                CWLObjectType,
                SCHEMA_CACHE["v1.0"][3].idx["https://w3id.org/cwl/salad#Any"],
            )
            SCHEMA_FILE = cast(
                CWLObjectType,
                SCHEMA_CACHE["v1.0"][3].idx["https://w3id.org/cwl/cwl#File"],
            )
            SCHEMA_DIR = cast(
                CWLObjectType,
                SCHEMA_CACHE["v1.0"]
                [3].idx["https://w3id.org/cwl/cwl#Directory"],
            )

        self.names = make_avro_schema([SCHEMA_FILE, SCHEMA_DIR, SCHEMA_ANY],
                                      Loader({}))
        self.tool = toolpath_object
        # Requirements, in order: loading context (copied), the tool's own,
        # then those of the overrides matching the tool id.
        self.requirements = copy.deepcopy(
            getdefault(loadingContext.requirements, []))
        self.requirements.extend(self.tool.get("requirements", []))
        # Tools without an id get a random one; the overrides lookup below
        # needs it.
        if "id" not in self.tool:
            self.tool["id"] = "_:" + str(uuid.uuid4())
        self.requirements.extend(
            cast(
                List[CWLObjectType],
                get_overrides(getdefault(loadingContext.overrides_list, []),
                              self.tool["id"]).get("requirements", []),
            ))
        # Hints: loading context (copied) followed by the tool's own.
        self.hints = copy.deepcopy(getdefault(loadingContext.hints, []))
        self.hints.extend(self.tool.get("hints", []))
        # Versions of requirements and hints which aren't mutated.
        self.original_requirements = copy.deepcopy(self.requirements)
        self.original_hints = copy.deepcopy(self.hints)
        self.doc_loader = loadingContext.loader
        self.doc_schema = loadingContext.avsc_names

        self.formatgraph = None  # type: Optional[Graph]
        if self.doc_loader is not None:
            self.formatgraph = self.doc_loader.graph

        checkRequirements(self.tool, supportedProcessRequirements)
        self.validate_hints(
            cast(Names, loadingContext.avsc_names),
            self.tool.get("hints", []),
            strict=getdefault(loadingContext.strict, False),
        )

        # Type definitions from SchemaDefRequirement, keyed by type name.
        self.schemaDefs = {}  # type: MutableMapping[str, CWLObjectType]

        sd, _ = self.get_requirement("SchemaDefRequirement")

        if sd is not None:
            sdtypes = cast(MutableSequence[CWLObjectType], sd["types"])
            avroize_type(cast(MutableSequence[CWLOutputType], sdtypes))
            av = make_valid_avro(
                sdtypes,
                {
                    cast(str, t["name"]): cast(Dict[str, Any], t)
                    for t in sdtypes
                },
                set(),
            )
            for i in av:
                self.schemaDefs[i["name"]] = i  # type: ignore
            make_avsc_object(convert_to_dict(av), self.names)

        # Build record schema from inputs
        self.inputs_record_schema = {
            "name": "input_record_schema",
            "type": "record",
            "fields": [],
        }  # type: CWLObjectType
        self.outputs_record_schema = {
            "name": "outputs_record_schema",
            "type": "record",
            "fields": [],
        }  # type: CWLObjectType

        for key in ("inputs", "outputs"):
            for i in self.tool[key]:
                # Work on a copy: the field is renamed and its type rewritten.
                c = copy.deepcopy(i)
                c["name"] = shortname(c["id"])
                del c["id"]

                if "type" not in c:
                    raise ValidationException(
                        "Missing 'type' in parameter '{}'".format(c["name"]))

                # A parameter with a default may be omitted, so its type is
                # widened to accept "null" as well.
                if "default" in c and "null" not in aslist(c["type"]):
                    nullable = ["null"]
                    nullable.extend(aslist(c["type"]))
                    c["type"] = nullable
                else:
                    c["type"] = c["type"]
                avroize_type(c["type"], c["name"])
                if key == "inputs":
                    cast(List[CWLObjectType],
                         self.inputs_record_schema["fields"]).append(c)
                elif key == "outputs":
                    cast(List[CWLObjectType],
                         self.outputs_record_schema["fields"]).append(c)

        # Convert both record schemas to Avro and register them in
        # self.names; SourceLine is set up with the matching section of the
        # tool document and ValidationException.
        with SourceLine(toolpath_object, "inputs", ValidationException):
            self.inputs_record_schema = cast(
                CWLObjectType,
                make_valid_avro(self.inputs_record_schema, {}, set()),
            )
            make_avsc_object(convert_to_dict(self.inputs_record_schema),
                             self.names)
        with SourceLine(toolpath_object, "outputs", ValidationException):
            self.outputs_record_schema = cast(
                CWLObjectType,
                make_valid_avro(self.outputs_record_schema, {}, set()),
            )
            make_avsc_object(convert_to_dict(self.outputs_record_schema),
                             self.names)

        # JavaScript validation: only for documents with a "class", and only
        # when not disabled in the loading context.
        if toolpath_object.get("class") is not None and not getdefault(
                loadingContext.disable_js_validation, False):
            validate_js_options = (
                None)  # type: Optional[Dict[str, Union[List[str], str, int]]]
            if loadingContext.js_hint_options_file is not None:
                try:
                    with open(loadingContext.js_hint_options_file
                              ) as options_file:
                        validate_js_options = json.load(options_file)
                except (OSError, ValueError):
                    _logger.error(
                        "Failed to read options file %s",
                        loadingContext.js_hint_options_file,
                    )
                    raise
            if self.doc_schema is not None:
                validate_js_expressions(
                    toolpath_object,
                    self.doc_schema.names[toolpath_object["class"]],
                    validate_js_options,
                )

        # is_req tells whether DockerRequirement was found among the
        # requirements (true) or only among the hints (false).
        dockerReq, is_req = self.get_requirement("DockerRequirement")

        if (dockerReq is not None and "dockerOutputDirectory" in dockerReq
                and is_req is not None and not is_req):
            _logger.warning(
                SourceLine(item=dockerReq, raise_type=str).makeError(
                    "When 'dockerOutputDirectory' is declared, DockerRequirement "
                    "should go in the 'requirements' section, not 'hints'."
                    ""))

        # var_spool_cwl_detector runs in every case except one: a
        # DockerRequirement in 'requirements' whose dockerOutputDirectory is
        # exactly "/var/spool/cwl".
        if (dockerReq is not None and is_req is not None and
                dockerReq.get("dockerOutputDirectory") == "/var/spool/cwl"):
            if is_req:
                # In this specific case, it is legal to have /var/spool/cwl, so skip the check.
                pass
            else:
                # Must be a requirement
                var_spool_cwl_detector(self.tool)
        else:
            var_spool_cwl_detector(self.tool)
Esempio n. 11
0
    def __init__(
        self,
        toolpath_object: CommentedMap,
        pos: int,
        loadingContext: LoadingContext,
        parentworkflowProv: Optional[ProvenanceProfile] = None,
    ) -> None:
        """Initialize this WorkflowStep."""
        if "id" in toolpath_object:
            self.id = toolpath_object["id"]
        else:
            self.id = "#step" + str(pos)

        loadingContext = loadingContext.copy()

        loadingContext.requirements = copy.deepcopy(
            getdefault(loadingContext.requirements, [])
        )
        assert loadingContext.requirements is not None  # nosec
        loadingContext.requirements.extend(toolpath_object.get("requirements", []))
        loadingContext.requirements.extend(
            cast(
                List[CWLObjectType],
                get_overrides(
                    getdefault(loadingContext.overrides_list, []), self.id
                ).get("requirements", []),
            )
        )

        hints = copy.deepcopy(getdefault(loadingContext.hints, []))
        hints.extend(toolpath_object.get("hints", []))
        loadingContext.hints = hints

        try:
            if isinstance(toolpath_object["run"], CommentedMap):
                self.embedded_tool = loadingContext.construct_tool_object(
                    toolpath_object["run"], loadingContext
                )  # type: Process
            else:
                loadingContext.metadata = {}
                self.embedded_tool = load_tool(toolpath_object["run"], loadingContext)
        except ValidationException as vexc:
            if loadingContext.debug:
                _logger.exception("Validation exception")
            raise WorkflowException(
                "Tool definition %s failed validation:\n%s"
                % (toolpath_object["run"], indent(str(vexc)))
            ) from vexc

        validation_errors = []
        self.tool = toolpath_object = copy.deepcopy(toolpath_object)
        bound = set()
        for stepfield, toolfield in (("in", "inputs"), ("out", "outputs")):
            toolpath_object[toolfield] = []
            for index, step_entry in enumerate(toolpath_object[stepfield]):
                if isinstance(step_entry, str):
                    param = CommentedMap()  # type: CommentedMap
                    inputid = step_entry
                else:
                    param = CommentedMap(step_entry.items())
                    inputid = step_entry["id"]

                shortinputid = shortname(inputid)
                found = False
                for tool_entry in self.embedded_tool.tool[toolfield]:
                    frag = shortname(tool_entry["id"])
                    if frag == shortinputid:
                        # if the case that the step has a default for a parameter,
                        # we do not want the default of the tool to override it
                        step_default = None
                        if "default" in param and "default" in tool_entry:
                            step_default = param["default"]
                        param.update(tool_entry)
                        param["_tool_entry"] = tool_entry
                        if step_default is not None:
                            param["default"] = step_default
                        found = True
                        bound.add(frag)
                        break
                if not found:
                    if stepfield == "in":
                        param["type"] = "Any"
                        param["used_by_step"] = used_by_step(self.tool, shortinputid)
                        param["not_connected"] = True
                    else:
                        if isinstance(step_entry, Mapping):
                            step_entry_name = step_entry["id"]
                        else:
                            step_entry_name = step_entry
                        validation_errors.append(
                            SourceLine(self.tool["out"], index).makeError(
                                "Workflow step output '%s' does not correspond to"
                                % shortname(step_entry_name)
                            )
                            + "\n"
                            + SourceLine(self.embedded_tool.tool, "outputs").makeError(
                                "  tool output (expected '%s')"
                                % (
                                    "', '".join(
                                        [
                                            shortname(tool_entry["id"])
                                            for tool_entry in self.embedded_tool.tool[
                                                "outputs"
                                            ]
                                        ]
                                    )
                                )
                            )
                        )
                param["id"] = inputid
                param.lc.line = toolpath_object[stepfield].lc.data[index][0]
                param.lc.col = toolpath_object[stepfield].lc.data[index][1]
                param.lc.filename = toolpath_object[stepfield].lc.filename
                toolpath_object[toolfield].append(param)

        missing_values = []
        for _, tool_entry in enumerate(self.embedded_tool.tool["inputs"]):
            if shortname(tool_entry["id"]) not in bound:
                if "null" not in tool_entry["type"] and "default" not in tool_entry:
                    missing_values.append(shortname(tool_entry["id"]))

        if missing_values:
            validation_errors.append(
                SourceLine(self.tool, "in").makeError(
                    "Step is missing required parameter%s '%s'"
                    % (
                        "s" if len(missing_values) > 1 else "",
                        "', '".join(missing_values),
                    )
                )
            )

        if validation_errors:
            raise ValidationException("\n".join(validation_errors))

        super().__init__(toolpath_object, loadingContext)

        if self.embedded_tool.tool["class"] == "Workflow":
            (feature, _) = self.get_requirement("SubworkflowFeatureRequirement")
            if not feature:
                raise WorkflowException(
                    "Workflow contains embedded workflow but "
                    "SubworkflowFeatureRequirement not in requirements"
                )

        if "scatter" in self.tool:
            (feature, _) = self.get_requirement("ScatterFeatureRequirement")
            if not feature:
                raise WorkflowException(
                    "Workflow contains scatter but ScatterFeatureRequirement "
                    "not in requirements"
                )

            inputparms = copy.deepcopy(self.tool["inputs"])
            outputparms = copy.deepcopy(self.tool["outputs"])
            scatter = aslist(self.tool["scatter"])

            method = self.tool.get("scatterMethod")
            if method is None and len(scatter) != 1:
                raise ValidationException(
                    "Must specify scatterMethod when scattering over multiple inputs"
                )

            inp_map = {i["id"]: i for i in inputparms}
            for inp in scatter:
                if inp not in inp_map:
                    raise ValidationException(
                        SourceLine(self.tool, "scatter").makeError(
                            "Scatter parameter '%s' does not correspond to "
                            "an input parameter of this step, expecting '%s'"
                            % (
                                shortname(inp),
                                "', '".join(shortname(k) for k in inp_map.keys()),
                            )
                        )
                    )

                inp_map[inp]["type"] = {"type": "array", "items": inp_map[inp]["type"]}

            if self.tool.get("scatterMethod") == "nested_crossproduct":
                nesting = len(scatter)
            else:
                nesting = 1

            for _ in range(0, nesting):
                for oparam in outputparms:
                    oparam["type"] = {"type": "array", "items": oparam["type"]}
            self.tool["inputs"] = inputparms
            self.tool["outputs"] = outputparms
        self.prov_obj = None  # type: Optional[ProvenanceProfile]
        if loadingContext.research_obj is not None:
            self.prov_obj = parentworkflowProv
            if self.embedded_tool.tool["class"] == "Workflow":
                self.parent_wf = self.embedded_tool.parent_wf
            else:
                self.parent_wf = self.prov_obj
Esempio n. 12
0
    def bind_input(
        self,
        schema: CWLObjectType,
        datum: Union[CWLObjectType, List[CWLObjectType]],
        discover_secondaryFiles: bool,
        lead_pos: Optional[Union[int, List[int]]] = None,
        tail_pos: Optional[Union[str, List[int]]] = None,
    ) -> List[MutableMapping[str, Union[str, List[int]]]]:
        """
        Collect the input bindings that ``schema`` defines for ``datum``.

        The value of ``schema["type"]`` drives the traversal:

        * a list (union type): every member is validated against ``datum``.
          Without a ``valueFrom`` binding the result for the first matching
          member is returned directly; with one, each matching member is
          processed for its side effects only and this level's own binding
          is returned.
        * a mapping (inline type definition): recursed into, carrying over
          ``secondaryFiles``, ``format`` and ``streamable`` from ``schema``.
        * anything else: records are bound field by field, arrays item by
          item, and File / Directory objects are processed in place.

        Side effects visible in this method: File and Directory objects are
        appended to ``self.files``; File objects may gain ``contents`` and
        ``secondaryFiles`` entries; record fields that are absent or ``None``
        are set to the field's ``default``; a schema of the salad ``Any``
        type is rewritten in place to a more specific type when ``datum`` is
        a dict or a list.

        :param schema: mapping holding at least ``type`` and optionally
            ``inputBinding``, ``secondaryFiles``, ``format``, ``streamable``,
            ``loadContents``, ``loadListing``, ``fields``, ``items``, ``name``.
        :param datum: the value to bind.
        :param discover_secondaryFiles: when true, secondary files that are
            not listed on the File object are added if
            ``self.fs_access.exists`` finds them; when false and the File
            already carries ``secondaryFiles``, the schema's patterns are
            not evaluated at all.
        :param lead_pos: position component(s) placed before this binding's
            own position in the sort key.
        :param tail_pos: position component(s) placed after it.
        :returns: the binding mappings; each one built here carries a
            ``position`` list (the sort key) and the bound ``datum``.
        :raises ValidationException: ``datum`` matches no member of a union.
        :raises WorkflowException: a File does not have the expected format.
            The other error paths go through
            ``SourceLine(..., WorkflowException, debug).makeError(...)`` and
            so presumably raise the same type - confirm against SourceLine.
        """
        debug = _logger.isEnabledFor(logging.DEBUG)

        if tail_pos is None:
            tail_pos = []
        if lead_pos is None:
            lead_pos = []

        bindings = []  # type: List[MutableMapping[str, Union[str, List[int]]]]
        binding = (
            {}
        )  # type: Union[MutableMapping[str, Union[str, List[int]]], CommentedMap]
        value_from_expression = False
        if "inputBinding" in schema and isinstance(
            schema["inputBinding"], MutableMapping
        ):
            # Work on a copy so the schema's own inputBinding is not modified.
            binding = CommentedMap(schema["inputBinding"].items())

            # Sort key layout: lead_pos + own position (0 if absent) + tail_pos.
            bp = list(aslist(lead_pos))
            if "position" in binding:
                position = binding["position"]
                if isinstance(position, str):  # no need to test the CWL Version
                    # the schema for v1.0 only allow ints
                    result = self.do_eval(position, context=datum)
                    if not isinstance(result, int):
                        raise SourceLine(
                            schema["inputBinding"], "position", WorkflowException, debug
                        ).makeError(
                            "'position' expressions must evaluate to an int, "
                            f"not a {type(result)}. Expression {position} "
                            f"resulted in '{result}'."
                        )
                    binding["position"] = result
                    bp.append(result)
                else:
                    bp.extend(aslist(binding["position"]))
            else:
                bp.append(0)
            bp.extend(aslist(tail_pos))
            binding["position"] = bp

            binding["datum"] = datum
            if "valueFrom" in binding:
                value_from_expression = True

        # Handle union types
        if isinstance(schema["type"], MutableSequence):
            bound_input = False
            for t in schema["type"]:
                avsc = None  # type: Optional[Schema]
                if isinstance(t, str) and self.names.has_name(t, None):
                    avsc = self.names.get_name(t, None)
                elif (
                    isinstance(t, MutableMapping)
                    and "name" in t
                    and self.names.has_name(cast(str, t["name"]), None)
                ):
                    avsc = self.names.get_name(cast(str, t["name"]), None)
                if not avsc:
                    avsc = make_avsc_object(convert_to_dict(t), self.names)
                if validate(avsc, datum, vocab=INPUT_OBJ_VOCAB):
                    # Narrow a copy of the schema to the matching member.
                    schema = copy.deepcopy(schema)
                    schema["type"] = t
                    if not value_from_expression:
                        return self.bind_input(
                            schema,
                            datum,
                            lead_pos=lead_pos,
                            tail_pos=tail_pos,
                            discover_secondaryFiles=discover_secondaryFiles,
                        )
                    else:
                        # With valueFrom the nested bindings are discarded;
                        # the call is made only for its side effects.
                        self.bind_input(
                            schema,
                            datum,
                            lead_pos=lead_pos,
                            tail_pos=tail_pos,
                            discover_secondaryFiles=discover_secondaryFiles,
                        )
                        bound_input = True
            if not bound_input:
                raise ValidationException(
                    "'{}' is not a valid union {}".format(datum, schema["type"])
                )
        elif isinstance(schema["type"], MutableMapping):
            st = copy.deepcopy(schema["type"])
            if (
                binding
                and "inputBinding" not in st
                and "type" in st
                and st["type"] == "array"
                and "itemSeparator" not in binding
            ):
                st["inputBinding"] = {}
            for k in ("secondaryFiles", "format", "streamable"):
                if k in schema:
                    st[k] = schema[k]
            if value_from_expression:
                # Side effects only; see the union branch above.
                self.bind_input(
                    st,
                    datum,
                    lead_pos=lead_pos,
                    tail_pos=tail_pos,
                    discover_secondaryFiles=discover_secondaryFiles,
                )
            else:
                bindings.extend(
                    self.bind_input(
                        st,
                        datum,
                        lead_pos=lead_pos,
                        tail_pos=tail_pos,
                        discover_secondaryFiles=discover_secondaryFiles,
                    )
                )
        else:
            # Refine an untyped ("Any") schema in place from the datum's shape.
            if schema["type"] == "org.w3id.cwl.salad.Any":
                if isinstance(datum, dict):
                    if datum.get("class") == "File":
                        schema["type"] = "org.w3id.cwl.cwl.File"
                    elif datum.get("class") == "Directory":
                        schema["type"] = "org.w3id.cwl.cwl.Directory"
                    else:
                        schema["type"] = "record"
                        schema["fields"] = [
                            {"name": field_name, "type": "Any"}
                            for field_name in datum.keys()
                        ]
                elif isinstance(datum, list):
                    schema["type"] = "array"
                    schema["items"] = "Any"

            # Replace a reference to a named type with its definition.
            if schema["type"] in self.schemaDefs:
                schema = self.schemaDefs[cast(str, schema["type"])]

            if schema["type"] == "record":
                datum = cast(CWLObjectType, datum)
                for f in cast(List[CWLObjectType], schema["fields"]):
                    name = cast(str, f["name"])
                    if name in datum and datum[name] is not None:
                        bindings.extend(
                            self.bind_input(
                                f,
                                cast(CWLObjectType, datum[name]),
                                lead_pos=lead_pos,
                                tail_pos=name,
                                discover_secondaryFiles=discover_secondaryFiles,
                            )
                        )
                    else:
                        datum[name] = f.get("default")

            if schema["type"] == "array":
                for n, item in enumerate(cast(MutableSequence[CWLObjectType], datum)):
                    b2 = None
                    if binding:
                        b2 = cast(CWLObjectType, copy.deepcopy(binding))
                        b2["datum"] = item
                    itemschema = {
                        "type": schema["items"],
                        "inputBinding": b2,
                    }  # type: CWLObjectType
                    for k in ("secondaryFiles", "format", "streamable"):
                        if k in schema:
                            itemschema[k] = schema[k]
                    bindings.extend(
                        self.bind_input(
                            itemschema,
                            item,
                            lead_pos=n,
                            tail_pos=tail_pos,
                            discover_secondaryFiles=discover_secondaryFiles,
                        )
                    )
                # The binding was copied onto every item above, so it must
                # not be emitted again for the array as a whole.
                binding = {}

            def _capture_files(f: CWLObjectType) -> CWLObjectType:
                self.files.append(f)
                return f

            if schema["type"] == "org.w3id.cwl.cwl.File":
                datum = cast(CWLObjectType, datum)
                self.files.append(datum)

                # loadContents on the binding takes precedence over the one
                # on the schema; whichever is used is also the error context.
                loadContents_sourceline = (
                    None
                )  # type: Union[None, MutableMapping[str, Union[str, List[int]]], CWLObjectType]
                if binding and binding.get("loadContents"):
                    loadContents_sourceline = binding
                elif schema.get("loadContents"):
                    loadContents_sourceline = schema

                if loadContents_sourceline and loadContents_sourceline["loadContents"]:
                    with SourceLine(
                        loadContents_sourceline,
                        "loadContents",
                        WorkflowException,
                        debug,
                    ):
                        try:
                            with self.fs_access.open(
                                cast(str, datum["location"]), "rb"
                            ) as f2:
                                datum["contents"] = content_limit_respected_read(f2)
                        except Exception as e:
                            # Chain explicitly so the original failure stays
                            # attached as the cause.
                            raise Exception(
                                "Reading {}\n{}".format(datum["location"], e)
                            ) from e

                if "secondaryFiles" in schema:
                    if "secondaryFiles" not in datum:
                        datum["secondaryFiles"] = []
                        sf_schema = aslist(schema["secondaryFiles"])
                    elif not discover_secondaryFiles:
                        sf_schema = []  # trust the inputs
                    else:
                        sf_schema = aslist(schema["secondaryFiles"])

                    def addsf(
                        files: MutableSequence[CWLObjectType],
                        newsf: CWLObjectType,
                    ) -> None:
                        # An entry with the same location only has its
                        # basename refreshed; otherwise the file is appended.
                        for f in files:
                            if f["location"] == newsf["location"]:
                                f["basename"] = newsf["basename"]
                                return
                        files.append(newsf)

                    for num, sf_entry in enumerate(sf_schema):
                        if "required" in sf_entry and sf_entry["required"] is not None:
                            required_result = self.do_eval(
                                sf_entry["required"], context=datum
                            )
                            if not (
                                isinstance(required_result, bool)
                                or required_result is None
                            ):
                                if sf_schema == schema["secondaryFiles"]:
                                    sf_item: Any = sf_schema[num]
                                else:
                                    sf_item = sf_schema
                                raise SourceLine(
                                    sf_item, "required", WorkflowException, debug
                                ).makeError(
                                    "The result of a expression in the field "
                                    "'required' must "
                                    f"be a bool or None, not a {type(required_result)}. "
                                    f"Expression '{sf_entry['required']}' resulted "
                                    f"in '{required_result}'."
                                )
                            sf_required = required_result
                        else:
                            sf_required = True

                        # Patterns containing an expression are evaluated;
                        # plain patterns are applied to the primary basename.
                        if "$(" in sf_entry["pattern"] or "${" in sf_entry["pattern"]:
                            sfpath = self.do_eval(sf_entry["pattern"], context=datum)
                        else:
                            sfpath = substitute(
                                cast(str, datum["basename"]), sf_entry["pattern"]
                            )

                        for sfname in aslist(sfpath):
                            if not sfname:
                                continue
                            found = False

                            if isinstance(sfname, str):
                                # Resolve the name next to the primary file.
                                d_location = cast(str, datum["location"])
                                if "/" in d_location:
                                    sf_location = (
                                        d_location[0 : d_location.rindex("/") + 1]
                                        + sfname
                                    )
                                else:
                                    sf_location = d_location + sfname
                                sfbasename = sfname
                            elif isinstance(sfname, MutableMapping):
                                sf_location = sfname["location"]
                                sfbasename = sfname["basename"]
                            else:
                                raise SourceLine(
                                    sf_entry, "pattern", WorkflowException, debug
                                ).makeError(
                                    "Expected secondaryFile expression to "
                                    "return type 'str', a 'File' or 'Directory' "
                                    "dictionary, or a list of the same. Received "
                                    f"'{type(sfname)}' from '{sf_entry['pattern']}'."
                                )

                            # Already listed on the File? Fill in missing
                            # basenames from the location while checking.
                            for d in cast(
                                MutableSequence[MutableMapping[str, str]],
                                datum["secondaryFiles"],
                            ):
                                if not d.get("basename"):
                                    d["basename"] = d["location"][
                                        d["location"].rindex("/") + 1 :
                                    ]
                                if d["basename"] == sfbasename:
                                    found = True

                            if not found:
                                if isinstance(sfname, MutableMapping):
                                    addsf(
                                        cast(
                                            MutableSequence[CWLObjectType],
                                            datum["secondaryFiles"],
                                        ),
                                        sfname,
                                    )
                                elif discover_secondaryFiles and self.fs_access.exists(
                                    sf_location
                                ):
                                    addsf(
                                        cast(
                                            MutableSequence[CWLObjectType],
                                            datum["secondaryFiles"],
                                        ),
                                        {
                                            "location": sf_location,
                                            "basename": sfname,
                                            "class": "File",
                                        },
                                    )
                                elif sf_required:
                                    raise SourceLine(
                                        schema,
                                        "secondaryFiles",
                                        WorkflowException,
                                        debug,
                                    ).makeError(
                                        "Missing required secondary file '%s' from file object: %s"
                                        % (sfname, json_dumps(datum, indent=4))
                                    )

                    normalizeFilesDirs(
                        cast(MutableSequence[CWLObjectType], datum["secondaryFiles"])
                    )

                if "format" in schema:
                    eval_format: Any = self.do_eval(schema["format"])
                    if isinstance(eval_format, str):
                        evaluated_format: Union[str, List[str]] = eval_format
                    elif isinstance(eval_format, MutableSequence):
                        for index, entry in enumerate(eval_format):
                            message = None
                            if not isinstance(entry, str):
                                message = (
                                    "An expression in the 'format' field must "
                                    "evaluate to a string, or list of strings. "
                                    "However a non-string item was received: "
                                    f"'{entry}' of type '{type(entry)}'. "
                                    f"The expression was '{schema['format']}' and "
                                    f"its fully evaluated result is '{eval_format}'."
                                )
                            if expression.needs_parsing(entry):
                                message = (
                                    "For inputs, 'format' field can either "
                                    "contain a single CWL Expression or CWL Parameter "
                                    "Reference, a single format string, or a list of "
                                    "format strings. But the list cannot contain CWL "
                                    "Expressions or CWL Parameter References. List "
                                    f"entry number {index+1} contains the following "
                                    "unallowed CWL Parameter Reference or Expression: "
                                    f"'{entry}'."
                                )
                            if message:
                                raise SourceLine(
                                    schema["format"], index, WorkflowException, debug
                                ).makeError(message)
                        evaluated_format = cast(List[str], eval_format)
                    else:
                        raise SourceLine(
                            schema, "format", WorkflowException, debug
                        ).makeError(
                            "An expression in the 'format' field must "
                            "evaluate to a string, or list of strings. "
                            "However the type of the expression result was "
                            f"{type(eval_format)}. "
                            f"The expression was '{schema['format']}' and "
                            f"its fully evaluated result is '{eval_format}'."
                        )
                    try:
                        check_format(
                            datum,
                            evaluated_format,
                            self.formatgraph,
                        )
                    except ValidationException as ve:
                        raise WorkflowException(
                            "Expected value of '%s' to have format %s but\n "
                            " %s" % (schema["name"], schema["format"], ve)
                        ) from ve

                # Register every File/Directory found among the secondary files.
                visit_class(
                    datum.get("secondaryFiles", []),
                    ("File", "Directory"),
                    _capture_files,
                )

            if schema["type"] == "org.w3id.cwl.cwl.Directory":
                datum = cast(CWLObjectType, datum)
                # The schema's loadListing overrides the instance-wide setting.
                ll = schema.get("loadListing") or self.loadListing
                if ll and ll != "no_listing":
                    get_listing(
                        self.fs_access,
                        datum,
                        (ll == "deep_listing"),
                    )
                self.files.append(datum)

            if schema["type"] == "Any":
                visit_class(datum, ("File", "Directory"), _capture_files)

        # Position to front of the sort key
        if binding:
            for bi in bindings:
                bi["position"] = cast(List[int], binding["position"]) + cast(
                    List[int], bi["position"]
                )
            bindings.append(binding)

        return bindings
Esempio n. 13
0
        def _service_to_k8s_container(name, config, container_name=None):
            """Translate one service definition into a container mapping.

            Each recognised key of ``config`` is mapped to the corresponding
            container key; keys listed in ``self.IGNORE_DIRECTIVES`` and
            ``container_name`` are skipped, and any unrecognised key is
            copied through unchanged. ``self`` is taken from the enclosing
            scope.

            :param name: service name, used as the container name when no
                other name is available.
            :param config: mapping of service directives to their values.
            :param container_name: explicit container name; takes precedence
                over ``config['container_name']`` and ``name``.
            :returns: ``(container, volumes)`` where ``container`` is a
                ``CommentedMap`` and ``volumes`` is the list of volumes
                collected from the ``volumes`` and ``secrets`` directives.
            """
            container = CommentedMap()

            # Name precedence: explicit argument, then the service's own
            # 'container_name', then the service name. The configured name
            # must be read from `config`; `container` is still empty here.
            if container_name:
                container['name'] = container_name
            else:
                container['name'] = config.get('container_name') or name

            container['securityContext'] = CommentedMap()
            container['state'] = 'present'
            volumes = []

            for key, value in iteritems(config):
                if key in self.IGNORE_DIRECTIVES:
                    pass
                elif key in ('cap_add', 'cap_drop'):
                    security = container['securityContext']
                    if not security.get('Capabilities'):
                        security['Capabilities'] = dict(add=[], drop=[])
                    # 'cap_add' feeds the 'add' list, 'cap_drop' the 'drop' list.
                    target = security['Capabilities'][
                        'add' if key == 'cap_add' else 'drop']
                    for cap in value:
                        kube_cap = self.DOCKER_TO_KUBE_CAPABILITY_MAPPING[cap]
                        # Capabilities mapped to a falsy value are left out.
                        if kube_cap:
                            target.append(kube_cap)
                elif key == 'command':
                    # The service 'command' becomes the container 'args'.
                    if isinstance(value, string_types):
                        container['args'] = shlex.split(value)
                    else:
                        container['args'] = copy.copy(value)
                elif key == 'container_name':
                    # Already taken into account when naming the container.
                    pass
                elif key == 'entrypoint':
                    # The service 'entrypoint' becomes the container 'command'.
                    if isinstance(value, string_types):
                        container['command'] = shlex.split(value)
                    else:
                        container['command'] = copy.copy(value)
                elif key == 'environment':
                    expanded_vars = self.expand_env_vars(value)
                    if expanded_vars:
                        if 'env' not in container:
                            container['env'] = []

                        container['env'].extend(expanded_vars)
                elif key in ('ports', 'expose'):
                    if not container.get('ports'):
                        container['ports'] = []
                    self.add_container_ports(value, container['ports'])
                elif key == 'privileged':
                    container['securityContext']['privileged'] = value
                elif key == 'read_only':
                    container['securityContext'][
                        'readOnlyRootFileSystem'] = value
                elif key == 'stdin_open':
                    container['stdin'] = value
                elif key == 'volumes':
                    vols, vol_mounts = self.get_k8s_volumes(value)
                    if vol_mounts:
                        if 'volumeMounts' not in container:
                            container['volumeMounts'] = []

                        container['volumeMounts'].extend(vol_mounts)
                    if vols:
                        volumes += vols
                elif key == 'secrets':
                    for secret, secret_config in iteritems(value):
                        # Only secrets that carry a self.CONFIG_KEY section
                        # are translated.
                        if self.CONFIG_KEY in secret_config:
                            vols, vol_mounts, env_variables = self.get_k8s_secrets(
                                secret, secret_config[self.CONFIG_KEY])

                            if vol_mounts:
                                if 'volumeMounts' not in container:
                                    container['volumeMounts'] = []

                                container['volumeMounts'].extend(vol_mounts)

                            if vols:
                                volumes += vols

                            if env_variables:
                                if 'env' not in container:
                                    container['env'] = []

                                container['env'].extend(env_variables)
                elif key == 'working_dir':
                    container['workingDir'] = value
                else:
                    # Unrecognised directives are passed through unchanged.
                    container[key] = value
            return container, volumes
Esempio n. 14
0
    def bind_input(
        self,
        schema: MutableMapping[str, Any],
        datum: Any,
        discover_secondaryFiles: bool,
        lead_pos: Optional[Union[int, List[int]]] = None,
        tail_pos: Optional[List[int]] = None,
    ) -> List[MutableMapping[str, Any]]:

        if tail_pos is None:
            tail_pos = []
        if lead_pos is None:
            lead_pos = []

        bindings = []  # type: List[MutableMapping[str, str]]
        binding = {}  # type: Union[MutableMapping[str, str], CommentedMap]
        value_from_expression = False
        if "inputBinding" in schema and isinstance(schema["inputBinding"],
                                                   MutableMapping):
            binding = CommentedMap(schema["inputBinding"].items())

            bp = list(aslist(lead_pos))
            if "position" in binding:
                position = binding["position"]
                if isinstance(position,
                              str):  # no need to test the CWL Version
                    # the schema for v1.0 only allow ints
                    binding["position"] = self.do_eval(position, context=datum)
                    bp.append(binding["position"])
                else:
                    bp.extend(aslist(binding["position"]))
            else:
                bp.append(0)
            bp.extend(aslist(tail_pos))
            binding["position"] = bp

            binding["datum"] = datum
            if "valueFrom" in binding:
                value_from_expression = True

        # Handle union types
        if isinstance(schema["type"], MutableSequence):
            bound_input = False
            for t in schema["type"]:
                avsc = None  # type: Optional[Schema]
                if isinstance(t, str) and self.names.has_name(t, ""):
                    avsc = self.names.get_name(t, "")
                elif (isinstance(t, MutableMapping) and "name" in t
                      and self.names.has_name(t["name"], "")):
                    avsc = self.names.get_name(t["name"], "")
                if not avsc:
                    avsc = make_avsc_object(convert_to_dict(t), self.names)
                if validate.validate(avsc, datum):
                    schema = copy.deepcopy(schema)
                    schema["type"] = t
                    if not value_from_expression:
                        return self.bind_input(
                            schema,
                            datum,
                            lead_pos=lead_pos,
                            tail_pos=tail_pos,
                            discover_secondaryFiles=discover_secondaryFiles,
                        )
                    else:
                        self.bind_input(
                            schema,
                            datum,
                            lead_pos=lead_pos,
                            tail_pos=tail_pos,
                            discover_secondaryFiles=discover_secondaryFiles,
                        )
                        bound_input = True
            if not bound_input:
                raise validate.ValidationException(
                    "'%s' is not a valid union %s" % (datum, schema["type"]))
        elif isinstance(schema["type"], MutableMapping):
            st = copy.deepcopy(schema["type"])
            if (binding and "inputBinding" not in st and "type" in st
                    and st["type"] == "array"
                    and "itemSeparator" not in binding):
                st["inputBinding"] = {}
            for k in ("secondaryFiles", "format", "streamable"):
                if k in schema:
                    st[k] = schema[k]
            if value_from_expression:
                self.bind_input(
                    st,
                    datum,
                    lead_pos=lead_pos,
                    tail_pos=tail_pos,
                    discover_secondaryFiles=discover_secondaryFiles,
                )
            else:
                bindings.extend(
                    self.bind_input(
                        st,
                        datum,
                        lead_pos=lead_pos,
                        tail_pos=tail_pos,
                        discover_secondaryFiles=discover_secondaryFiles,
                    ))
        else:
            if schema["type"] in self.schemaDefs:
                schema = self.schemaDefs[schema["type"]]

            if schema["type"] == "record":
                for f in schema["fields"]:
                    if f["name"] in datum and datum[f["name"]] is not None:
                        bindings.extend(
                            self.bind_input(
                                f,
                                datum[f["name"]],
                                lead_pos=lead_pos,
                                tail_pos=f["name"],
                                discover_secondaryFiles=discover_secondaryFiles,
                            ))
                    else:
                        datum[f["name"]] = f.get("default")

            if schema["type"] == "array":
                for n, item in enumerate(datum):
                    b2 = None
                    if binding:
                        b2 = copy.deepcopy(binding)
                        b2["datum"] = item
                    itemschema = {"type": schema["items"], "inputBinding": b2}
                    for k in ("secondaryFiles", "format", "streamable"):
                        if k in schema:
                            itemschema[k] = schema[k]
                    bindings.extend(
                        self.bind_input(
                            itemschema,
                            item,
                            lead_pos=n,
                            tail_pos=tail_pos,
                            discover_secondaryFiles=discover_secondaryFiles,
                        ))
                binding = {}

            def _capture_files(f):  # type: (Dict[str, str]) -> Dict[str, str]
                self.files.append(f)
                return f

            if schema["type"] == "File":
                self.files.append(datum)
                if (binding and binding.get("loadContents")
                    ) or schema.get("loadContents"):
                    with self.fs_access.open(datum["location"], "rb") as f:
                        datum["contents"] = content_limit_respected_read(f)

                if "secondaryFiles" in schema:
                    if "secondaryFiles" not in datum:
                        datum["secondaryFiles"] = []
                    for sf in aslist(schema["secondaryFiles"]):
                        if "required" in sf:
                            sf_required = self.do_eval(sf["required"],
                                                       context=datum)
                        else:
                            sf_required = True

                        if "$(" in sf["pattern"] or "${" in sf["pattern"]:
                            sfpath = self.do_eval(sf["pattern"], context=datum)
                        else:
                            sfpath = substitute(datum["basename"],
                                                sf["pattern"])

                        for sfname in aslist(sfpath):
                            if not sfname:
                                continue
                            found = False

                            if isinstance(sfname, str):
                                sf_location = (
                                    datum["location"]
                                    [0:datum["location"].rindex("/") + 1] +
                                    sfname)
                                sfbasename = sfname
                            elif isinstance(sfname, MutableMapping):
                                sf_location = sfname["location"]
                                sfbasename = sfname["basename"]
                            else:
                                raise WorkflowException(
                                    "Expected secondaryFile expression to return type 'str' or 'MutableMapping', received '%s'"
                                    % (type(sfname)))

                            for d in datum["secondaryFiles"]:
                                if not d.get("basename"):
                                    d["basename"] = d["location"][
                                        d["location"].rindex("/") + 1:]
                                if d["basename"] == sfbasename:
                                    found = True

                            if not found:

                                def addsf(
                                    files: MutableSequence[MutableMapping[
                                        str, Any]],
                                    newsf: MutableMapping[str, Any],
                                ) -> None:
                                    for f in files:
                                        if f["location"] == newsf["location"]:
                                            f["basename"] = newsf["basename"]
                                            return
                                    files.append(newsf)

                                if isinstance(sfname, MutableMapping):
                                    addsf(datum["secondaryFiles"], sfname)
                                elif discover_secondaryFiles and self.fs_access.exists(
                                        sf_location):
                                    addsf(
                                        datum["secondaryFiles"],
                                        {
                                            "location": sf_location,
                                            "basename": sfname,
                                            "class": "File",
                                        },
                                    )
                                elif sf_required:
                                    raise WorkflowException(
                                        "Missing required secondary file '%s' from file object: %s"
                                        %
                                        (sfname, json_dumps(datum, indent=4)))

                    normalizeFilesDirs(datum["secondaryFiles"])

                if "format" in schema:
                    try:
                        check_format(datum, self.do_eval(schema["format"]),
                                     self.formatgraph)
                    except validate.ValidationException as ve:
                        raise WorkflowException(
                            "Expected value of '%s' to have format %s but\n "
                            " %s" %
                            (schema["name"], schema["format"], ve)) from ve

                visit_class(
                    datum.get("secondaryFiles", []),
                    ("File", "Directory"),
                    _capture_files,
                )

            if schema["type"] == "Directory":
                ll = schema.get("loadListing") or self.loadListing
                if ll and ll != "no_listing":
                    get_listing(self.fs_access, datum, (ll == "deep_listing"))
                self.files.append(datum)

            if schema["type"] == "Any":
                visit_class(datum, ("File", "Directory"), _capture_files)

        # Position to front of the sort key
        if binding:
            for bi in bindings:
                bi["position"] = binding["position"] + bi["position"]
            bindings.append(binding)

        return bindings
Esempio n. 15
0
        def _service_to_container(name, service):
            """
            Build a container definition from a single service mapping.

            Directives listed in ``self.IGNORE_DIRECTIVES`` are dropped, known
            directives are translated to their container counterparts, and any
            other directive is copied through unchanged.

            Returns a ``(container, volumes, pod)`` tuple: the container
            mapping, the volumes gathered from the ``volumes`` directive, and
            a dict filled through ``self.copy_attribute`` from the
            ``deployment`` options stored under ``self.CONFIG_KEY``.
            """
            container = CommentedMap()
            container['name'] = name
            container['securityContext'] = CommentedMap()
            container['state'] = 'present'

            volumes = []
            pod = {}

            def _collect_capabilities(bucket, capabilities):
                # Create the add/drop lists on first use, then record every
                # capability whose mapped value is truthy.
                security = container['securityContext']
                if not security.get('Capabilities'):
                    security['Capabilities'] = dict(add=[], drop=[])
                for capability in capabilities:
                    if self.DOCKER_TO_KUBE_CAPABILITY_MAPPING[capability]:
                        security['Capabilities'][bucket].append(
                            self.DOCKER_TO_KUBE_CAPABILITY_MAPPING[capability])

            for directive, setting in service.items():
                if directive in self.IGNORE_DIRECTIVES:
                    continue

                if directive == 'cap_add':
                    _collect_capabilities('add', setting)
                elif directive == 'cap_drop':
                    _collect_capabilities('drop', setting)
                elif directive == 'command':
                    # A string command is tokenized; a list is used as given.
                    container['args'] = (
                        shlex.split(setting)
                        if isinstance(setting, string_types)
                        else setting)
                elif directive == 'container_name':
                    container['name'] = setting
                elif directive == 'entrypoint':
                    container['command'] = (
                        shlex.split(setting)
                        if isinstance(setting, string_types)
                        else copy.copy(setting))
                elif directive == 'environment':
                    env = self.expand_env_vars(setting)
                    if env:
                        container['env'] = env
                elif directive in ('ports', 'expose'):
                    if not container.get('ports'):
                        container['ports'] = []
                    self.add_container_ports(setting, container['ports'])
                elif directive == 'privileged':
                    container['securityContext']['privileged'] = setting
                elif directive == 'read_only':
                    container['securityContext']['readOnlyRootFileSystem'] = setting
                elif directive == 'stdin_open':
                    container['stdin'] = setting
                elif directive == 'volumes':
                    new_volumes, mounts = self.get_k8s_volumes(setting)
                    if mounts:
                        container['volumeMounts'] = mounts
                    if new_volumes:
                        volumes += new_volumes
                elif directive == 'working_dir':
                    container['workingDir'] = setting
                else:
                    container[directive] = setting

            # Translate options:
            options = service.get(self.CONFIG_KEY)
            if options:
                for option, option_value in options.items():
                    if option != 'deployment':
                        continue
                    for attribute, attribute_value in option_value.items():
                        if attribute != 'force':
                            self.copy_attribute(pod, attribute, attribute_value)

            return container, volumes, pod
Esempio n. 16
0
        def _service_to_container(name, service):
            """
            Convert one service mapping into ``(container, volumes, pod)``.

            ``container`` is a ``CommentedMap`` seeded with the service name,
            an empty ``securityContext`` and ``state: present``; ``volumes``
            collects what ``self.get_k8s_volumes`` returns for the ``volumes``
            directive; ``pod`` receives the ``deployment`` options (except
            ``force``) found under ``self.CONFIG_KEY``.
            """
            # Directives whose value is stored verbatim under another key.
            plain_renames = {
                'container_name': 'name',
                'stdin_open': 'stdin',
                'working_dir': 'workingDir',
            }
            # Directives whose value is stored verbatim in securityContext.
            security_flags = {
                'privileged': 'privileged',
                'read_only': 'readOnlyRootFileSystem',
            }

            container = CommentedMap()
            container['name'] = name
            container['securityContext'] = CommentedMap()
            container['state'] = 'present'

            volumes = []
            pod = {}

            for directive, setting in service.items():
                if directive in self.IGNORE_DIRECTIVES:
                    continue

                if directive in ('cap_add', 'cap_drop'):
                    operation = 'add' if directive == 'cap_add' else 'drop'
                    if not container['securityContext'].get('Capabilities'):
                        container['securityContext']['Capabilities'] = dict(
                            add=[], drop=[])
                    for capability in setting:
                        # Only capabilities with a truthy mapping are kept.
                        if self.DOCKER_TO_KUBE_CAPABILITY_MAPPING[capability]:
                            container['securityContext']['Capabilities'][
                                operation].append(
                                    self.DOCKER_TO_KUBE_CAPABILITY_MAPPING[
                                        capability])
                elif directive in plain_renames:
                    container[plain_renames[directive]] = setting
                elif directive in security_flags:
                    container['securityContext'][
                        security_flags[directive]] = setting
                elif directive == 'command':
                    if isinstance(setting, string_types):
                        setting = shlex.split(setting)
                    container['args'] = setting
                elif directive == 'entrypoint':
                    if isinstance(setting, string_types):
                        container['command'] = shlex.split(setting)
                    else:
                        container['command'] = copy.copy(setting)
                elif directive == 'environment':
                    expanded = self.expand_env_vars(setting)
                    if expanded:
                        container['env'] = expanded
                elif directive in ('ports', 'expose'):
                    if not container.get('ports'):
                        container['ports'] = []
                    self.add_container_ports(setting, container['ports'])
                elif directive == 'volumes':
                    found_volumes, found_mounts = self.get_k8s_volumes(setting)
                    if found_mounts:
                        container['volumeMounts'] = found_mounts
                    if found_volumes:
                        volumes += found_volumes
                else:
                    container[directive] = setting

            # Translate options:
            if service.get(self.CONFIG_KEY):
                for option, option_value in service[self.CONFIG_KEY].items():
                    if option == 'deployment':
                        for attr_name, attr_value in option_value.items():
                            if attr_name == 'force':
                                continue
                            self.copy_attribute(pod, attr_name, attr_value)

            return container, volumes, pod
Esempio n. 17
0
 def _recursive_get(self, data: CommentedMap, key: str, default_value: Any) -> Any:
     """
     Look up a possibly nested ``key`` in ``data``.

     ``self._parse_key`` splits the key into its first segment and the
     remainder (``None`` once no further segment is left). Missing
     intermediate segments are treated as empty maps, so the lookup ends up
     returning ``default_value`` for them.
     """
     node = data
     segment, remainder = self._parse_key(key)
     # Descend one level per segment until the last segment is reached.
     while remainder is not None:
         node = node.get(segment, CommentedMap())
         segment, remainder = self._parse_key(remainder)
     return node.get(segment, default_value)
Esempio n. 18
0
def main(docname):
    """
    Convert a Jinja-templated YAML recipe file and print the converted YAML.

    The file at ``docname`` is processed in several passes:

    1. Lines containing ``{% ... %}`` are executed as assignments (after
       dropping the first ``set``) and collected into a ``context`` mapping.
    2. List items ending in a ``[...]`` selector are rewritten to
       ``- sel(<selector>): <value>``.
    3. Values containing ``{{`` are wrapped in single quotes unless they are
       already quoted.
    4. ``skip: true`` lines are removed and their selectors are stored under
       ``build.skip``.
    5. Every entry of ``outputs`` gets its ``name``/``version`` moved into a
       ``package`` mapping and its ``script`` into a ``build`` mapping.

    The result is printed to stdout, with an empty line inserted after the
    ``context`` section.

    Raises:
        ValueError: if a line contains ``{%`` but no closing ``%}``.
    """

    with open(docname, "r") as fi:
        lines = fi.readlines()
    context = {}
    rest_lines = []
    for line in lines:
        # print(line)
        if "{%" in line:
            set_match = re.search("{%(.*)%}", line)
            if set_match is None:
                # Previously this surfaced as an opaque AttributeError.
                raise ValueError(
                    f"Unterminated Jinja statement in {docname}: {line!r}"
                )
            set_expr = set_match.group(1)
            set_expr = set_expr.replace("set", "", 1).strip()
            # SECURITY: this executes text taken from the input file. Only run
            # this converter on recipes you trust.
            exec(set_expr, globals(), context)
        else:
            rest_lines.append(line)

    yaml = YAML(typ="rt")
    yaml.preserve_quotes = True
    yaml.default_flow_style = False
    yaml.indent(sequence=4, offset=2)
    yaml.width = 1000
    yaml.Representer = MyRepresenter
    yaml.Loader = ruamel.yaml.RoundTripLoader

    result_yaml = CommentedMap()
    result_yaml["context"] = context

    def has_selector(s):
        return s.strip().endswith("]")

    quoted_lines = []
    for line in rest_lines:
        if has_selector(line):
            selector_start = line.rfind("[")
            selector_end = line.rfind("]")
            selector_content = line[selector_start + 1 : selector_end]

            if line.strip().startswith("-"):
                dash_pos = line.find("-")
                # The value ends at the comment marker when there is one,
                # otherwise at the selector bracket. rfind() returns -1 for a
                # missing "#", which must not take part in the min().
                hash_pos = line.rfind("#")
                if hash_pos == -1:
                    value_end = selector_start
                else:
                    value_end = min(hash_pos, selector_start)
                line = (
                    line[: dash_pos + 1]
                    + f" sel({selector_content}): "
                    + line[dash_pos + 1 : value_end].strip()
                    + "\n"
                )
        quoted_lines.append(line)
    rest_lines = quoted_lines

    def check_if_quoted(s):
        s = s.strip()
        return s.startswith('"') or s.startswith("'")

    quoted_lines = []
    for line in rest_lines:
        if "{{" in line:
            # make sure that jinja stuff is quoted
            if line.find(":") != -1:
                idx = line.find(":")
            elif line.strip().startswith("-"):
                idx = line.find("-")
            else:
                # Neither a mapping entry nor a list item: there is no value
                # position to quote, so the line is passed through untouched
                # instead of reusing an index from an earlier line.
                idx = None

            if idx is not None:
                rest = line[idx + 1 :]

                if not check_if_quoted(rest):
                    if "'" in rest:
                        rest = rest.replace("'", '"')

                    line = line[: idx + 1] + f" '{rest.strip()}'\n"
        quoted_lines.append(line)
    rest_lines = quoted_lines

    skips, wo_skip_lines = [], []
    for line in rest_lines:
        if line.strip().startswith("skip"):
            parts = line.split(":")
            rhs = parts[1].strip()
            if rhs.startswith("true"):
                selector_start = line.rfind("[")
                selector_end = line.rfind("]")
                selector_content = line[selector_start + 1 : selector_end]
                skips.append(selector_content)
            else:
                print("ATTENTION skip: false not handled!")
        else:
            wo_skip_lines.append(line)

    rest_lines = wo_skip_lines
    result_yaml.update(
        ruamel.yaml.load("".join(rest_lines), ruamel.yaml.RoundTripLoader)
    )

    if len(skips) != 0:
        # The recipe may have no (or an empty) build section once the skip
        # lines have been removed; create it before storing the selectors.
        if result_yaml.get("build") is None:
            result_yaml["build"] = CommentedMap()
        result_yaml["build"]["skip"] = skips

    if result_yaml.get("outputs"):
        for o in result_yaml["outputs"]:
            name = o["name"]
            package = {"name": name}
            del o["name"]
            if o.get("version"):
                package["version"] = o["version"]
                del o["version"]

            build = {}
            if o.get("script"):
                build["script"] = o["script"]
                del o["script"]

            o["package"] = package
            o["build"] = build

        for d in result_yaml["outputs"]:
            print(order_output_dict(d))
        result_yaml["outputs"] = [order_output_dict(d) for d in result_yaml["outputs"]]

    from io import StringIO

    output = StringIO()
    yaml.dump(result_yaml, output)

    # Hacky way to insert an empty line after the context-key-object
    context_output = StringIO()
    yaml.dump(context, context_output)
    context_output = context_output.getvalue()
    context_output_len = len(context_output.split("\n"))

    final_result = output.getvalue()
    final_result_lines = final_result.split("\n")
    final_result_lines.insert(context_output_len, "")

    print("\n".join(final_result_lines))
Esempio n. 19
0
class RootManifest:
    """
    Manifest backed by a YAML file.

    The content read from disk is kept in ``_backing``; all edits are made on
    ``_changes``, a deep copy taken at construction time, which is what
    ``yaml()`` and ``save()`` serialize.

    NOTE(review): this class was described as defaulting to the "vault.yml"
    file in the current directory, but no such default is applied here. When
    ``path`` is omitted, ``self.path`` is ``None`` and nothing is loaded.
    Confirm the intended default with callers.
    """
    def __init__(self, path: str = None, load: bool = True):
        self.path = os.path.abspath(path) if path else None
        self._backing = CommentedMap()
        # Initialised here so yaml() works even if set_header() is never called.
        self._header = None

        if load:
            self._load()

        self._changes = deepcopy(self._backing)

    def _load(self):
        """Read the manifest file into ``_backing`` if it exists."""
        # Without a path there is nothing to read (os.path.exists(None) raises).
        if not self.path or not os.path.exists(self.path):
            return

        with open(self.path, mode="r") as f:
            loaded = yaml.safe_load(f)

        # An empty file parses to None, which update() cannot consume.
        if loaded:
            self._backing.update(loaded)

    def set_header(self, header: str) -> None:
        """Set the text emitted as ``#`` comment lines at the top of the YAML."""
        self._header = header

    def create_secrets_backend_section(self) -> None:
        """Create an empty, commented ``secrets_backends`` section if missing."""
        if "secrets_backends" not in self._changes:
            self._changes["secrets_backends"] = {}
            self._changes.yaml_set_comment_before_after_key(
                "secrets_backends",
                before=
                "Secrets backends. Each key is the mount to a secrets engine.")

    def add_secrets_backend(self, name: str, manifest: ManifestItem) -> None:
        """
        Merge the converted ``manifest`` into the ``secrets_backends`` entry
        for ``name`` (surrounding slashes removed). Does nothing when the
        conversion result is empty.
        """
        converted = manifest.convert()
        if not converted:
            return

        # Mirror add_auth_method(): do not require the section to exist yet.
        if "secrets_backends" not in self._changes:
            self._changes["secrets_backends"] = {}

        name = name.strip("/")
        new_dict = self._changes["secrets_backends"].get(name, {})
        new_dict.update(converted)
        self._changes["secrets_backends"][name] = new_dict

    def delete_secrets_backend(self, name: str) -> None:
        """Remove the ``secrets_backends`` entry for ``name`` if present."""
        name = name.strip("/")

        if "secrets_backends" in self._changes and name in self._changes[
                "secrets_backends"]:
            del self._changes["secrets_backends"][name]

    def list_secrets_backend_names(self) -> List[str]:
        """
        Return the secrets backend names, without surrounding slashes.

        The names come from the loaded content (``_backing``), not from the
        pending changes.
        """
        return [
            name.strip("/")
            for name in self._backing.get("secrets_backends", {})
        ]

    def create_auth_method_section(self) -> None:
        """Create an empty, commented ``auth_methods`` section if missing."""
        if "auth_methods" not in self._changes:
            self._changes["auth_methods"] = {}
            self._changes.yaml_set_comment_before_after_key(
                "auth_methods",
                before=
                "Authentication methods. Each key is the name of the auth method."
            )

    def add_auth_method(self, name: str, manifest: ManifestItem) -> None:
        """
        Merge the converted ``manifest`` into the ``auth_methods`` entry for
        ``name`` (surrounding slashes removed). Does nothing when the
        conversion result is empty.
        """
        converted = manifest.convert()
        if not converted:
            return

        if "auth_methods" not in self._changes:
            self._changes["auth_methods"] = {}

        name = name.strip("/")
        new_dict = self._changes["auth_methods"].get(name, {})
        new_dict.update(converted)
        self._changes["auth_methods"][name] = new_dict

    def yaml(self) -> str:
        """Return the pending changes as YAML, preceded by the header comment."""
        output = ""
        if self._header:
            for line in self._header.split("\n"):
                output += f"# {line}\n"
            output += "\n"

        output += yaml.round_trip_dump(self._changes)
        return output

    def save(self) -> None:
        """Write the output of ``yaml()`` to ``self.path``."""
        with open(self.path, "w") as f:
            f.write(self.yaml())
Esempio n. 20
0
class YAMLRoundtripConfig(MutableConfigFile, MutableAbstractItemAccessMixin, MutableAbstractDictFunctionsMixin):
    """
    Class for YAML-based (roundtrip) configurations

    The parsed document is held in ``self.data``; the dict-style and
    item-access methods below delegate to it.
    """

    def __init__(self, owner: Any, manager: "m.StorageManager", path: str, *args: List[Any], **kwargs: Dict[Any, Any]):
        self.data = CommentedMap()

        super().__init__(owner, manager, path, *args, **kwargs)

    def load(self):
        """
        Parse the file at ``self.path`` as YAML 1.2 into ``self.data``
        """

        with open(self.path, "r") as fh:
            loaded = yaml.round_trip_load(fh, version=(1, 2))

        # An empty document parses to None; keep ``data`` a usable mapping so
        # the accessors below (and ``unload()``) do not fail on it.
        self.data = loaded if loaded is not None else CommentedMap()

    def reload(self):
        """
        Discard the in-memory data and read the file again
        """

        self.unload()
        self.load()

    def unload(self):
        """
        Empty the in-memory data
        """

        self.data.clear()

    def save(self):
        """
        Write ``self.data`` back to ``self.path``

        Raises `RuntimeError` when the `mutable` attribute is falsy.
        """

        if not self.mutable:
            raise RuntimeError("You may not modify a defaults file at runtime - check the mutable attribute!")

        with open(self.path, "w") as fh:
            yaml.round_trip_dump(self.data, fh)

    # region: CommentedMap functions

    def insert(self, pos, key, value, *, comment=None):
        """
        Insert a `key: value` pair at the given position, attaching a comment if provided

        Wrapper for `CommentedMap.insert()`
        """

        return self.data.insert(pos, key, value, comment)

    def add_eol_comment(self, comment, *, key=NoComment, column=30):
        """
        Add an end-of-line comment for a key at a particular column (30 by default)

        Wrapper for `CommentedMap.yaml_add_eol_comment()`
        """

        # Setting the column to None as the API actually defaults to will raise an exception, so we have to
        # specify one unfortunately

        return self.data.yaml_add_eol_comment(comment, key=key, column=column)

    def set_comment_before_key(self, key, comment, *, indent=0):
        """
        Set a comment before a given key

        Wrapper for `CommentedMap.yaml_set_comment_before_after_key()`
        """

        return self.data.yaml_set_comment_before_after_key(
            key, before=comment, indent=indent, after=None, after_indent=None
        )

    def set_start_comment(self, comment, indent=0):
        """
        Set the starting comment

        Wrapper for `CommentedMap.yaml_set_start_comment()`
        """

        return self.data.yaml_set_start_comment(comment, indent=indent)

    # endregion

    # region: Dict functions

    def clear(self):
        return self.data.clear()

    def copy(self):
        return self.data.copy()

    def get(self, key, default=None):
        return self.data.get(key, default)

    def items(self):
        return self.data.items()

    def keys(self):
        return self.data.keys()

    def pop(self, key, default=None):
        # Unlike `dict.pop()`, a missing key yields `default` (None unless
        # given) because a default is always passed through.
        return self.data.pop(key, default)

    def popitem(self):
        return self.data.popitem()

    def setdefault(self, key, default=None):
        if key not in self.data:
            self.data[key] = default
            return default

        return self.data[key]

    def update(self, other):
        return self.data.update(other)

    def values(self):
        return self.data.values()

    # endregion

    # Item access functions

    def __contains__(self, key):
        """
        Wrapper for `dict.__contains__()`
        """

        return self.data.__contains__(key)

    def __delitem__(self, key):
        """
        Wrapper for `dict.__delitem__()`
        """

        del self.data[key]

    def __getitem__(self, key):
        """
        Wrapper for `dict.__getitem__()`
        """

        return self.data.__getitem__(key)

    def __iter__(self):
        """
        Wrapper for `dict.__iter__()`
        """

        return self.data.__iter__()

    def __len__(self):
        """
        Wrapper for `dict.__len__()`
        """

        return self.data.__len__()

    def __setitem__(self, key, value):
        """
        Wrapper for `dict.__setitem__()`
        """

        return self.data.__setitem__(key, value)
Esempio n. 21
0
 def _recursive_get(self, data: CommentedMap, key: str, default_value: Any) -> Any:
     """
     Look up a dot-separated ``key`` in nested mappings.

     Every segment before the last one selects a nested mapping; a missing
     segment is treated as an empty map, so the lookup then returns
     ``default_value``.
     """
     node = data
     remaining = key
     # Consume one leading segment per iteration until no dot is left.
     while '.' in remaining:
         segment, remaining = remaining.split('.', 1)
         node = node.get(segment, CommentedMap())
     return node.get(remaining, default_value)
Esempio n. 22
0
        def _service_to_k8s_container(name, config, container_name=None):
            """
            Build a container definition from a service configuration.

            :param name: service name, used as the container name when no
                other name is available
            :param config: mapping of service directives to translate
            :param container_name: explicit container name; takes precedence
                over both ``config['container_name']`` and ``name``
            :return: ``(container, volumes)`` - the container mapping and the
                list of volumes gathered from the ``volumes`` and ``secrets``
                directives
            """
            container = CommentedMap()

            if container_name:
                container['name'] = container_name
            else:
                # Fall back to the configured container_name, then to the
                # service name. (This used to read container['name'] from the
                # still-empty map, raising KeyError whenever the config
                # carried a container_name.)
                container['name'] = config['container_name'] if config.get('container_name') else name

            container['securityContext'] = CommentedMap()
            container['state'] = 'present'
            volumes = []

            for key, value in iteritems(config):
                if key in self.IGNORE_DIRECTIVES:
                    pass
                elif key == 'cap_add':
                    if not container['securityContext'].get('Capabilities'):
                        container['securityContext']['Capabilities'] = dict(add=[], drop=[])
                    for cap in value:
                        # Only capabilities with a truthy mapping are kept.
                        if self.DOCKER_TO_KUBE_CAPABILITY_MAPPING[cap]:
                            container['securityContext']['Capabilities']['add'].append(
                                self.DOCKER_TO_KUBE_CAPABILITY_MAPPING[cap])
                elif key == 'cap_drop':
                    if not container['securityContext'].get('Capabilities'):
                        container['securityContext']['Capabilities'] = dict(add=[], drop=[])
                    for cap in value:
                        if self.DOCKER_TO_KUBE_CAPABILITY_MAPPING[cap]:
                            container['securityContext']['Capabilities']['drop'].append(
                                self.DOCKER_TO_KUBE_CAPABILITY_MAPPING[cap])
                elif key == 'command':
                    # The service "command" becomes the container "args".
                    if isinstance(value, string_types):
                        container['args'] = shlex.split(value)
                    else:
                        container['args'] = copy.copy(value)
                elif key == 'container_name':
                    # Already handled when the container name was chosen above.
                    pass
                elif key == 'entrypoint':
                    # The service "entrypoint" becomes the container "command".
                    if isinstance(value, string_types):
                        container['command'] = shlex.split(value)
                    else:
                        container['command'] = copy.copy(value)
                elif key == 'environment':
                    expanded_vars = self.expand_env_vars(value)
                    if expanded_vars:
                        if 'env' not in container:
                            container['env'] = []

                        container['env'].extend(expanded_vars)
                elif key in ('ports', 'expose'):
                    if not container.get('ports'):
                        container['ports'] = []
                    self.add_container_ports(value, container['ports'])
                elif key == 'privileged':
                    container['securityContext']['privileged'] = value
                elif key == 'read_only':
                    container['securityContext']['readOnlyRootFileSystem'] = value
                elif key == 'stdin_open':
                    container['stdin'] = value
                elif key == 'volumes':
                    vols, vol_mounts = self.get_k8s_volumes(value)
                    if vol_mounts:
                        if 'volumeMounts' not in container:
                            container['volumeMounts'] = []

                        container['volumeMounts'].extend(vol_mounts)
                    if vols:
                        volumes += vols
                elif key == 'secrets':
                    # Secrets may contribute volumes, mounts and env entries;
                    # only secrets carrying a CONFIG_KEY section are used.
                    for secret, secret_config in iteritems(value):
                        if self.CONFIG_KEY in secret_config:
                            vols, vol_mounts, env_variables = self.get_k8s_secrets(secret, secret_config[self.CONFIG_KEY])

                            if vol_mounts:
                                if 'volumeMounts' not in container:
                                    container['volumeMounts'] = []

                                container['volumeMounts'].extend(vol_mounts)

                            if vols:
                                volumes += vols

                            if env_variables:
                                if 'env' not in container:
                                    container['env'] = []

                                container['env'].extend(env_variables)
                elif key == 'working_dir':
                    container['workingDir'] = value
                else:
                    # Unknown directives are copied through unchanged.
                    container[key] = value
            return container, volumes