Example #1
def prepare_tool(
    tool: Tool,
    toolversions: List[str],
    isorphan: bool,
    is_published_pipeline: bool = False,
):
    # Stuff to list on the documentation page:
    #   - Versions of tools
    #   - Generated command
    #   - Cool if it grouped the tools by vendor
    #   -

    if not tool:
        return None
    try:
        if is_published_pipeline:
            return ""
        if tool.type() == ToolType.CommandTool:
            return prepare_commandtool_page(tool, toolversions)
        elif tool.type() == ToolType.Workflow:
            return prepare_workflow_page(tool, toolversions)
        elif tool.type() == ToolType.CodeTool:
            return prepare_code_tool_page(tool, toolversions)
    except Exception as e:
        traceback.print_exc()
        Logger.critical("Couldn't generate documentation for " + tool.id() +
                        " " + str(e))
Example #2
def prepare_quickstart(tool: Tool):
    required_python_input_map = "\n".join(" " * 15 + i.id() + "=None,"
                                          for i in tool.tool_inputs()
                                          if not i.intype.optional)

    python_step_name = tool.id().lower() + "_step"
    output_python_code = "\n".join(
        " " * 7 + f'wf.output("{o.id()}", source={python_step_name}.{o.id()})'
        for o in tool.tool_outputs())
    python_codeblock = f"""\
    .. code-block:: python

       from {tool.__module__} import {tool.__class__.__name__}

       wf = WorkflowBuilder("myworkflow")

       wf.step(
           "{python_step_name}",
           {tool.__class__.__name__}(
{required_python_input_map}
           )
       )
{output_python_code}
    """

    return f"""\
Example #3
def prepare_run_instructions_input_file(tool: Tool, user_inps: dict,
                                        other_inps: dict,
                                        reference_information: str):
    yaml_user_inps = CwlTranslator.stringify_translated_inputs(user_inps)
    yaml_other_inps = CwlTranslator.stringify_translated_inputs(other_inps)
    indented_user = "".join(" " * 7 + s
                            for s in yaml_user_inps.splitlines(True))
    indented_other = "".join(" " * 7 + s
                             for s in yaml_other_inps.splitlines(True))

    not_localising_secondary_warning = ""
    if isinstance(tool, WorkflowBase):
        inputs_that_arent_localising_secondary_files = [
            t.id() for t in tool.tool_inputs()
            if t.doc.skip_sourcing_secondary_files
        ]
        if len(inputs_that_arent_localising_secondary_files) > 0:
            not_localising_secondary_warning = f"""\
.. warning::

   The secondary files for the inputs '{"', '".join(inputs_that_arent_localising_secondary_files)}' will not automatically \
   localise using janis prepare; they are instead built just after download. Please note this build can take a few hours \
   before the pipeline runs.
"""

    has_static = len(other_inps) > 0

    tb = " " * 4
    run_args = ["janis run [...run options]", tb + "--inputs inputs.yaml"]

    static_generation = ("" if not has_static else f"""\
   # static inputs
   janis inputs --static {tool.id()} > static.yaml""")
    static_yaml = ("" if not has_static else f"""\
**static.yaml**

.. code-block:: yaml

{indented_other}""")
    if has_static:
        run_args.append(tb + "--inputs static.yaml")

    if isinstance(tool, CommandTool) and not tool.container():
        run_args.append(
            tb +
            f"--container-override '{tool.id()}=<organisation/container:version>'"
        )

    run_args.append(tb + tool.id())
    run_statement = " \\\n".join(" " * 3 + el for el in run_args)

    if reference_information:
        reference_information = (
            f"The following inputs have a suggested source. Using janis prepare with the relevant "
            f"``--source-hint`` will automatically download these files. See "
            f"`below <#additional-configuration-inputs>`_ for more information about inputs "
            f"for {tool.id()}.\n{reference_information}"
        )

    return f"""\
Example #4
def validate_inputs(tool: Tool, additional_inputs):
    errors = {}

    input_values_from_workflow = {}
    if isinstance(tool, Workflow):
        input_values_from_workflow = {
            inpkey: inp.value
            for inpkey, inp in tool.input_nodes.items() if inp.value
        }
    input_values_to_use = {**input_values_from_workflow, **additional_inputs}

    for inp in tool.tool_inputs():
        inpkey = inp.id()
        value = input_values_to_use.get(inpkey)

        if inp.intype.validate_value(value, allow_null_if_not_optional=False):
            continue

        errors[inpkey] = (
            inp.intype.invalid_value_hint(value) or
            f"An internal error occurred when validating {inpkey} from {inp.intype.id()}"
        )

    if len(errors) == 0:
        return True

    raise ValueError(f"There were errors in {len(errors)} inputs: " +
                     str(errors))
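
A minimal usage sketch of validate_inputs, assuming the function and its janis_core imports are on hand; the Stub* classes below are hypothetical stand-ins for a tool and its input types, not real Janis classes:

# Hypothetical stubs, for illustration only (not janis_core classes).
class StubType:
    def __init__(self, optional=False):
        self.optional = optional

    def id(self):
        return "String"

    def validate_value(self, value, allow_null_if_not_optional):
        return value is not None or self.optional

    def invalid_value_hint(self, value):
        return "value must not be null"


class StubInput:
    def __init__(self, key, optional=False):
        self._key = key
        self.intype = StubType(optional)

    def id(self):
        return self._key


class StubTool:
    def tool_inputs(self):
        return [StubInput("bam"), StubInput("threads", optional=True)]


# Returns True when every required input validates...
assert validate_inputs(StubTool(), {"bam": "/data/sample.bam"}) is True

# ...and raises ValueError listing the offending keys otherwise.
try:
    validate_inputs(StubTool(), {})
except ValueError as e:
    print(e)  # There were errors in 1 inputs: {'bam': 'value must not be null'}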
Example #5
    def evaluate_translation(tool: Tool) -> Union[str, bool]:
        """
        Evaluate if we can successfully translate to WDL and CWL
        # TODO: validate translations (will look into better way to ensure validation tool exists)

        :param tool: Janis tool
        :type tool: Tool

        :return:  error message or True if we can successfully translate to WDL and CWL
        :rtype: Union[str, bool]
        """
        engines = test_helpers.get_available_engines()
        output_dir = os.path.join(os.getcwd(), "tests_output", tool.id())

        errors = []
        for engine in engines:
            try:
                translator = engines[engine]
                translator.translate(
                    tool, export_path=output_dir, to_console=False, to_disk=True
                )
            except Exception as e:
                errors.append(f"{translator.name}: translation failed {str(e)}")

        if errors:
            return ", ".join(errors)

        return True
Example #6
def cascade_batchrun_inputs(workflow: Tool, inputs: List[Dict],
                            options: BatchRunRequirements):
    fields_to_group = set(options.fields)
    fields_to_group.add(options.groupby)

    wfins = workflow.inputs_map()

    required_ar_depth_of_groupby_fields = {
        f: 1 + count_janisarray_depth(wfins[f].intype)
        for f in fields_to_group
    }

    ins = {}

    for inp in inputs:
        for k, v in inp.items():
            if k in fields_to_group:
                if k not in ins:
                    ins[k] = []

                # We'll look at the shape of the data, and decide whether
                # we can just use the value, or we need to wrap it in another array
                if count_array_depth(
                        v) < required_ar_depth_of_groupby_fields[k]:
                    v = [v]
                ins[k].extend(v)
            else:
                # overwrite the previous value
                ins[k] = v

    return ins
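
To make the cascading concrete, here is a small, hypothetical data example (field and file names are invented) showing what the function is meant to produce: grouped fields are stacked into arrays, everything else keeps its last value.

# Hypothetical illustration only: 'sample_name' is the group-by key,
# 'bam' is a grouped field, and 'reference' is not grouped.
batch_inputs = [
    {"sample_name": "NA12878", "bam": "na12878.bam", "reference": "hg38.fasta"},
    {"sample_name": "NA12891", "bam": "na12891.bam", "reference": "hg38.fasta"},
]

# With options.groupby == "sample_name" and options.fields == ["bam"], the
# cascaded inputs stack the grouped fields (wrapping values that are one
# array level too shallow) and simply overwrite the rest:
expected = {
    "sample_name": ["NA12878", "NA12891"],
    "bam": ["na12878.bam", "na12891.bam"],
    "reference": "hg38.fasta",
}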
Example #7
    def check_existence_of_files(wf: Tool, inputs: Dict):

        doesnt_exist = {}

        for inp in wf.tool_inputs():
            intype = inp.intype
            is_path = isinstance(intype, (File, Directory))
            is_array_of_paths = isinstance(intype, Array) and isinstance(
                intype.fundamental_type(), (File, Directory))

            if not (is_path or is_array_of_paths):
                continue

            val = inputs.get(inp.id())
            if val is None:
                if inp.intype.optional:
                    continue
                raise Exception(
                    f"Expected input '{inp.id()}' was not found or is null")

            doesnt_exist.update(
                InputChecker.check_base_with_type(inp, intype, val))

        if len(doesnt_exist) > 0:
            import ruamel.yaml

            stringified = ruamel.yaml.dump(doesnt_exist,
                                           default_flow_style=False)
            raise Exception("The following inputs were not found:\n" +
                            stringified)
Example #8
    def inputs_modifier(self, tool: Tool, inputs: Dict,
                        hints: Dict[str, str]) -> Dict:
        new_inputs = {}
        for inp in tool.inputs_map().values():
            if inp.id() not in inputs:
                continue
            new_inputs[inp.id()] = self.process_single_input(
                inp.id(), inp.intype, inputs[inp.id()])

        return {**inputs, **new_inputs}
Example #9
def prepare_run_instructions(tool: Tool):
    metadata = tool.bind_metadata() or tool.metadata
    has_array_of_arrays_inps = any(
        (isinstance(i.intype, Array) and isinstance(i.intype.subtype(), Array))
        for i in tool.tool_inputs())

    static_input_tuples = [[
        i.id(), i.intype.id(),
        prepare_source(i.doc.source)
    ] for i in tool.tool_inputs() if i.doc.quality == InputQualityType.static
                           and i.doc.source is not None]

    reference_information = ""

    if len(static_input_tuples) > 0:
        static_input_headers = ["Name", "Type", "Source"]
        reference_information = tabulate(static_input_tuples,
                                         headers=static_input_headers,
                                         tablefmt="rst")

    # overrides = metadata.sample_input_overrides or {}
    user_inps = {}
    other_inps = {}

    for i in tool.tool_inputs():
        if i.intype.optional or i.default is not None:
            continue

        val = i.doc.example or prepare_default_for_type(i.id(), i.intype)
        if i.doc and i.doc.quality and i.doc.quality != InputQualityType.user:
            other_inps[i.id()] = val
        else:
            # catch None and InputQualityType.user
            user_inps[i.id()] = val

    if has_array_of_arrays_inps:
        return prepare_run_instructions_input_file(tool, user_inps, other_inps,
                                                   reference_information)
    else:
        return prepare_run_instructions_cli(tool, user_inps, other_inps,
                                            reference_information)
Example #10
    def evaluate(cls, tool: Tool) -> Union[str, bool]:
        """
        Evaluate whether a Janis tool satisfies the criteria required for it to be publishable

        :param tool: Janis tool
        :type tool: Tool

        :return: error message or True if valid
        :rtype: Union[str, bool]
        """
        if tool.skip_test():
            return cls.STATUS_SKIPPED

        if tool.type() == ToolType.Workflow:
            return cls.evaluate_workflow(tool)
        elif tool.type() == ToolType.CommandTool:
            return cls.evaluate_command_tool(tool)
        elif tool.type() == ToolType.CodeTool:
            return cls.evaluate_code_tool(tool)
        raise Exception("Unrecognised tool type: " + str(tool.type()))
Example #11
def ensure_outputs_are_in_workflow_and_are_compatible(
        tool: Tool, outputs: List[str], compatible_type: DataType):
    tool_outputs: Dict[str, TOutput] = tool.outputs_map()
    failed_outputs = []
    untyped_outputs = []

    for o in outputs:
        if o not in tool_outputs:
            failed_outputs.append(o)
        elif not compatible_type.can_receive_from(tool_outputs[o].outtype):
            untyped_outputs.append(o)

    return failed_outputs, untyped_outputs
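
A rough usage sketch, assuming the function above is importable; StubVcf, StubBam, StubOutput and StubTool are hypothetical stand-ins rather than Janis data types:

# Hypothetical stand-ins, for illustration only.
class StubVcf:
    def can_receive_from(self, other):
        return isinstance(other, StubVcf)


class StubBam:
    pass


class StubOutput:
    def __init__(self, outtype):
        self.outtype = outtype


class StubTool:
    def outputs_map(self):
        return {"calls": StubOutput(StubVcf()), "reads": StubOutput(StubBam())}


failed, untyped = ensure_outputs_are_in_workflow_and_are_compatible(
    StubTool(), ["calls", "reads", "metrics"], StubVcf())
print(failed)   # ['metrics'] - not an output of the tool
print(untyped)  # ['reads']   - exists, but its type is not VCF-compatible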
Example #12
    def evaluate_container(tool: Tool) -> Union[str, bool]:
        """
        Evaluate if the container specified for this tool exists in the remote registry

        :param tool: Janis tool
        :type tool: Tool

        :return:  error message or True if listed container for this tool exists in the remote registry
        :rtype: Union[str, bool]
        """
        # If there is no container, we don't need to check if the container exists in the registry
        if not tool.containers():
            return True

        # Some tools might not have a container; we only want to check that, if a container is listed, its digest exists
        containers = [v for k, v in tool.containers().items()]
        containers = list(filter(None, containers))

        if not containers:
            return True

        test_helpers.verify_janis_assistant_installed()
        from janis_assistant.data.container import get_digests_from_containers

        cache_location = os.path.join(os.getcwd(), "tests_output", "containers")
        digests = get_digests_from_containers(containers, cache_location=cache_location)

        errors = []
        for c in containers:
            # if the digest is exactly the same as the container string, the digest was not found (it's just the tag name)
            if c not in digests or digests[c] == c:
                # if the container name is already using a hash, we don't want to report any issue here
                if "@sha256:" not in c:
                    errors.append(f"container {c} not found")

        if errors:
            return ", ".join(errors)

        return True
Example #13
    def evaluate_unittest_exists(tool: Tool) -> Union[str, bool]:
        """
        Evaluate if a test suite for this tool is provided

        :param tool: Janis tool
        :type tool: Tool

        :return:  error message or True if unit tests for this tool exist
        :rtype: Union[str, bool]
        """
        if tool.tests():
            return True

        return "Mising unit tests"
Example #14
    def evaluate_tool_module(tool: Tool) -> Union[str, bool]:
        """
        Evaluate if tool module name for documentation is provided

        :param tool: Janis tool
        :type tool: Tool

        :return:  error message or True if tool module name for this tool exists
        :rtype: Union[str, bool]
        """
        if not tool.tool_module():
            return "Missing tool module"

        return True
Example #15
    def evaluate_friendly_name(tool: Tool) -> Union[str, bool]:
        """
        Evaluate if a friendly name for documentation is provided

        :param tool: Janis tool
        :type tool: Tool

        :return:  error message or True if a friendly name for this tool exists
        :rtype: Union[str, bool]
        """
        if not tool.friendly_name():
            return "Missing friendly name"

        return True
Example #16
File: utils.py Project: junyk/janis
def prepare_run_instructions_input_file(tool: Tool, user_inps: dict,
                                        other_inps: dict,
                                        reference_information: str):
    yaml_user_inps = CwlTranslator.stringify_translated_inputs(user_inps)
    yaml_other_inps = CwlTranslator.stringify_translated_inputs(other_inps)
    indented_user = "".join(" " * 7 + s
                            for s in yaml_user_inps.splitlines(True))
    indented_other = "".join(" " * 7 + s
                             for s in yaml_other_inps.splitlines(True))

    has_static = len(other_inps) > 0

    tb = " " * 4
    run_args = ["janis run [...run options]", tb + "--inputs inputs.yaml"]

    static_generation = ("" if not has_static else f"""\
   # static inputs
   janis inputs --static {tool.id()} > static.yaml""")
    static_yaml = ("" if not has_static else f"""\
**static.yaml**

.. code-block:: yaml

{indented_other}""")
    if has_static:
        run_args.append(tb + "--inputs static.yaml")

    if isinstance(tool, CommandTool) and not tool.container():
        run_args.append(
            tb +
            f"--container-override '{tool.id()}=<organisation/container:version>'"
        )

    run_args.append(tb + tool.id())
    run_statement = " \\\n".join(" " * 3 + el for el in run_args)

    return f"""\
Example #17
    def inputs_modifier(self, tool: Tool, inputs: Dict, hints: Dict[str, str]) -> Dict:

        new_inputs = {}
        for inp in tool.tool_inputs():
            if not isinstance(inp.intype, File):
                continue
            value = inputs.get(inp.id())
            if value is not None:
                processed_value = self.check_input_for_correctness(
                    inp.id(), inp.intype, value
                )
                if processed_value is not None:
                    new_inputs[inp.id()] = processed_value

        return {**inputs, **new_inputs}
Example #18
    def evaluate_output_params(self, wf: Tool, additional_inputs: dict):

        mapped_inps = CwlTranslator().build_inputs_file(
            wf, recursive=False, additional_inputs=additional_inputs
        )
        output_names: Dict[str, any] = {}
        output_folders: Dict[str, any] = {}

        if isinstance(wf, Workflow):
            for o in wf.output_nodes.values():
                output_names[o.id()] = self.evaluate_output_selector(
                    o.output_name, mapped_inps
                )
                output_folders[o.id()] = self.evaluate_output_selector(
                    o.output_folder, mapped_inps
                )

        outputs: List[WorkflowOutputModel] = []

        for o in wf.tool_outputs():
            ext = None
            innertype = o.outtype
            iscopyable = isinstance(o.outtype, (File, Directory)) or (
                isinstance(o.outtype, Array)
                and isinstance(o.outtype.fundamental_type(), (File, Directory))
            )
            while isinstance(innertype, Array):
                innertype = innertype.subtype()
            if isinstance(o.outtype, File):
                ext = o.outtype.extension
            outputs.append(
                WorkflowOutputModel(
                    tag=o.id(),
                    iscopyable=iscopyable,
                    original_path=None,
                    new_path=None,
                    timestamp=None,
                    output_name=output_names.get(o.id()),
                    output_folder=output_folders.get(o.id()),
                    secondaries=o.outtype.secondary_files(),
                    extension=ext,
                )
            )

        return self.database.outputsDB.insert_many(outputs)
Example #19
    def inputs_modifier(self, wf: Tool, inputs: Dict, hints: Dict[str, str]):
        nin = {**inputs}
        inmap = wf.inputs_map()

        # Change the 'cwd' just for the scope of this block
        with Path(self.cwd):
            for tag, value in nin.items():
                if tag not in inmap:
                    # Only localise tags within the inputsdict that are in the tool inputs
                    # This might be a problem later for passthrough inputs (which Janis doesn't yet support)
                    continue
                intype = inmap[tag].intype

                # If the type of the input is:
                #   (a) A File / Directory
                isfiletype = isinstance(intype.received_type(),
                                        (File, Directory))

                #   (b) Array with a fundamental type (recursively searches if nested array) of File or Directory
                isbasefiletype = isinstance(
                    intype.received_type(), Array) and isinstance(
                        intype.fundamental_type(), (File, Directory))

                # (Skip if not one of these conditions)
                if not (isfiletype or isbasefiletype):
                    try:
                        nin[tag] = intype.coerce_value_if_possible(value)
                    except Exception as e:
                        raise Exception(
                            f"Couldn't coerce the input for '{tag}' ({value}) to type '{intype}': {e}"
                        )
                    continue

                # Fully qualify the filepath
                try:
                    nin[tag] = self.fully_qualify_filename_array_or_single(
                        value)
                except Exception as e:
                    raise Exception(
                        f"Couldn't qualify the filename for the input '{tag}' ({value}) to type '{intype}': {e}"
                    )

            return nin
Example #20
    def inputs_modifier(self, tool: Tool, inputs: Dict,
                        hints: Dict[str, str]) -> Dict:
        """
        Download remote files and replace the inputs with the local files

        :param tool: an instance of janis tool
        :type tool: janis_core.Tool
        :param inputs: a dictionary of tool inputs
        :type inputs: dict
        :param hints:
        :type hints: dict
        :return: modified input
        :rtype: dict
        """
        new_inputs = {}

        for inp in tool.tool_inputs():
            modification_required = False

            if isinstance(inp.intype, File) or (
                    isinstance(inp.intype, Array)
                    and isinstance(inp.intype.fundamental_type(), File)):
                if inp.id() in inputs and inputs[inp.id()] is not None:
                    modification_required = True

            if modification_required:
                source = inputs[inp.id()]
                basedir = self.cache_dir
                os.makedirs(basedir, exist_ok=True)

                new_inputs[inp.id()] = self.localise_inputs(
                    inp.id(),
                    inp.intype,
                    basedir,
                    source,
                    # mfranklin 2021-01-08:
                    # if we specify a remote input, and we're localising files, we should localise secondary files
                    localise_secondary_files=True,
                )

        return {**inputs, **new_inputs}
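
All of these inputs_modifier hooks share the same contract: take the current inputs dict and return an updated copy, which is what lets them be applied one after another. A minimal, hypothetical sketch of that chaining (apply_modifiers and UppercaseModifier are illustrative only, not Janis classes):

# Illustrative only: shows the modifier contract, not a real Janis pipeline.
def apply_modifiers(tool, inputs, hints, modifiers):
    for modifier in modifiers:
        inputs = modifier.inputs_modifier(tool, inputs, hints)
    return inputs


class UppercaseModifier:
    # A toy modifier: uppercases every string input it receives.
    def inputs_modifier(self, tool, inputs, hints):
        new_inputs = {k: v.upper() for k, v in inputs.items() if isinstance(v, str)}
        return {**inputs, **new_inputs}


print(apply_modifiers(None, {"sample": "na12878", "threads": 4}, {},
                      [UppercaseModifier()]))
# {'sample': 'NA12878', 'threads': 4}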
Example #21
    def evaluate_metadata(tool: Tool) -> Union[str, bool]:
        """
        Evaluate if important metadata for documentation is provided

        :param tool: Janis tool
        :type tool: Tool

        :return:  error message or True if all required metadata for this tool exists
        :rtype: Union[str, bool]
        """
        METADATA_KEY_CONTRIBUTORS = "contributors"
        METADATA_KEY_CREATED_DATE = "created date"
        METADATA_KEY_INSTITUTION = "institution"

        if isinstance(tool.metadata, Metadata):
            required = {
                METADATA_KEY_CONTRIBUTORS: tool.metadata.contributors,
                METADATA_KEY_CREATED_DATE: tool.metadata.dateCreated,
                METADATA_KEY_INSTITUTION: tool.metadata.institution,
            }

            missing = []
            for key, field in required.items():
                if field is None or not field:
                    missing.append(key)

            # special case: institution is not required when tool_provider() is set
            if METADATA_KEY_INSTITUTION in missing:
                if tool.tool_provider():
                    missing.remove(METADATA_KEY_INSTITUTION)

            if missing:
                return f"Missing metadata: {', '.join(missing)}"
        # elif isinstance(self.metadata, ...):
        else:
            return "Incorrect metadata class"

        return True
Example #22
    def tool_modifier(self, tool: Tool, inputs: Dict,
                      hints: Dict[str, str]) -> Tool:

        # Build custom pipeline

        w = WorkflowBuilder(tool.id(),
                            friendly_name=tool.friendly_name(),
                            version=tool.version())

        ins = tool.tool_inputs()
        insdict = {i.id(): i for i in ins}
        fields = set(self.batch.fields)

        inkeys = set(i.id() for i in ins)
        invalid_keys = fields - inkeys
        if len(invalid_keys) > 0:
            raise Exception(
                f"Couldn't create batchtool from fields {', '.join(invalid_keys)} "
                f"as they do not exist on '{tool.id()}'")

        if self.batch.groupby not in inputs:
            raise Exception(
                f"the group_by field '{self.batch.groupby}' was not found in the inputs"
            )

        innode_base = {}

        for i in ins:
            if i.id() in fields:
                continue

            default = i.default
            if isinstance(default, Selector):
                default = None

            innode_base[i.id()] = w.input(i.id(),
                                          i.intype,
                                          default=default,
                                          doc=i.doc)

        raw_groupby_values = inputs[self.batch.groupby]

        duplicate_keys = find_duplicates(raw_groupby_values)
        if len(duplicate_keys) > 0:
            raise Exception(
                f"There are duplicate group_by ({self.batch.groupby}) keys in the input: "
                + ", ".join(duplicate_keys))

        groupby_values = [
            Validators.transform_identifier_to_be_valid(ident)
            for ident in raw_groupby_values
        ]
        duplicate_keys = find_duplicates(groupby_values)
        if len(duplicate_keys) > 0:
            raise Exception(
                f"Janis transformed values in the group_by field ({self.batch.groupby}) to be a valid identifiers, "
                f"after this transformation, there were duplicates keys: " +
                ", ".join(duplicate_keys))

        w.input(self.GROUPBY_FIELDNAME, Array(str), value=groupby_values)

        steps_created = []

        stepid_from_gb = lambda gb: f"{gb}_{tool.id()}"

        for gbvalue in groupby_values:

            extra_ins = {}
            for f in fields:
                newkey = f"{f}_{gbvalue}"
                extra_ins[f] = w.input(newkey, insdict[f].intype)

            steps_created.append(
                w.step(stepid_from_gb(gbvalue), tool(**innode_base,
                                                     **extra_ins)))

        for out in tool.tool_outputs():
            output_folders = []
            output_name = out.id()
            if isinstance(tool, WorkflowBase):
                outnode = tool.output_nodes[out.id()]
                output_folders = outnode.output_folder or []

                if outnode.output_name is not None:
                    output_name = outnode.output_name

            for idx, gbvalue, raw_gbvalue in zip(range(len(groupby_values)),
                                                 groupby_values,
                                                 raw_groupby_values):
                transformed_inputs = {
                    **inputs,
                    **{f: inputs[f][idx]
                       for f in fields}
                }

                output_folders_transformed = Operator.evaluate_arg(
                    output_folders, transformed_inputs)
                output_name_transformed = Operator.evaluate_arg(
                    output_name, transformed_inputs)

                w.output(
                    f"{gbvalue}_{out.id()}",
                    source=w[stepid_from_gb(gbvalue)][out.id()],
                    output_name=output_name_transformed,
                    output_folder=[
                        raw_gbvalue, *(output_folders_transformed or [])
                    ],
                )

        return w
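
The naming scheme the batch modifier relies on is easier to see with concrete values; a hypothetical illustration (the tool id 'bwamem', output id 'out' and the sample names are invented):

# Hypothetical illustration of the per-group names created above.
groupby_values = ["NA12878", "NA12891"]
fields = ["bam"]

step_ids = [f"{gb}_bwamem" for gb in groupby_values]
# ['NA12878_bwamem', 'NA12891_bwamem']   (stepid_from_gb)

grouped_input_names = [f"{f}_{gb}" for gb in groupby_values for f in fields]
# ['bam_NA12878', 'bam_NA12891']         (keys of extra_ins)

output_ids = [f"{gb}_out" for gb in groupby_values]
# ['NA12878_out', 'NA12891_out']         (one workflow output per group)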
Example #23
    def from_janis(
        wid: str,
        outdir: str,
        tool: Tool,
        environment: Environment,
        hints: Dict[str, str],
        validation_requirements: Optional[ValidationRequirements],
        batchrun_requirements: Optional[BatchRunRequirements],
        inputs_dict: dict = None,
        dryrun=False,
        watch=True,
        max_cores=None,
        max_memory=None,
        keep_intermediate_files=False,
        run_in_background=True,
        dbconfig=None,
        allow_empty_container=False,
        container_override: dict = None,
        check_files=True,
    ):

        jc = JanisConfiguration.manager()

        # output directory has been created

        environment.identifier += "_" + wid

        tm = WorkflowManager(wid=wid, outdir=outdir, environment=environment)

        tm.database.runs.insert(wid)

        tm.database.workflowmetadata.wid = wid
        tm.database.workflowmetadata.engine = environment.engine
        tm.database.workflowmetadata.filescheme = environment.filescheme
        tm.database.workflowmetadata.environment = environment.id()
        tm.database.workflowmetadata.name = tool.id()
        tm.database.workflowmetadata.start = DateUtil.now()
        tm.database.workflowmetadata.executiondir = None
        tm.database.workflowmetadata.keepexecutiondir = keep_intermediate_files
        tm.database.workflowmetadata.configuration = jc
        tm.database.workflowmetadata.dbconfig = dbconfig

        # This is the only time we're allowed to skip the tm.set_status
        # This is a temporary stop gap until "notification on status" is implemented.
        # tm.set_status(TaskStatus.PROCESSING)
        tm.database.workflowmetadata.status = TaskStatus.PROCESSING

        tm.database.commit()

        spec = get_ideal_specification_for_engine(environment.engine)
        spec_translator = get_translator(spec)
        tool_evaluate = tm.prepare_and_output_workflow_to_evaluate_if_required(
            tool=tool,
            translator=spec_translator,
            validation=validation_requirements,
            batchrun=batchrun_requirements,
            hints=hints,
            additional_inputs=inputs_dict,
            max_cores=max_cores or jc.environment.max_cores,
            max_memory=max_memory or jc.environment.max_ram,
            allow_empty_container=allow_empty_container,
            container_override=container_override,
            check_files=check_files,
        )

        outdir_workflow = tm.get_path_for_component(
            WorkflowManager.WorkflowManagerPath.workflow
        )

        tm.database.workflowmetadata.submission_workflow = os.path.join(
            outdir_workflow, spec_translator.filename(tool_evaluate)
        )
        tm.database.workflowmetadata.submission_inputs = os.path.join(
            outdir_workflow, spec_translator.inputs_filename(tool_evaluate)
        )
        tm.database.workflowmetadata.submission_resources = os.path.join(
            outdir_workflow, spec_translator.dependencies_filename(tool_evaluate)
        )

        tm.database.commit()

        if not dryrun:
            if (
                not run_in_background
                and jc.template
                and jc.template.template
                and jc.template.template.can_run_in_foreground is False
            ):
                raise Exception(
                    f"Your template '{jc.template.template.__class__.__name__}' is not allowed to run "
                    f"in the foreground, try adding the '--background' argument"
                )
            tm.start_or_submit(run_in_background=run_in_background, watch=watch)
        else:
            tm.set_status(TaskStatus.DRY_RUN)

        tm.database.commit()

        return tm
Example #24
    def tool_modifier(self, tool: Tool, inputs: Dict,
                      hints: Dict[str, str]) -> Tool:
        from janis_bioinformatics.data_types import FastaWithDict, Vcf, Bed
        from janis_bioinformatics.tools.illumina import HapPyValidator_0_3_9

        failed_outputs, untyped_outputs = ensure_outputs_are_in_workflow_and_are_compatible(
            tool, self.validation.fields, Vcf())

        if len(failed_outputs) > 0:
            raise Exception(
                f"Some outputs for validation were not found in the tool '{tool.id()}': "
                f"{', '.join(failed_outputs)}")

        if len(untyped_outputs) > 0:
            Logger.critical(
                f"Some outputs for validation from the tool '{tool.id()}' were not "
                f"compatible with VCF: {', '.join(untyped_outputs)}")

        w = WorkflowBuilder(tool.id() + "_validated")

        w.input("validatorReference",
                FastaWithDict,
                value=self.validation.reference)
        w.input("validatorTruthVCF", Vcf, value=self.validation.truthVCF)
        w.input("validatorIntervals",
                Bed(optional=True),
                value=self.validation.intervals)

        inpdict = {
            i.id(): w.input(i.id(), i.intype)
            for i in tool.tool_inputs()
        }
        toolstp = w.step(tool.id(), tool(**inpdict))

        if isinstance(tool, Workflow):
            wf: Workflow = tool
            for o in wf.output_nodes.values():
                w.output(
                    identifier=o.id(),
                    source=toolstp[o.id()],
                    output_folder=o.output_folder,
                    output_name=o.output_name,
                )
        else:
            for o in tool.tool_outputs():
                w.output(identifier=o.id(), source=toolstp[o.id()])

        for o in self.validation.fields:

            sid = "validator_" + o
            valstp = w.step(
                sid,
                HapPyValidator_0_3_9(
                    compareVCF=toolstp[o],
                    reportPrefix=
                    o,  # this will generate an input node with format validator_{o}_reportPrefix
                    reference=w.validatorReference,
                    truthVCF=w.validatorTruthVCF,
                    intervals=w.validatorIntervals,
                ),
            )

            # Connect all the outputs of the validator to an output
            for vo in valstp.tool.outputs():
                w.output(
                    f"validated_{o}_{vo.id()}",
                    source=valstp[vo.id()],
                    output_folder="validated",
                )

        return w
Example #25
    def do_bed_fasta_contig_check(tool: Tool, inputs: Dict[str, any]):
        from janis_bioinformatics.data_types import Fasta, Bed, BedTabix

        supported_bed_types = (Bed, BedTabix)

        beds_inputs = []
        refs = []

        for i in tool.tool_inputs():
            if isinstance(i.intype, supported_bed_types) or (
                    isinstance(i.intype, Array)
                    and isinstance(i.intype.subtype(), supported_bed_types)):
                beds_inputs.append(i)

            if (isinstance(i.intype, Fasta) and i.intype.secondary_files()
                    and ".fai" in i.intype.secondary_files()):
                refs.append(i)

        if len(refs) == 0:
            return
        if len(refs) > 1:
            Logger.info(
                "Skipping bioinformatics FASTA-BED file checks as there was more than one reference"
            )
            return

        for inp_ref in refs:
            value_ref = inputs[inp_ref.id()]
            if not value_ref:
                Logger.warn(
                    f"Skipping '{inp_ref.id()}' as no value was provided")
                continue

            ref_contigs = ContigChecker.get_list_of_contigs_from_fastafai(
                value_ref + ".fai")

            if not ref_contigs:
                Logger.debug(
                    f"Didn't get any contigs from ref {value_ref}.fai, skipping..."
                )
                continue

            for inp_bed in beds_inputs:
                value_bed = inputs[inp_bed.id()]
                is_array = isinstance(value_bed, list)
                beds = value_bed if is_array else [value_bed]
                for b_idx in range(len(beds)):
                    bed = beds[b_idx]

                    bed_contigs = ContigChecker.get_list_of_contigs_from_bed(
                        bed)

                    missing_contigs = bed_contigs - ref_contigs
                    if missing_contigs:
                        inpname = (f"{inp_bed.id()}.{b_idx}"
                                   if is_array else inp_bed.id())
                        contiglist = (", ".join(missing_contigs)
                                      if len(missing_contigs) < 5 else
                                      (", ".join(list(missing_contigs)[:3]) +
                                       "..."))
                        Logger.warn(
                            f"The BED file '{inpname}' contained {len(missing_contigs)} contigs ({contiglist}) that were missing from the reference: {value_ref}"
                        )
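
At its core the check is a set difference between the contigs named in each BED and those listed in the reference .fai; a minimal sketch with invented contig names:

# Hypothetical contig sets, to show the comparison the checker performs.
ref_contigs = {"chr1", "chr2", "chr3"}   # parsed from the reference .fai
bed_contigs = {"chr1", "chr2", "chrM"}   # parsed from a BED input

missing_contigs = bed_contigs - ref_contigs
if missing_contigs:
    print(f"BED contains {len(missing_contigs)} contig(s) missing from the "
          f"reference: {', '.join(sorted(missing_contigs))}")
# BED contains 1 contig(s) missing from the reference: chrM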
Example #26
    def tool_modifier(self, tool: Tool, inputs: Dict,
                      hints: Dict[str, str]) -> Tool:

        # Build custom pipeline

        w = WorkflowBuilder(tool.id(),
                            friendly_name=tool.friendly_name(),
                            version=tool.version())

        ins = tool.tool_inputs()
        insdict = {i.id(): i for i in ins}
        fields = set(self.batch.fields)

        inkeys = set(i.id() for i in ins)
        invalid_keys = fields - inkeys
        if len(invalid_keys) > 0:
            raise Exception(
                f"Couldn't create batchtool from fields {', '.join(invalid_keys)} "
                f"as they do not exist on '{tool.id()}'")

        if self.batch.groupby not in inputs:
            raise Exception(
                f"the group_by field '{self.batch.groupby}' was not found in the inputs"
            )

        innode_base = {}

        for i in ins:
            if i.id() in fields:
                continue

            innode_base[i.id()] = w.input(i.id(),
                                          i.intype,
                                          default=i.default,
                                          doc=i.doc)

        raw_groupby_values = inputs[self.batch.groupby]

        duplicate_keys = find_duplicates(raw_groupby_values)
        if len(duplicate_keys) > 0:
            raise Exception(
                f"There are duplicate group_by ({self.batch.groupby}) keys in the input: "
                + ", ".join(duplicate_keys))

        groupby_values = [
            Validators.transform_identifier_to_be_valid(ident)
            for ident in raw_groupby_values
        ]
        duplicate_keys = find_duplicates(groupby_values)
        if len(duplicate_keys) > 0:
            raise Exception(
                f"Janis transformed values in the group_by field ({self.batch.groupby}) to be a valid identifiers, "
                f"after this transformation, there were duplicates keys: " +
                ", ".join(duplicate_keys))

        w.input(self.GROUPBY_FIELDNAME, Array(str), value=groupby_values)

        steps_created = []

        stepid_from_gb = lambda gb: f"{gb}_{tool.id()}"

        for gbvalue in groupby_values:

            extra_ins = {}
            for f in fields:
                newkey = f"{f}_{gbvalue}"
                extra_ins[f] = w.input(newkey, insdict[f].intype)

            steps_created.append(
                w.step(stepid_from_gb(gbvalue), tool(**innode_base,
                                                     **extra_ins)))

        def transform_token_in_output_namers(token, outputid):
            if token is None:
                return token
            if isinstance(token, list):
                return [
                    transform_token_in_output_namers(t, outputid)
                    for t in token
                ]
            if isinstance(token, InputSelector):
                if token.input_to_select in fields:
                    # need to transform it
                    return InputSelector(f"{token.input_to_select}_{outputid}")
                else:
                    return token
            elif isinstance(token, (str, int, float, bool)):
                return token
            else:
                raise Exception(
                    f"Unsure how to translate token of type {token.__class__.__name__} "
                )

        for out in tool.tool_outputs():
            output_folders = []
            output_name = out.id()
            if isinstance(tool, Workflow):
                outnode = tool.output_nodes[out.id()]
                output_folders = outnode.output_folder or []

                if outnode.output_name:
                    output_name = outnode.output_name

            for gbvalue, raw_gbvalue in zip(groupby_values,
                                            raw_groupby_values):
                # This is pretty hacky, we're relying on the output_folder and output_name to be InputSelectors
                # or a literal value, otherwise this will probably break (this will probably break for expressions)

                output_folders_transformed = transform_token_in_output_namers(
                    output_folders, gbvalue)
                output_name_transformed = transform_token_in_output_namers(
                    output_name, gbvalue)

                w.output(
                    f"{gbvalue}_{out.id()}",
                    source=w[stepid_from_gb(gbvalue)][out.id()],
                    output_name=output_name_transformed,
                    output_folder=[
                        raw_gbvalue, *(output_folders_transformed or [])
                    ],
                )

        return w
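
A rough sketch of the token rewriting that transform_token_in_output_namers performs, using a hypothetical stand-in for InputSelector (only the input_to_select attribute matters here):

# Illustrative stand-in; the real janis InputSelector carries more than this.
class StubInputSelector:
    def __init__(self, input_to_select):
        self.input_to_select = input_to_select

    def __repr__(self):
        return f"InputSelector({self.input_to_select!r})"


fields = {"bam", "sample_name"}


def rename_selector(token, outputid):
    # Selectors over grouped fields are redirected to the per-group input copy;
    # literal values pass through unchanged.
    if isinstance(token, StubInputSelector) and token.input_to_select in fields:
        return StubInputSelector(f"{token.input_to_select}_{outputid}")
    return token


print(rename_selector(StubInputSelector("sample_name"), "NA12878"))
# InputSelector('sample_name_NA12878')
print(rename_selector("output_folder_literal", "NA12878"))
# output_folder_literal   (unchanged)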