Example 1
def prepare_quickstart(tool: Tool):
    # One "<id>=None," line per required (non-optional) input, indented to
    # sit inside the generated step call below
    required_python_input_map = "\n".join(" " * 15 + i.id() + "=None,"
                                          for i in tool.tool_inputs()
                                          if not i.intype.optional)

    python_step_name = tool.id().lower() + "_step"
    output_python_code = "\n".join(
        " " * 7 + f'wf.output("{o.id()}", source={python_step_name}.{o.id()})'
        for o in tool.tool_outputs())
    python_codeblock = f"""\
    .. code-block:: python

       from {tool.__module__} import {tool.__class__.__name__}

       wf = WorkflowBuilder("myworkflow")

       wf.step(
           "{python_step_name}",
           {tool.__class__.__name__}(
{required_python_input_map}
           )
       )
{output_python_code}
    """

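For a hypothetical tool Echo (module my_tools.echo, id "echo") with one required input "inp" and one output "out", the template above would render roughly this reStructuredText block:

    .. code-block:: python

       from my_tools.echo import Echo

       wf = WorkflowBuilder("myworkflow")

       wf.step(
           "echo_step",
           Echo(
               inp=None,
           )
       )
       wf.output("out", source=echo_step.out)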
    return f"""\
    def evaluate_output_params(self, wf: Tool, additional_inputs: dict):

        mapped_inps = CwlTranslator().build_inputs_file(
            wf, recursive=False, additional_inputs=additional_inputs
        )
        output_names: Dict[str, Any] = {}
        output_folders: Dict[str, Any] = {}

        if isinstance(wf, Workflow):
            for o in wf.output_nodes.values():
                output_names[o.id()] = self.evaluate_output_selector(
                    o.output_name, mapped_inps
                )
                output_folders[o.id()] = self.evaluate_output_selector(
                    o.output_folder, mapped_inps
                )

        outputs: List[WorkflowOutputModel] = []

        for o in wf.tool_outputs():
            # Work out whether this output points to copyable files, and
            # record the file extension where there is one
            ext = None
            innertype = o.outtype
            iscopyable = isinstance(o.outtype, (File, Directory)) or (
                isinstance(o.outtype, Array)
                and isinstance(o.outtype.fundamental_type(), (File, Directory))
            )
            while isinstance(innertype, Array):
                innertype = innertype.subtype()
            if isinstance(o.outtype, File):
                ext = o.outtype.extension
            outputs.append(
                WorkflowOutputModel(
                    tag=o.id(),
                    iscopyable=iscopyable,
                    original_path=None,
                    new_path=None,
                    timestamp=None,
                    output_name=output_names.get(o.id()),
                    output_folder=output_folders.get(o.id()),
                    secondaries=o.outtype.secondary_files(),
                    extension=ext,
                )
            )

        return self.database.outputsDB.insert_many(outputs)
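The while isinstance(innertype, Array) loop above peels nested array types down to their element type. A self-contained sketch of the same idea, using stand-in classes rather than the janis_core types:

class Array:
    def __init__(self, subtype):
        self._subtype = subtype

    def subtype(self):
        return self._subtype


class File:
    pass


def innermost_type(dtype):
    # Unwrap Array(Array(...)) until we reach the element type
    while isinstance(dtype, Array):
        dtype = dtype.subtype()
    return dtype


assert isinstance(innermost_type(Array(Array(File()))), File)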
Example 3
    def tool_modifier(self, tool: Tool, inputs: Dict,
                      hints: Dict[str, str]) -> Tool:
        from janis_bioinformatics.data_types import FastaWithDict, Vcf, Bed
        from janis_bioinformatics.tools.illumina import HapPyValidator_0_3_9

        failed_outputs, untyped_outputs = ensure_outputs_are_in_workflow_and_are_compatible(
            tool, self.validation.fields, Vcf())

        if len(failed_outputs) > 0:
            raise Exception(
                f"Some outputs for validation were not found in the tool '{tool.id()}': "
                f"{', '.join(failed_outputs)}")

        if len(untyped_outputs) > 0:
            Logger.critical(
                f"Some outputs for validation from the tool '{tool.id()}' were not "
                f"compatible with VCF: {', '.join(untyped_outputs)}")

        w = WorkflowBuilder(tool.id() + "_validated")

        w.input("validatorReference",
                FastaWithDict,
                value=self.validation.reference)
        w.input("validatorTruthVCF", Vcf, value=self.validation.truthVCF)
        w.input("validatorIntervals",
                Bed(optional=True),
                value=self.validation.intervals)

        inpdict = {
            i.id(): w.input(i.id(), i.intype)
            for i in tool.tool_inputs()
        }
        toolstp = w.step(tool.id(), tool(**inpdict))

        if isinstance(tool, Workflow):
            wf: Workflow = tool
            for o in wf.output_nodes.values():
                w.output(
                    identifier=o.id(),
                    source=toolstp[o.id()],
                    output_folder=o.output_folder,
                    output_name=o.output_name,
                )
        else:
            for o in tool.tool_outputs():
                w.output(identifier=o.id(), source=toolstp[o.id()])

        for o in self.validation.fields:

            sid = "validator_" + o
            valstp = w.step(
                sid,
                HapPyValidator_0_3_9(
                    compareVCF=toolstp[o],
                    # this will generate an input node named validator_{o}_reportPrefix
                    reportPrefix=o,
                    reference=w.validatorReference,
                    truthVCF=w.validatorTruthVCF,
                    intervals=w.validatorIntervals,
                ),
            )

            # Connect all the outputs of the validator to an output
            for vo in valstp.tool.outputs():
                w.output(
                    f"validated_{o}_{vo.id()}",
                    source=valstp[vo.id()],
                    output_folder="validated",
                )

        return w
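The contract of ensure_outputs_are_in_workflow_and_are_compatible is implied by how its results are used above: the first list holds requested fields that are missing from the tool, the second holds fields that exist but are not VCF-compatible. A plausible sketch under those assumptions (not the actual janis helper; can_receive_from is assumed to be the datatype compatibility check):

def ensure_outputs_are_in_workflow_and_are_compatible(tool, fields, expected_type):
    # Sketch only: split the requested output fields into those missing from
    # the tool and those present but not receivable as expected_type
    outs = {o.id(): o for o in tool.tool_outputs()}
    failed = [f for f in fields if f not in outs]
    untyped = [
        f for f in fields
        if f in outs and not expected_type.can_receive_from(outs[f].outtype)
    ]
    return failed, untyped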
Example 4
    def tool_modifier(self, tool: Tool, inputs: Dict,
                      hints: Dict[str, str]) -> Tool:

        # Build custom pipeline

        w = WorkflowBuilder(tool.id(),
                            friendly_name=tool.friendly_name(),
                            version=tool.version())

        ins = tool.tool_inputs()
        insdict = {i.id(): i for i in ins}
        fields = set(self.batch.fields)

        inkeys = set(i.id() for i in ins)
        invalid_keys = fields - inkeys
        if len(invalid_keys) > 0:
            raise Exception(
                f"Couldn't create batchtool from fields {', '.join(invalid_keys)} "
                f"as they do not exist on '{tool.id()}'")

        if self.batch.groupby not in inputs:
            raise Exception(
                f"the group_by field '{self.batch.groupby}' was not found in the inputs"
            )

        innode_base = {}

        for i in ins:
            if i.id() in fields:
                continue

            default = i.default
            if isinstance(default, Selector):
                default = None

            innode_base[i.id()] = w.input(i.id(),
                                          i.intype,
                                          default=default,
                                          doc=i.doc)

        raw_groupby_values = inputs[self.batch.groupby]

        duplicate_keys = find_duplicates(raw_groupby_values)
        if len(duplicate_keys) > 0:
            raise Exception(
                f"There are duplicate group_by ({self.batch.groupby}) keys in the input: "
                + ", ".join(duplicate_keys))

        groupby_values = [
            Validators.transform_identifier_to_be_valid(ident)
            for ident in raw_groupby_values
        ]
        duplicate_keys = find_duplicates(groupby_values)
        if len(duplicate_keys) > 0:
            raise Exception(
                f"Janis transformed values in the group_by field ({self.batch.groupby}) to be a valid identifiers, "
                f"after this transformation, there were duplicates keys: " +
                ", ".join(duplicate_keys))

        w.input(self.GROUPBY_FIELDNAME, Array(str), value=groupby_values)

        steps_created = []

        stepid_from_gb = lambda gb: f"{gb}_{tool.id()}"

        for gbvalue in groupby_values:

            extra_ins = {}
            for f in fields:
                newkey = f"{f}_{gbvalue}"
                extra_ins[f] = w.input(newkey, insdict[f].intype)

            steps_created.append(
                w.step(stepid_from_gb(gbvalue), tool(**innode_base,
                                                     **extra_ins)))

        for out in tool.tool_outputs():
            output_folders = []
            output_name = out.id()
            if isinstance(tool, WorkflowBase):
                outnode = tool.output_nodes[out.id()]
                output_folders = outnode.output_folder or []

                if outnode.output_name is not None:
                    output_name = outnode.output_name

            for idx, (gbvalue, raw_gbvalue) in enumerate(
                    zip(groupby_values, raw_groupby_values)):
                transformed_inputs = {
                    **inputs,
                    **{f: inputs[f][idx]
                       for f in fields}
                }

                output_folders_transformed = Operator.evaluate_arg(
                    output_folders, transformed_inputs)
                output_name_transformed = Operator.evaluate_arg(
                    output_name, transformed_inputs)

                w.output(
                    f"{gbvalue}_{out.id()}",
                    source=w[stepid_from_gb(gbvalue)][out.id()],
                    output_name=output_name_transformed,
                    output_folder=[
                        raw_gbvalue, *(output_folders_transformed or [])
                    ],
                )

        return w
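Both batch modifiers lean on find_duplicates to reject ambiguous group_by keys. Its implied contract, returning the values that occur more than once, suggests a sketch like this (an assumption, not the actual janis utility):

from collections import Counter


def find_duplicates(values):
    # Return each value that appears more than once, in first-seen order
    counts = Counter(values)
    return [v for i, v in enumerate(values)
            if counts[v] > 1 and v not in values[:i]]


print(find_duplicates(["a", "b", "a", "c", "b"]))  # -> ['a', 'b']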
Example 5
    def tool_modifier(self, tool: Tool, inputs: Dict,
                      hints: Dict[str, str]) -> Tool:

        # Build custom pipeline

        w = WorkflowBuilder(tool.id(),
                            friendly_name=tool.friendly_name(),
                            version=tool.version())

        ins = tool.tool_inputs()
        insdict = {i.id(): i for i in ins}
        fields = set(self.batch.fields)

        inkeys = set(i.id() for i in ins)
        invalid_keys = fields - inkeys
        if len(invalid_keys) > 0:
            raise Exception(
                f"Couldn't create batchtool from fields {', '.join(invalid_keys)} "
                f"as they do not exist on '{tool.id()}'")

        if self.batch.groupby not in inputs:
            raise Exception(
                f"the group_by field '{self.batch.groupby}' was not found in the inputs"
            )

        innode_base = {}

        for i in ins:
            if i.id() in fields:
                continue

            innode_base[i.id()] = w.input(i.id(),
                                          i.intype,
                                          default=i.default,
                                          doc=i.doc)

        raw_groupby_values = inputs[self.batch.groupby]

        duplicate_keys = find_duplicates(raw_groupby_values)
        if len(duplicate_keys) > 0:
            raise Exception(
                f"There are duplicate group_by ({self.batch.groupby}) keys in the input: "
                + ", ".join(duplicate_keys))

        groupby_values = [
            Validators.transform_identifier_to_be_valid(ident)
            for ident in raw_groupby_values
        ]
        duplicate_keys = find_duplicates(groupby_values)
        if len(duplicate_keys) > 0:
            raise Exception(
                f"Janis transformed values in the group_by field ({self.batch.groupby}) to be a valid identifiers, "
                f"after this transformation, there were duplicates keys: " +
                ", ".join(duplicate_keys))

        w.input(self.GROUPBY_FIELDNAME, Array(str), value=groupby_values)

        steps_created = []

        stepid_from_gb = lambda gb: f"{gb}_{tool.id()}"

        for gbvalue in groupby_values:

            extra_ins = {}
            for f in fields:
                newkey = f"{f}_{gbvalue}"
                extra_ins[f] = w.input(newkey, insdict[f].intype)

            steps_created.append(
                w.step(stepid_from_gb(gbvalue), tool(**innode_base,
                                                     **extra_ins)))

        def transform_token_in_output_namers(token, outputid):
            if token is None:
                return token
            if isinstance(token, list):
                return [
                    transform_token_in_output_namers(t, outputid)
                    for t in token
                ]
            if isinstance(token, InputSelector):
                if token.input_to_select in fields:
                    # need to transform it
                    return InputSelector(f"{token.input_to_select}_{outputid}")
                else:
                    return token
            elif isinstance(token, (str, int, float, bool)):
                return token
            else:
                raise Exception(
                    f"Unsure how to translate token of type {token.__class__.__name__}"
                )

        for out in tool.tool_outputs():
            output_folders = []
            output_name = out.id()
            if isinstance(tool, Workflow):
                outnode = tool.output_nodes[out.id()]
                output_folders = outnode.output_folder or []

                if outnode.output_name:
                    output_name = outnode.output_name

            for gbvalue, raw_gbvalue in zip(groupby_values,
                                            raw_groupby_values):
                # This is pretty hacky: we rely on output_folder and output_name being
                # InputSelectors or literal values; more complex expressions will probably break

                output_folders_transformed = transform_token_in_output_namers(
                    output_folders, gbvalue)
                output_name_transformed = transform_token_in_output_namers(
                    output_name, gbvalue)

                w.output(
                    f"{gbvalue}_{out.id()}",
                    source=w[stepid_from_gb(gbvalue)][out.id()],
                    output_name=output_name_transformed,
                    output_folder=[
                        raw_gbvalue, *(output_folders_transformed or [])
                    ],
                )

        return w
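To see what transform_token_in_output_namers does to an output namer, here is a self-contained demo with a stand-in InputSelector (the real class lives in janis_core):

class InputSelector:
    # Stand-in for janis_core's InputSelector, just enough for this demo
    def __init__(self, input_to_select):
        self.input_to_select = input_to_select

    def __repr__(self):
        return f"InputSelector({self.input_to_select!r})"


fields = {"bam"}  # batch fields, as in self.batch.fields above


def rewrite(token, gbvalue):
    # Mirrors the recursion above, minus the unsupported-type error
    if isinstance(token, list):
        return [rewrite(t, gbvalue) for t in token]
    if isinstance(token, InputSelector) and token.input_to_select in fields:
        # Redirect the selector at the per-group input node
        return InputSelector(f"{token.input_to_select}_{gbvalue}")
    return token


print(rewrite([InputSelector("bam"), "literal"], "sampleA"))
# -> [InputSelector('bam_sampleA'), 'literal']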