def evaluate_translation(tool: Tool) -> Union[str, bool]:
    """
    Evaluate if we can successfully translate to wdl and cwl

    # TODO: validate translations (will look into better way to ensure validation tool exists)

    :param tool: Janis tool
    :type tool: Tool
    :return: error message or True if we can successfully translate to wdl and cwl
    :rtype: Union[str, bool]
    """
    engines = test_helpers.get_available_engines()
    output_dir = os.path.join(os.getcwd(), "tests_output", tool.id())

    errors = []
    for engine in engines:
        try:
            translator = engines[engine]
            translator.translate(
                tool, export_path=output_dir, to_console=False, to_disk=True
            )
        except Exception as e:
            errors.append(f"{translator.name}: translation failed {str(e)}")

    if errors:
        return ", ".join(errors)

    return True

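# Usage sketch (not part of the original module): aggregating translation failures
# across a collection of tools. `evaluate_translation` above returns either True or a
# comma-separated error string, so callers only need to compare against True. The
# `tools` iterable is assumed to be supplied by the caller (e.g. from a tool registry),
# and the typing names reuse the module's existing imports.
def collect_translation_failures(tools: List[Tool]) -> Dict[str, str]:
    failures = {}
    for t in tools:
        result = evaluate_translation(t)
        if result is not True:
            # `result` is the joined error string produced by evaluate_translation
            failures[t.id()] = result
    return failures
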
def prepare_tool(
    tool: Tool,
    toolversions: List[str],
    isorphan: bool,
    is_published_pipeline: bool = False,
):
    # Stuff to list on the documentation page:
    #   - Versions of tools
    #   - Generated command
    #   - Cool if it grouped the tools by vendor
    #   -
    if not tool:
        return None
    try:
        if is_published_pipeline:
            return ""
        if tool.type() == ToolType.CommandTool:
            return prepare_commandtool_page(tool, toolversions)
        elif tool.type() == ToolType.Workflow:
            return prepare_workflow_page(tool, toolversions)
        elif tool.type() == ToolType.CodeTool:
            return prepare_code_tool_page(tool, toolversions)
    except Exception as e:
        traceback.print_exc()
        Logger.critical(
            "Couldn't generate documentation for " + tool.id() + " " + str(e)
        )

def prepare_quickstart(tool: Tool):
    required_python_input_map = "\n".join(
        " " * 15 + i.id() + "=None,"
        for i in tool.tool_inputs()
        if not i.intype.optional
    )

    python_step_name = tool.id().lower() + "_step"
    output_python_code = "\n".join(
        " " * 7 + f'wf.output("{o.id()}", source={python_step_name}.{o.id()})'
        for o in tool.tool_outputs()
    )

    python_codeblock = f"""\
   .. code-block:: python

       from {tool.__module__} import {tool.__class__.__name__}

       wf = WorkflowBuilder("myworkflow")

       wf.step(
           "{python_step_name}",
           {tool.__class__.__name__}(
{required_python_input_map}
           )
       )
{output_python_code}
"""

    return f"""\
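# Illustrative sketch (hypothetical, not output captured from a real tool): for a tool
# class `MyTool` (module `my_tools`) whose id() is "mytool", with one required input
# "bam" and a single output "out", the `python_codeblock` built above would render
# roughly like:
#
#    .. code-block:: python
#
#        from my_tools import MyTool
#
#        wf = WorkflowBuilder("myworkflow")
#
#        wf.step(
#            "mytool_step",
#            MyTool(
#                bam=None,
#            )
#        )
#        wf.output("out", source=mytool_step.out)
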
def prepare_run_instructions_input_file(
    tool: Tool, user_inps: dict, other_inps: dict, reference_information: str
):
    yaml_user_inps = CwlTranslator.stringify_translated_inputs(user_inps)
    yaml_other_inps = CwlTranslator.stringify_translated_inputs(other_inps)
    indented_user = "".join(" " * 7 + s for s in yaml_user_inps.splitlines(True))
    indented_other = "".join(" " * 7 + s for s in yaml_other_inps.splitlines(True))

    not_localising_secondary_warning = ""
    if isinstance(tool, WorkflowBase):
        inputs_that_arent_localising_secondary_files = [
            t.id()
            for t in tool.tool_inputs()
            if t.doc.skip_sourcing_secondary_files
        ]
        if len(inputs_that_arent_localising_secondary_files) > 0:
            not_localising_secondary_warning = f"""\
.. warning::

   The secondary files for the inputs '{"', '".join(inputs_that_arent_localising_secondary_files)}' will not automatically \
localise using janis prepare and are built just after download. Please note this can take a few hours to build \
before the pipeline runs.
"""

    has_static = len(other_inps) > 0

    tb = " " * 4
    run_args = ["janis run [...run options]", tb + "--inputs inputs.yaml"]

    static_generation = (
        ""
        if not has_static
        else f"""\
# static inputs
janis inputs --static {tool.id()} > static.yaml"""
    )
    static_yaml = (
        ""
        if not has_static
        else f"""\
**static.yaml**

.. code-block:: yaml

{indented_other}"""
    )
    if has_static:
        run_args.append(tb + "--inputs static.yaml")

    if isinstance(tool, CommandTool) and not tool.container():
        run_args.append(
            tb + f"--container-override '{tool.id()}=<organisation/container:version>'"
        )

    run_args.append(tb + tool.id())
    run_statement = " \\\n".join(" " * 3 + el for el in run_args)

    if reference_information:
        reference_information = f"The following inputs have a suggested source. Using janis prepare with the relevant \
``--source-hint`` will automatically download these files. See `below <#additional-configuration-inputs>`_ for \
more information about inputs for {tool.id()}.\n{reference_information}"

    return f"""\
def prepare_run_instructions_input_file(
    tool: Tool, user_inps: dict, other_inps: dict, reference_information: str
):
    yaml_user_inps = CwlTranslator.stringify_translated_inputs(user_inps)
    yaml_other_inps = CwlTranslator.stringify_translated_inputs(other_inps)
    indented_user = "".join(" " * 7 + s for s in yaml_user_inps.splitlines(True))
    indented_other = "".join(" " * 7 + s for s in yaml_other_inps.splitlines(True))

    has_static = len(other_inps) > 0

    tb = " " * 4
    run_args = ["janis run [...run options]", tb + "--inputs inputs.yaml"]

    static_generation = (
        ""
        if not has_static
        else f"""\
# static inputs
janis inputs --static {tool.id()} > static.yaml"""
    )
    static_yaml = (
        ""
        if not has_static
        else f"""\
**static.yaml**

.. code-block:: yaml

{indented_other}"""
    )
    if has_static:
        run_args.append(tb + "--inputs static.yaml")

    if isinstance(tool, CommandTool) and not tool.container():
        run_args.append(
            tb + f"--container-override '{tool.id()}=<organisation/container:version>'"
        )

    run_args.append(tb + tool.id())
    run_statement = " \\\n".join(" " * 3 + el for el in run_args)

    return f"""\
def tool_modifier(self, tool: Tool, inputs: Dict, hints: Dict[str, str]) -> Tool:
    from janis_bioinformatics.data_types import FastaWithDict, Vcf, Bed
    from janis_bioinformatics.tools.illumina import HapPyValidator_0_3_9

    failed_outputs, untyped_outputs = ensure_outputs_are_in_workflow_and_are_compatible(
        tool, self.validation.fields, Vcf()
    )

    if len(failed_outputs) > 0:
        raise Exception(
            f"Some outputs for validation were not found in the tool '{tool.id()}': "
            f"{', '.join(failed_outputs)}"
        )

    if len(untyped_outputs) > 0:
        Logger.critical(
            f"Some outputs for validation from the tool '{tool.id()}' were not "
            f"compatible with VCF: {', '.join(untyped_outputs)}"
        )

    w = WorkflowBuilder(tool.id() + "_validated")

    w.input("validatorReference", FastaWithDict, value=self.validation.reference)
    w.input("validatorTruthVCF", Vcf, value=self.validation.truthVCF)
    w.input(
        "validatorIntervals", Bed(optional=True), value=self.validation.intervals
    )

    inpdict = {i.id(): w.input(i.id(), i.intype) for i in tool.tool_inputs()}
    toolstp = w.step(tool.id(), tool(**inpdict))

    if isinstance(tool, Workflow):
        wf: Workflow = tool
        for o in wf.output_nodes.values():
            w.output(
                identifier=o.id(),
                source=toolstp[o.id()],
                output_folder=o.output_folder,
                output_name=o.output_name,
            )
    else:
        for o in tool.tool_outputs():
            w.output(identifier=o.id(), source=toolstp[o.id()])

    for o in self.validation.fields:
        sid = "validator_" + o
        valstp = w.step(
            sid,
            HapPyValidator_0_3_9(
                compareVCF=toolstp[o],
                # this will generate an input node with format validator_{o}_reportPrefix
                reportPrefix=o,
                reference=w.validatorReference,
                truthVCF=w.validatorTruthVCF,
                intervals=w.validatorIntervals,
            ),
        )

        # Connect all the outputs of the validator to an output
        for vo in valstp.tool.outputs():
            w.output(
                f"validated_{o}_{vo.id()}",
                source=valstp[vo.id()],
                output_folder="validated",
            )

    return w

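# Helper sketch (assumption): `ensure_outputs_are_in_workflow_and_are_compatible` is
# referenced above but not defined in this excerpt. Conceptually it partitions the
# requested validation fields into outputs missing from the tool and outputs whose
# declared type cannot be received as the expected type; a rough shape might be:
def ensure_outputs_are_in_workflow_and_are_compatible(tool, fields, expected_type):
    outputs_by_id = {o.id(): o for o in tool.tool_outputs()}
    # fields requested for validation that the tool doesn't produce at all
    failed = [f for f in fields if f not in outputs_by_id]
    # fields the tool does produce, but with a type the expected type can't accept
    untyped = [
        f
        for f in fields
        if f in outputs_by_id
        and not expected_type.can_receive_from(outputs_by_id[f].outtype)
    ]
    return failed, untyped
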
def from_janis(
    wid: str,
    outdir: str,
    tool: Tool,
    environment: Environment,
    hints: Dict[str, str],
    validation_requirements: Optional[ValidationRequirements],
    batchrun_requirements: Optional[BatchRunRequirements],
    inputs_dict: dict = None,
    dryrun=False,
    watch=True,
    max_cores=None,
    max_memory=None,
    keep_intermediate_files=False,
    run_in_background=True,
    dbconfig=None,
    allow_empty_container=False,
    container_override: dict = None,
    check_files=True,
):
    jc = JanisConfiguration.manager()

    # output directory has been created
    environment.identifier += "_" + wid

    tm = WorkflowManager(wid=wid, outdir=outdir, environment=environment)

    tm.database.runs.insert(wid)

    tm.database.workflowmetadata.wid = wid
    tm.database.workflowmetadata.engine = environment.engine
    tm.database.workflowmetadata.filescheme = environment.filescheme
    tm.database.workflowmetadata.environment = environment.id()
    tm.database.workflowmetadata.name = tool.id()
    tm.database.workflowmetadata.start = DateUtil.now()
    tm.database.workflowmetadata.executiondir = None
    tm.database.workflowmetadata.keepexecutiondir = keep_intermediate_files
    tm.database.workflowmetadata.configuration = jc
    tm.database.workflowmetadata.dbconfig = dbconfig

    # This is the only time we're allowed to skip the tm.set_status
    # This is a temporary stop gap until "notification on status" is implemented.
    # tm.set_status(TaskStatus.PROCESSING)
    tm.database.workflowmetadata.status = TaskStatus.PROCESSING

    tm.database.commit()

    spec = get_ideal_specification_for_engine(environment.engine)
    spec_translator = get_translator(spec)
    tool_evaluate = tm.prepare_and_output_workflow_to_evaluate_if_required(
        tool=tool,
        translator=spec_translator,
        validation=validation_requirements,
        batchrun=batchrun_requirements,
        hints=hints,
        additional_inputs=inputs_dict,
        max_cores=max_cores or jc.environment.max_cores,
        max_memory=max_memory or jc.environment.max_ram,
        allow_empty_container=allow_empty_container,
        container_override=container_override,
        check_files=check_files,
    )

    outdir_workflow = tm.get_path_for_component(
        WorkflowManager.WorkflowManagerPath.workflow
    )

    tm.database.workflowmetadata.submission_workflow = os.path.join(
        outdir_workflow, spec_translator.filename(tool_evaluate)
    )
    tm.database.workflowmetadata.submission_inputs = os.path.join(
        outdir_workflow, spec_translator.inputs_filename(tool_evaluate)
    )
    tm.database.workflowmetadata.submission_resources = os.path.join(
        outdir_workflow, spec_translator.dependencies_filename(tool_evaluate)
    )

    tm.database.commit()

    if not dryrun:
        if (
            not run_in_background
            and jc.template
            and jc.template.template
            and jc.template.template.can_run_in_foreground is False
        ):
            raise Exception(
                f"Your template '{jc.template.template.__class__.__name__}' is not allowed to run "
                f"in the foreground, try adding the '--background' argument"
            )
        tm.start_or_submit(run_in_background=run_in_background, watch=watch)
    else:
        tm.set_status(TaskStatus.DRY_RUN)

    tm.database.commit()

    return tm

def tool_modifier(self, tool: Tool, inputs: Dict, hints: Dict[str, str]) -> Tool:
    # Build custom pipeline
    w = WorkflowBuilder(
        tool.id(), friendly_name=tool.friendly_name(), version=tool.version()
    )

    ins = tool.tool_inputs()
    insdict = {i.id(): i for i in ins}
    fields = set(self.batch.fields)

    inkeys = set(i.id() for i in ins)
    invalid_keys = fields - inkeys
    if len(invalid_keys) > 0:
        raise Exception(
            f"Couldn't create batchtool from fields {', '.join(invalid_keys)} "
            f"as they do not exist on '{tool.id()}'"
        )

    if self.batch.groupby not in inputs:
        raise Exception(
            f"The group_by field '{self.batch.groupby}' was not found in the inputs"
        )

    innode_base = {}

    for i in ins:
        if i.id() in fields:
            continue

        default = i.default
        if isinstance(default, Selector):
            default = None

        innode_base[i.id()] = w.input(i.id(), i.intype, default=default, doc=i.doc)

    raw_groupby_values = inputs[self.batch.groupby]

    duplicate_keys = find_duplicates(raw_groupby_values)
    if len(duplicate_keys) > 0:
        raise Exception(
            f"There are duplicate group_by ({self.batch.groupby}) keys in the input: "
            + ", ".join(duplicate_keys)
        )

    groupby_values = [
        Validators.transform_identifier_to_be_valid(ident)
        for ident in raw_groupby_values
    ]

    duplicate_keys = find_duplicates(groupby_values)
    if len(duplicate_keys) > 0:
        raise Exception(
            f"Janis transformed values in the group_by field ({self.batch.groupby}) to be valid identifiers; "
            f"after this transformation, there were duplicate keys: "
            + ", ".join(duplicate_keys)
        )

    w.input(self.GROUPBY_FIELDNAME, Array(str), value=groupby_values)

    steps_created = []

    stepid_from_gb = lambda gb: f"{gb}_{tool.id()}"

    for gbvalue in groupby_values:

        extra_ins = {}
        for f in fields:
            newkey = f"{f}_{gbvalue}"
            extra_ins[f] = w.input(newkey, insdict[f].intype)

        steps_created.append(
            w.step(stepid_from_gb(gbvalue), tool(**innode_base, **extra_ins))
        )

    for out in tool.tool_outputs():
        output_folders = []
        output_name = out.id()
        if isinstance(tool, WorkflowBase):
            outnode = tool.output_nodes[out.id()]
            output_folders = outnode.output_folder or []
            if outnode.output_name is not None:
                output_name = outnode.output_name

        for idx, (gbvalue, raw_gbvalue) in enumerate(
            zip(groupby_values, raw_groupby_values)
        ):
            transformed_inputs = {**inputs, **{f: inputs[f][idx] for f in fields}}

            output_folders_transformed = Operator.evaluate_arg(
                output_folders, transformed_inputs
            )
            output_name_transformed = Operator.evaluate_arg(
                output_name, transformed_inputs
            )

            w.output(
                f"{gbvalue}_{out.id()}",
                source=w[stepid_from_gb(gbvalue)][out.id()],
                output_name=output_name_transformed,
                output_folder=[raw_gbvalue, *(output_folders_transformed or [])],
            )

    return w

def tool_modifier(self, tool: Tool, inputs: Dict, hints: Dict[str, str]) -> Tool:
    # Build custom pipeline
    w = WorkflowBuilder(
        tool.id(), friendly_name=tool.friendly_name(), version=tool.version()
    )

    ins = tool.tool_inputs()
    insdict = {i.id(): i for i in ins}
    fields = set(self.batch.fields)

    inkeys = set(i.id() for i in ins)
    invalid_keys = fields - inkeys
    if len(invalid_keys) > 0:
        raise Exception(
            f"Couldn't create batchtool from fields {', '.join(invalid_keys)} "
            f"as they do not exist on '{tool.id()}'"
        )

    if self.batch.groupby not in inputs:
        raise Exception(
            f"The group_by field '{self.batch.groupby}' was not found in the inputs"
        )

    innode_base = {}

    for i in ins:
        if i.id() in fields:
            continue

        innode_base[i.id()] = w.input(i.id(), i.intype, default=i.default, doc=i.doc)

    raw_groupby_values = inputs[self.batch.groupby]

    duplicate_keys = find_duplicates(raw_groupby_values)
    if len(duplicate_keys) > 0:
        raise Exception(
            f"There are duplicate group_by ({self.batch.groupby}) keys in the input: "
            + ", ".join(duplicate_keys)
        )

    groupby_values = [
        Validators.transform_identifier_to_be_valid(ident)
        for ident in raw_groupby_values
    ]
    duplicate_keys = find_duplicates(groupby_values)
    if len(duplicate_keys) > 0:
        raise Exception(
            f"Janis transformed values in the group_by field ({self.batch.groupby}) to be valid identifiers; "
            f"after this transformation, there were duplicate keys: "
            + ", ".join(duplicate_keys)
        )

    w.input(self.GROUPBY_FIELDNAME, Array(str), value=groupby_values)

    steps_created = []

    stepid_from_gb = lambda gb: f"{gb}_{tool.id()}"

    for gbvalue in groupby_values:

        extra_ins = {}
        for f in fields:
            newkey = f"{f}_{gbvalue}"
            extra_ins[f] = w.input(newkey, insdict[f].intype)

        steps_created.append(
            w.step(stepid_from_gb(gbvalue), tool(**innode_base, **extra_ins))
        )

    def transform_token_in_output_namers(token, outputid):
        if token is None:
            return token
        if isinstance(token, list):
            return [transform_token_in_output_namers(t, outputid) for t in token]
        if isinstance(token, InputSelector):
            if token.input_to_select in fields:
                # need to transform it
                return InputSelector(f"{token.input_to_select}_{outputid}")
            else:
                return token
        elif isinstance(token, (str, int, float, bool)):
            return token
        else:
            raise Exception(
                f"Unsure how to translate token of type {token.__class__.__name__}"
            )

    for out in tool.tool_outputs():
        output_folders = []
        output_name = out.id()
        if isinstance(tool, Workflow):
            outnode = tool.output_nodes[out.id()]
            output_folders = outnode.output_folder or []
            if outnode.output_name:
                output_name = outnode.output_name

        for gbvalue, raw_gbvalue in zip(groupby_values, raw_groupby_values):
            # This is pretty hacky: we're relying on the output_folder and output_name
            # to be InputSelectors or literal values, otherwise this will probably break
            # (it will probably break for expressions).
            output_folders_transformed = transform_token_in_output_namers(
                output_folders, gbvalue
            )
            output_name_transformed = transform_token_in_output_namers(
                output_name, gbvalue
            )

            w.output(
                f"{gbvalue}_{out.id()}",
                source=w[stepid_from_gb(gbvalue)][out.id()],
                output_name=output_name_transformed,
                output_folder=[raw_gbvalue, *(output_folders_transformed or [])],
            )

    return w

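# Helper sketch (assumption): `find_duplicates` is referenced by both batch
# `tool_modifier` variants above but is not defined in this excerpt. A minimal
# implementation returning each value that appears more than once, in first-seen
# order, could look like this:
def find_duplicates(values) -> List:
    seen = set()
    duplicates = []
    for v in values:
        if v in seen and v not in duplicates:
            duplicates.append(v)
        seen.add(v)
    return duplicates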