def prepare_quickstart(tool: Tool): required_python_input_map = "\n".join(" " * 15 + i.id() + "=None," for i in tool.tool_inputs() if not i.intype.optional) python_step_name = tool.id().lower() + "_step" output_python_code = "\n".join( " " * 7 + f'wf.output("{o.id()}", source={python_step_name}.{o.id()})' for o in tool.tool_outputs()) python_codeblock = f"""\ .. code-block:: python from {tool.__module__} import {tool.__class__.__name__} wf = WorkflowBuilder("myworkflow") wf.step( "{python_step_name}", {tool.__class__.__name__}( {required_python_input_map} ) ) {output_python_code} """ return f"""\
def evaluate_output_params(self, wf: Tool, additional_inputs: dict):
    """
    Resolve the output name/folder selectors for a tool's outputs against the
    supplied inputs, build a WorkflowOutputModel per output, and persist them
    via the outputs database.

    :param wf: the tool (or workflow) whose outputs are being evaluated
    :param additional_inputs: extra input values merged when building the inputs file
    :return: result of ``self.database.outputsDB.insert_many`` for the built models
    """
    # Materialise the concrete input values so selectors can be evaluated.
    mapped_inps = CwlTranslator().build_inputs_file(
        wf, recursive=False, additional_inputs=additional_inputs
    )

    # Only Workflows carry output nodes with name/folder selectors;
    # plain tools fall through with empty maps (lookups below use .get()).
    output_names: Dict[str, any] = {}
    output_folders: Dict[str, any] = {}
    if isinstance(wf, Workflow):
        for o in wf.output_nodes.values():
            output_names[o.id()] = self.evaluate_output_selector(
                o.output_name, mapped_inps
            )
            output_folders[o.id()] = self.evaluate_output_selector(
                o.output_folder, mapped_inps
            )

    outputs: List[WorkflowOutputModel] = []
    for o in wf.tool_outputs():
        # An output is copyable when it's a File/Directory, or an array whose
        # fundamental element type is a File/Directory.
        iscopyable = isinstance(o.outtype, (File, Directory)) or (
            isinstance(o.outtype, Array)
            and isinstance(o.outtype.fundamental_type(), (File, Directory))
        )
        # Only direct File outputs carry an extension we can record.
        ext = o.outtype.extension if isinstance(o.outtype, File) else None
        outputs.append(
            WorkflowOutputModel(
                tag=o.id(),
                iscopyable=iscopyable,
                original_path=None,
                new_path=None,
                timestamp=None,
                output_name=output_names.get(o.id()),
                output_folder=output_folders.get(o.id()),
                secondaries=o.outtype.secondary_files(),
                extension=ext,
            )
        )
    return self.database.outputsDB.insert_many(outputs)
def tool_modifier(self, tool: Tool, inputs: Dict, hints: Dict[str, str]) -> Tool:
    """
    Wrap ``tool`` in a new workflow that also runs hap.py validation over the
    VCF outputs named in ``self.validation.fields``.

    The wrapper exposes all of the tool's inputs/outputs unchanged, adds
    validator inputs (reference, truth VCF, optional intervals), and connects
    one HapPyValidator step per validated field.

    :raises Exception: if any requested validation output doesn't exist on the tool
    """
    from janis_bioinformatics.data_types import FastaWithDict, Vcf, Bed
    from janis_bioinformatics.tools.illumina import HapPyValidator_0_3_9

    # Split requested fields into missing outputs (fatal) and outputs that
    # exist but aren't VCF-compatible (logged, not fatal).
    failed_outputs, untyped_outputs = ensure_outputs_are_in_workflow_and_are_compatible(
        tool, self.validation.fields, Vcf())

    if len(failed_outputs) > 0:
        raise Exception(
            f"Some outputs for validation were not found in the tool '{tool.id()}': "
            f"{', '.join(failed_outputs)}")

    if len(untyped_outputs) > 0:
        Logger.critical(
            f"Some outputs for validation from the tool '{tool.id()}' were not "
            f"compatible with VCF: {', '.join(untyped_outputs)}")

    # New wrapping workflow with validator-specific inputs pre-valued from
    # the validation config.
    w = WorkflowBuilder(tool.id() + "_validated")

    w.input("validatorReference", FastaWithDict, value=self.validation.reference)
    w.input("validatorTruthVCF", Vcf, value=self.validation.truthVCF)
    w.input("validatorIntervals", Bed(optional=True), value=self.validation.intervals)

    # Mirror every tool input onto the wrapper and pass them straight through.
    inpdict = {
        i.id(): w.input(i.id(), i.intype)
        for i in tool.tool_inputs()
    }
    toolstp = w.step(tool.id(), tool(**inpdict))

    # Re-expose the tool's outputs. Workflows carry output name/folder
    # metadata on their output nodes; plain tools do not.
    if isinstance(tool, Workflow):
        wf: Workflow = tool
        for o in wf.output_nodes.values():
            w.output(
                identifier=o.id(),
                source=toolstp[o.id()],
                output_folder=o.output_folder,
                output_name=o.output_name,
            )
    else:
        for o in tool.tool_outputs():
            w.output(identifier=o.id(), source=toolstp[o.id()])

    # One validator step per requested output field.
    for o in self.validation.fields:
        sid = "validator_" + o
        valstp = w.step(
            sid,
            HapPyValidator_0_3_9(
                compareVCF=toolstp[o],
                reportPrefix=o,  # this will generate an input node with format validator_{o}_reportPrefix
                reference=w.validatorReference,
                truthVCF=w.validatorTruthVCF,
                intervals=w.validatorIntervals,
            ),
        )

        # Connect all the outputs of the validator to an output
        for vo in valstp.tool.outputs():
            w.output(
                f"validated_{o}_{vo.id()}",
                source=valstp[vo.id()],
                output_folder="validated",
            )

    return w
def tool_modifier(self, tool: Tool, inputs: Dict, hints: Dict[str, str]) -> Tool:
    """
    Build a batch wrapper workflow: one step per group_by value, with the
    batched fields fanned out into per-group inputs and each step's outputs
    re-exposed under a group-prefixed name.

    :raises Exception: if batch fields don't exist on the tool, the group_by
        key is missing from inputs, or group_by values are duplicated (raw or
        after identifier transformation).
    """
    # Build custom pipeline
    w = WorkflowBuilder(tool.id(),
                        friendly_name=tool.friendly_name(),
                        version=tool.version())

    ins = tool.tool_inputs()
    insdict = {i.id(): i for i in ins}
    fields = set(self.batch.fields)

    # Every batched field must be an actual input of the tool.
    inkeys = set(i.id() for i in ins)
    invalid_keys = fields - inkeys
    if len(invalid_keys) > 0:
        raise Exception(
            f"Couldn't create batchtool from fields {', '.join(invalid_keys)} "
            f"as they do not exist on '{tool.id()}'")

    if self.batch.groupby not in inputs:
        raise Exception(
            f"the group_by field '{self.batch.groupby}' was not found in the inputs"
        )

    # Shared (non-batched) inputs are created once and reused by every step.
    innode_base = {}
    for i in ins:
        if i.id() in fields:
            continue
        default = i.default
        # Selector defaults can't be carried onto the wrapper input node —
        # drop them and let the inner tool apply its own default.
        if isinstance(default, Selector):
            default = None
        innode_base[i.id()] = w.input(i.id(),
                                      i.intype,
                                      default=default,
                                      doc=i.doc)

    raw_groupby_values = inputs[self.batch.groupby]

    # Group keys must be unique both as provided...
    duplicate_keys = find_duplicates(raw_groupby_values)
    if len(duplicate_keys) > 0:
        raise Exception(
            f"There are duplicate group_by ({self.batch.groupby}) keys in the input: "
            + ", ".join(duplicate_keys))

    # ...and after being sanitised into valid Janis identifiers.
    groupby_values = [
        Validators.transform_identifier_to_be_valid(ident)
        for ident in raw_groupby_values
    ]
    duplicate_keys = find_duplicates(groupby_values)
    if len(duplicate_keys) > 0:
        raise Exception(
            f"Janis transformed values in the group_by field ({self.batch.groupby}) to be a valid identifiers, "
            f"after this transformation, there were duplicates keys: "
            + ", ".join(duplicate_keys))

    w.input(self.GROUPBY_FIELDNAME, Array(str), value=groupby_values)

    steps_created = []
    stepid_from_gb = lambda gb: f"{gb}_{tool.id()}"

    # One step per group: batched fields get a per-group input node
    # named "{field}_{group}".
    for gbvalue in groupby_values:
        extra_ins = {}
        for f in fields:
            newkey = f"{f}_{gbvalue}"
            extra_ins[f] = w.input(newkey, insdict[f].intype)

        steps_created.append(
            w.step(stepid_from_gb(gbvalue), tool(**innode_base, **extra_ins)))

    for out in tool.tool_outputs():
        output_folders = []
        output_name = out.id()

        # Workflows may override output naming/foldering on their output nodes.
        if isinstance(tool, WorkflowBase):
            outnode = tool.output_nodes[out.id()]
            output_folders = outnode.output_folder or []
            if outnode.output_name is not None:
                output_name = outnode.output_name

        # Re-expose each step's output, evaluating name/folder selectors
        # against that group's slice of the batched inputs.
        for idx, gbvalue, raw_gbvalue in zip(range(len(groupby_values)),
                                             groupby_values,
                                             raw_groupby_values):
            transformed_inputs = {
                **inputs,
                **{f: inputs[f][idx] for f in fields}
            }

            output_folders_transformed = Operator.evaluate_arg(
                output_folders, transformed_inputs)
            output_name_transformed = Operator.evaluate_arg(
                output_name, transformed_inputs)

            w.output(
                f"{gbvalue}_{out.id()}",
                source=w[stepid_from_gb(gbvalue)][out.id()],
                output_name=output_name_transformed,
                output_folder=[
                    raw_gbvalue, *(output_folders_transformed or [])
                ],
            )

    return w
def tool_modifier(self, tool: Tool, inputs: Dict, hints: Dict[str, str]) -> Tool:
    """
    Build a batch wrapper workflow: one step per group_by value, with the
    batched fields fanned out into per-group inputs and each step's outputs
    re-exposed under a group-prefixed name.

    :raises Exception: if batch fields don't exist on the tool, the group_by
        key is missing from inputs, or group_by values are duplicated (raw or
        after identifier transformation).
    """
    # Build custom pipeline
    w = WorkflowBuilder(tool.id(),
                        friendly_name=tool.friendly_name(),
                        version=tool.version())

    ins = tool.tool_inputs()
    insdict = {i.id(): i for i in ins}
    fields = set(self.batch.fields)

    # Every batched field must be an actual input of the tool.
    inkeys = set(i.id() for i in ins)
    invalid_keys = fields - inkeys
    if len(invalid_keys) > 0:
        raise Exception(
            f"Couldn't create batchtool from fields {', '.join(invalid_keys)} "
            f"as they do not exist on '{tool.id()}'")

    if self.batch.groupby not in inputs:
        raise Exception(
            f"the group_by field '{self.batch.groupby}' was not found in the inputs"
        )

    # Shared (non-batched) inputs are created once and reused by every step.
    innode_base = {}
    for i in ins:
        if i.id() in fields:
            continue
        innode_base[i.id()] = w.input(i.id(),
                                      i.intype,
                                      default=i.default,
                                      doc=i.doc)

    raw_groupby_values = inputs[self.batch.groupby]

    # Group keys must be unique both as provided...
    duplicate_keys = find_duplicates(raw_groupby_values)
    if len(duplicate_keys) > 0:
        raise Exception(
            f"There are duplicate group_by ({self.batch.groupby}) keys in the input: "
            + ", ".join(duplicate_keys))

    # ...and after being sanitised into valid Janis identifiers.
    groupby_values = [
        Validators.transform_identifier_to_be_valid(ident)
        for ident in raw_groupby_values
    ]
    duplicate_keys = find_duplicates(groupby_values)
    if len(duplicate_keys) > 0:
        raise Exception(
            f"Janis transformed values in the group_by field ({self.batch.groupby}) to be a valid identifiers, "
            f"after this transformation, there were duplicates keys: "
            + ", ".join(duplicate_keys))

    w.input(self.GROUPBY_FIELDNAME, Array(str), value=groupby_values)

    steps_created = []
    # FIX: previously this lambda read the enclosing loop variable `gbvalue`
    # instead of its own parameter `gb` (a late-binding closure that only
    # worked because every call site happened to pass the currently-bound
    # value). Use the parameter, matching the other batch implementation.
    stepid_from_gb = lambda gb: f"{gb}_{tool.id()}"

    # One step per group: batched fields get a per-group input node
    # named "{field}_{group}".
    for gbvalue in groupby_values:
        extra_ins = {}
        for f in fields:
            newkey = f"{f}_{gbvalue}"
            extra_ins[f] = w.input(newkey, insdict[f].intype)

        steps_created.append(
            w.step(stepid_from_gb(gbvalue), tool(**innode_base, **extra_ins)))

    def transform_token_in_output_namers(token, outputid):
        """Rewrite InputSelectors over batched fields to their per-group
        input node ("{field}_{group}"); pass literals through unchanged."""
        if token is None:
            return token
        if isinstance(token, list):
            return [
                transform_token_in_output_namers(t, outputid) for t in token
            ]
        if isinstance(token, InputSelector):
            if token.input_to_select in fields:
                # need to transform it
                return InputSelector(f"{token.input_to_select}_{outputid}")
            else:
                return token
        elif isinstance(token, (str, int, float, bool)):
            return token
        else:
            raise Exception(
                f"Unsure how to translate token of type {token.__class__.__name__} "
            )

    for out in tool.tool_outputs():
        output_folders = []
        output_name = out.id()

        # Workflows may override output naming/foldering on their output nodes.
        if isinstance(tool, Workflow):
            outnode = tool.output_nodes[out.id()]
            output_folders = outnode.output_folder or []
            if outnode.output_name:
                output_name = outnode.output_name

        for gbvalue, raw_gbvalue in zip(groupby_values, raw_groupby_values):
            # This is pretty hacky, we're relying on the output_folder and output_name to be InputSelectors
            # or a literal value, otherwise this will probably break (this will probably break for expressions)
            output_folders_transformed = transform_token_in_output_namers(
                output_folders, gbvalue)
            output_name_transformed = transform_token_in_output_namers(
                output_name, gbvalue)

            w.output(
                f"{gbvalue}_{out.id()}",
                source=w[stepid_from_gb(gbvalue)][out.id()],
                output_name=output_name_transformed,
                output_folder=[
                    raw_gbvalue, *(output_folders_transformed or [])
                ],
            )

    return w