def generate_pipeline_box(workflow: Workflow, leading_space=""):
    """
    Render an HTML "card" summarising ``workflow`` for a pipeline index page.

    :param workflow: the workflow to summarise (id, friendly name, version,
        bound metadata).
    :param leading_space: prefix prepended to every output line so the
        snippet can be indented inside a parent document.
    :return: the HTML snippet as a single newline-joined string.
    """
    meta: WorkflowMetadata = workflow.bind_metadata() or workflow.metadata

    def tag_component(tag):
        # Pill-style element rendered for each keyword.
        return f'<span class="no-select tagelement">{tag}</span>'

    href = f"{workflow.id().lower()}.html"
    tags = "".join(tag_component(t) for t in (meta.keywords or []))

    contributors = meta.contributors or []
    max_display_conts = 5
    if len(contributors) < max_display_conts:
        contributorstr = ", ".join(meta.contributors or ["None"])
    else:
        # Show the first few names and collapse the rest into "and N others".
        nothers = len(contributors) - max_display_conts + 2
        contributorstr = (
            ", ".join(meta.contributors[: max_display_conts - 2])
            + f" and {nothers} others"
        )

    return "\n".join(
        leading_space + l
        for l in f"""
<div class="col-6" style="margin: 10px; padding: 20px; border: 1px solid #e3e3e3; border-radius: 5px;">
  <h4 style="margin-bottom: 10px"><a href="{href}">{workflow.friendly_name()}</a></h4>
  {f'<p>{tags}</p>' if tags else ""}
  <p>{meta.short_documentation or "<em>Short documentation required</em>"}</p>
  {version_html(workflow.version() or meta.version)}
  <p style="margin-bottom: 0px; font-size: 12px">Contributors: {contributorstr}</p>
</div>""".split(
            "\n"
        )
    )
def cascade_batchrun_inputs(
    workflow: Workflow, inputs: List[Dict], options: BatchRunRequirements
):
    """
    Merge a list of input dictionaries for a batch run.

    Values for the grouped fields (``options.fields`` plus ``options.groupby``)
    are accumulated into lists — wrapping a value in an extra array level when
    it is shallower than the workflow's declared input type expects. Every
    other key is simply overwritten by later dictionaries (last one wins).
    """
    grouped_fields = {options.groupby, *options.fields}
    input_types = workflow.inputs_map()
    # One extra array level on top of the declared janis array depth,
    # because batching stacks one run's value per group entry.
    expected_depth = {
        field: 1 + count_janisarray_depth(input_types[field].intype)
        for field in grouped_fields
    }

    merged = {}
    for input_dict in inputs:
        for key, value in input_dict.items():
            if key not in grouped_fields:
                # Non-grouped keys: later dictionaries overwrite earlier ones.
                merged[key] = value
                continue
            bucket = merged.setdefault(key, [])
            # Wrap values that are shallower than the expected array depth
            # so extend() appends them as a single element.
            if count_array_depth(value) < expected_depth[key]:
                value = [value]
            bucket.extend(value)

    return merged
def ingest_workflow_input(self, wf: j.Workflow, inp):
    """Register a single CWL workflow input on the janis workflow ``wf``."""
    identifier = self.get_tag_from_identifier(inp.id)
    datatype = self.ingest_cwl_type(inp.type, secondary_files=inp.secondaryFiles)
    return wf.input(
        identifier=identifier,
        datatype=datatype,
        default=inp.default,
        doc=inp.doc,
    )
def parse_workflow_source(self, wf: j.Workflow, step_input, potential_prefix: Optional[str] = None):
    """
    Resolve a CWL step-input source reference to a node (or node output)
    of the janis workflow ``wf``.

    Handles None (returns None), lists (resolved element-wise), and plain
    string references. Strips a leading "<workflow-id>/" and, when given,
    a leading "<potential_prefix>/" before looking the node up.

    Raises for non-string inputs and for CWL expressions ("$(...)"), which
    this parser does not support.
    """
    if step_input is None:
        return None
    if isinstance(step_input, list):
        # A source may fan in from several upstream outputs; resolve each.
        return [
            self.parse_workflow_source(wf, si, potential_prefix=potential_prefix)
            for si in step_input
        ]
    if not isinstance(step_input, str):
        raise Exception(f"Can't parse step_input {step_input}")
    parsed_step_input = self.get_source_from_identifier(step_input)
    # Strip a fully-qualified "<wf-id>/" prefix, if present.
    if parsed_step_input.startswith(wf.id() + "/"):
        parsed_step_input = parsed_step_input[len(wf.id()) + 1:]
    # Strip the caller-supplied scope prefix (e.g. the current step's id).
    if potential_prefix and parsed_step_input.startswith(potential_prefix + "/"):
        parsed_step_input = parsed_step_input[len(potential_prefix) + 1:]
    if parsed_step_input.startswith("$("):
        raise Exception(
            f"This script can't parse expressions in the step input {step_input}"
        )
    # "a/b/c" -> source "b", tag "c" (any earlier components are ignored);
    # a bare "name" -> source "name", tag None.
    [*ignore, source_str, tag_str] = (
        parsed_step_input.split("/")
        if "/" in parsed_step_input
        else (parsed_step_input, None)
    )
    # NOTE(review): called even when tag_str is None — presumably the helper
    # passes None through; confirm against get_tag_from_identifier.
    tag_str = self.get_tag_from_identifier(tag_str)
    if source_str not in wf.nodes:
        raise KeyError(f"Couldn't find input / step {source_str} in nodes")
    source = wf[source_str]
    # Imported locally, presumably to avoid a circular import at module load.
    from janis_core.workflow.workflow import StepNode
    # For step nodes, a tag selects a specific output of that step.
    if tag_str and isinstance(source, StepNode):
        source = source.get_item(tag_str)
    return source
def ingest_workflow_step(self, wf: j.Workflow, stp):
    """
    Translate a CWL WorkflowStep into a janis step on ``wf``.

    Resolves the step's tool (either embedded inline in the document, or
    parsed from the referenced document), maps each step input to its
    source (workflow source, valueFrom expression, or literal default),
    and attaches any scatter description.
    """
    import cwl_utils.parser_v1_2 as cwlgen

    stp: cwlgen.WorkflowStep = stp
    step_identifier = self.get_tag_from_identifier(stp.id)

    if isinstance(stp.run, (self.cwlgen.CommandLineTool, self.cwlgen.Workflow)):
        # Tool definition is embedded inline in this workflow document.
        tool = self.from_loaded_doc(stp.run)
    else:
        # Tool is referenced by path/URI; parse it as its own document.
        tool = CWlParser.from_doc(stp.run, base_uri=self.base_uri)

    inputs = {}
    for inp in stp.in_:
        inp: cwlgen.WorkflowStepInput = inp
        inp_identifier = self.get_tag_from_identifier(inp.id)

        source = None
        if inp.source is not None:
            source = self.parse_workflow_source(
                wf, inp.source, potential_prefix=step_identifier
            )
        elif inp.valueFrom is not None:
            source = self.parse_basic_expression(inp.valueFrom)
        elif inp.default is not None:
            # BUGFIX: compare against None (not truthiness) so falsy
            # defaults like 0, False or "" are still honoured.
            source = inp.default

        if source is None:
            print(f"Source is None from object: {inp.save()}")
        inputs[inp_identifier] = source

    scatter = None
    if stp.scatter:
        scatter_fields_raw = stp.scatter
        if not isinstance(scatter_fields_raw, list):
            scatter_fields_raw = [scatter_fields_raw]

        scatter_fields = []
        for field in scatter_fields_raw:
            # Scatter fields may be namespaced ("step/input"); keep only
            # the final component — the input being scattered over.
            [*other_fields, input_to_scatter] = field.split("/")
            scatter_fields.append(input_to_scatter)
        scatter_method = stp.scatterMethod
        scatter = j.ScatterDescription(
            fields=scatter_fields, method=self.ingest_scatter_method(scatter_method)
        )

    return wf.step(
        identifier=step_identifier,
        tool=tool(**inputs),
        scatter=scatter,
        when=None,
        doc=stp.doc,
    )
def ingest_workflow_output(self, wf: j.Workflow, out):
    """Register a single CWL workflow output on the janis workflow ``wf``."""
    import cwl_utils.parser_v1_2 as cwlgen

    out: cwlgen.WorkflowOutputParameter = out
    identifier = self.get_tag_from_identifier(out.id)
    source = self.parse_workflow_source(
        wf, out.outputSource, potential_prefix=identifier
    )
    datatype = self.ingest_cwl_type(out.type, secondary_files=out.secondaryFiles)
    return wf.output(
        identifier=identifier,
        datatype=datatype,
        source=source,
    )
def cascade_inputs(
    wf: Workflow,
    inputs: Optional[Union[Dict, List[Union[str, Dict]]]],
    required_inputs: Optional[Dict],
    batchrun_options: Optional[BatchRunRequirements],
):
    """
    Collect input dictionaries from in-memory dicts and input files, append
    any explicitly required inputs, then merge everything — batch-run aware
    when ``batchrun_options`` is supplied.
    """
    collected: List[Dict] = []

    if inputs:
        items = inputs if isinstance(inputs, list) else [inputs]
        for item in items:
            if isinstance(item, dict):
                collected.append(item)
                continue
            # Non-dict entries are treated as file names to locate and parse.
            found = get_file_from_searchname(item, ".")
            if found is None:
                raise FileNotFoundError("Couldn't find inputs file: " + str(item))
            collected.append(parse_dict(found))

    if required_inputs:
        # Reject keys the workflow doesn't declare before parsing values.
        invalid_keys = set(required_inputs.keys()) - set(wf.all_input_keys())
        if len(invalid_keys) > 0:
            raise Exception(
                f"There were unrecognised inputs provided to the tool \"{wf.id()}\", keys: {', '.join(invalid_keys)}"
            )
        collected.append(parse_known_inputs(wf, required_inputs))

    if batchrun_options:
        return cascade_batchrun_inputs(wf, collected, batchrun_options)
    return cascade_regular_inputs(collected)
def create_task_base(self, wf: Workflow, outdir=None, store_in_centraldb=True):
    """
    Create the base record and directory structure for a new task.

    Generates a unique task id (avoiding ids already registered in the
    central DB and any directory names already present in the output
    directory), creates the task's directory structure, and optionally
    registers the task in the central database.

    :param wf: the workflow this task will run.
    :param outdir: explicit output directory; when omitted, a timestamped
        subdirectory of the configured output directory is used.
    :param store_in_centraldb: when True, insert the task row into the
        central DB; otherwise only the directory structure is created.
    :return: the created TaskRow.
    :raises Exception: when neither ``outdir`` nor a configured output
        directory is available.
    """
    config = JanisConfiguration.manager()

    if not outdir and not config.outputdir:
        raise Exception(
            f"You must specify an output directory (or specify an '{JanisConfiguration.Keys.OutputDir.value}' "
            f"in your configuration)"
        )

    default_outdir = None
    if config.outputdir:
        default_outdir = os.path.join(config.outputdir, wf.id())

    # Collect ids that a new task must not collide with.
    forbiddenids = set()
    if store_in_centraldb:
        with self.with_cursor() as cursor:
            forbiddenids = set(
                t[0] for t in cursor.execute("SELECT wid FROM tasks").fetchall()
            )
    if outdir:
        if os.path.exists(outdir):
            # this should theoretically scoop through all the ones in the taskDB and
            # add them to the forbidden ones, though this might cause more issues for now.
            forbiddenids = forbiddenids.union(set(os.listdir(outdir)))
    else:
        if os.path.exists(default_outdir):
            forbiddenids = forbiddenids.union(set(os.listdir(default_outdir)))

    wid = generate_new_id(forbiddenids)

    task_path = outdir
    if not task_path:
        od = default_outdir
        dt = datetime.now().strftime("%Y%m%d_%H%M%S")
        task_path = os.path.join(od, f"{dt}_{wid}/")

    task_path = fully_qualify_filename(task_path)

    Logger.info(f"Starting task with id = '{wid}'")

    row = TaskRow(wid, task_path)
    WorkflowManager.create_dir_structure(task_path)

    if store_in_centraldb:
        self.get_lazy_db_connection().insert_task(row)
    else:
        Logger.info(
            f"Not storing task '{wid}' in database. To watch, use: 'janis watch {task_path}'"
        )

    # Release the connection so the task runner can take ownership of the DB.
    if self._connection:
        self._connection.commit()
        self._connection.close()
        self._taskDB = None
        self._connection = None

    return row
def prepare_workflow_page(workflow: Workflow, versions: List[str]): if not workflow: return None metadata: WorkflowMetadata = workflow.bind_metadata() or workflow.metadata if not workflow.friendly_name(): raise Exception( f"Tool '{type(workflow).__name__}' ({workflow.id()}) did not provide the required 'friendly_name' for the docs" ) fn = workflow.friendly_name() if workflow.friendly_name() else workflow.id( ) en = f" ({workflow.id()})" if fn != workflow.id() else "" tn = fn + en onelinedescription = prepare_byline(metadata.short_documentation, metadata.contributors, versions) citation = "\n\n".join( [el for el in [metadata.citation, metadata.doi] if el]) formatted_url = (format_rst_link(metadata.documentationUrl, metadata.documentationUrl) if metadata.documentationUrl else "*No URL to the documentation was provided*") toolmetadata = [ ("ID", f"``{workflow.id()}``"), ("URL", formatted_url), ("Versions", ", ".join(str(s) for s in versions[::-1]) if versions else ""), ("Authors", ", ".join(metadata.contributors)), ("Citations", citation), ("Created", str(metadata.dateCreated)), ("Updated", str(metadata.dateUpdated)), ] embeddedtoolsraw = { f"{s.tool.id()}/{s.tool.version()}": s.tool for s in workflow.step_nodes.values() } embeddedtools = tabulate( [[tool.friendly_name(), f"``{key}``"] for key, tool in embeddedtoolsraw.items()], tablefmt="rst", ) input_headers = ["name", "type", "documentation"] required_input_tuples = [[ i.id(), i.intype.id(), i.doc.doc if i.doc else "" ] for i in workflow.tool_inputs() if not i.intype.optional] optional_input_tuples = [[ i.id(), i.intype.id(), i.doc.doc if i.doc else "" ] for i in workflow.tool_inputs() if i.intype.optional] formatted_inputs = tabulate(required_input_tuples + optional_input_tuples, input_headers, tablefmt="rst") formatted_toolversions_array = [] formatted_toolincludes_array = [] for v in versions: link = get_tool_url(workflow.id(), v) formatted_toolincludes_array.append(".. 
include:: " + link) if v == workflow.version(): formatted_toolversions_array.append( f"- {v} (current)" ) # + format_rst_link(v + " (current)", link)) else: formatted_toolversions_array.append( "- " + format_rst_link(v, link + ".html")) output_headers = ["name", "type", "documentation"] output_tuples = [[o.id(), o.outtype.id(), o.doc.doc] for o in workflow.tool_outputs()] formatted_outputs = tabulate(output_tuples, output_headers, tablefmt="rst") tool_prov = "" if workflow.tool_provider() is None: print("Tool :" + workflow.id() + " has no company") else: tool_prov = "." + workflow.tool_provider().lower() workflow_image = ("" if not SHOW_WORKFLOW_IMAGE else """ Workflow -------- .. raw:: html <script src="https://cdnjs.cloudflare.com/ajax/libs/vue/2.6.10/vue.min.js"></script> <script src="https://unpkg.com/vue-cwl/dist/index.js"></script> <div id="vue" style="width: 800px; height: 500px; border-radius: 5px; overflow: hidden;"> <cwl cwl-url="https://unpkg.com/[email protected]/cwl-samples/fastqc.json"></cwl> </div> <script> new Vue({{ el: '#vue', components: {{ cwl: vueCwl.default }} }}); </script> """) nl = "\n" return f"""\
def prepare_workflow_page(workflow: Workflow, versions: List[str]): if not workflow: return None metadata: WorkflowMetadata = workflow.bind_metadata() or workflow.metadata if not workflow.friendly_name(): raise Exception( f"Tool '{type(workflow).__name__}' ({workflow.id()}) did not provide the required 'friendly_name' for the docs" ) fn = workflow.friendly_name() if workflow.friendly_name() else workflow.id( ) en = f" ({workflow.id()})" if fn != workflow.id() else "" tn = fn + en onelinedescription = prepare_byline(workflow.id(), metadata.short_documentation, metadata.contributors, versions) citation = "\n\n".join( [el for el in [metadata.citation, metadata.doi] if el]) formatted_url = (format_rst_link(metadata.documentationUrl, metadata.documentationUrl) if metadata.documentationUrl else "*No URL to the documentation was provided*") toolmetadata = [ ("ID", f"``{workflow.id()}``"), ("URL", formatted_url), ("Versions", ", ".join(str(s) for s in versions[::-1]) if versions else ""), ("Authors", ", ".join(metadata.contributors)), ("Citations", citation), ("Created", str(metadata.dateCreated)), ("Updated", str(metadata.dateUpdated)), ] embeddedtoolsraw = { f"{s.tool.id()}/{s.tool.version()}": s.tool for s in workflow.step_nodes.values() } embeddedtools = tabulate( [[tool.friendly_name(), f"``{key}``"] for key, tool in embeddedtoolsraw.items()], tablefmt="rst", ) input_headers = ["name", "type", "documentation"] required_input_tuples = [[ i.id(), i.intype.id(), i.doc.doc if i.doc else "" ] for i in workflow.tool_inputs() if not i.intype.optional] optional_input_tuples = [[ i.id(), i.intype.id(), i.doc.doc if i.doc else "" ] for i in workflow.tool_inputs() if i.intype.optional] formatted_inputs = tabulate(required_input_tuples + optional_input_tuples, input_headers, tablefmt="rst") formatted_toolversions_array = [] formatted_toolincludes_array = [] for v in versions: link = get_tool_url(workflow.id(), v) formatted_toolincludes_array.append(".. 
include:: " + link) if v == workflow.version(): formatted_toolversions_array.append( f"- {v} (current)" ) # + format_rst_link(v + " (current)", link)) else: formatted_toolversions_array.append( "- " + format_rst_link(v, link + ".html")) output_headers = ["name", "type", "documentation"] output_tuples = [[o.id(), o.outtype.id(), o.doc.doc] for o in workflow.tool_outputs()] formatted_outputs = tabulate(output_tuples, output_headers, tablefmt="rst") cwl = workflow.translate("cwl", to_console=False, allow_empty_container=True)[0] wdl = workflow.translate("wdl", to_console=False, allow_empty_container=True)[0] tool_prov = "" if workflow.tool_provider() is None: print("Tool :" + workflow.id() + " has no company") else: tool_prov = "." + workflow.tool_provider().lower() workflow_image = requote_uri(workflow.versioned_id()) + ".dot.png" nl = "\n" return f"""\