def generate_pipeline_box(workflow: Workflow, leading_space=""):
    """Render an HTML "card" snippet summarising a workflow for an index page.

    Every line of the returned snippet is prefixed with ``leading_space`` so
    the card can be embedded at an arbitrary indentation level.
    """
    meta: WorkflowMetadata = workflow.bind_metadata() or workflow.metadata

    def tag_component(tag):
        return f'<span class="no-select tagelement">{tag}</span>'

    href = f"{workflow.id().lower()}.html"

    tags = "".join(map(tag_component, meta.keywords or []))
    date: datetime = meta.dateUpdated or meta.dateCreated

    contributors = meta.contributors or []
    max_display_conts = 5
    if len(contributors) >= max_display_conts:
        # Too many to list: show the first few, then summarise the rest
        shown = ", ".join(meta.contributors[: max_display_conts - 2])
        nothers = len(contributors) - max_display_conts + 2
        contributorstr = shown + f" and {nothers} others"
    else:
        contributorstr = ", ".join(meta.contributors or ["None"])

    doc_html = meta.short_documentation or "<em>Short documentation required</em>"
    card = f"""
<div class="col-6" style="margin: 10px; padding: 20px; border: 1px solid #e3e3e3; border-radius: 5px;">
    <h4 style="margin-bottom: 10px"><a href="{href}">{workflow.friendly_name()}</a></h4>
    {f'<p>{tags}</p>' if tags else ""}
    <p>{doc_html}</p>
    {version_html(workflow.version() or meta.version)}
    <p style="margin-bottom: 0px; font-size: 12px">Contributors: {contributorstr}</p>
</div>"""

    return "\n".join(leading_space + line for line in card.split("\n"))
def cascade_batchrun_inputs(
    workflow: Workflow, inputs: List[Dict], options: BatchRunRequirements
):
    """Merge a list of input dictionaries for a batch run.

    Values for the grouped fields (``options.fields`` plus ``options.groupby``)
    are accumulated into arrays; for every other key, later dictionaries simply
    overwrite earlier values.
    """
    fields_to_group = {options.groupby, *options.fields}

    wfins = workflow.inputs_map()

    # Grouping adds one extra array dimension on top of the input's own type
    required_depth = {
        f: 1 + count_janisarray_depth(wfins[f].intype) for f in fields_to_group
    }

    merged = {}
    for inp in inputs:
        for key, value in inp.items():
            if key not in fields_to_group:
                # Non-grouped keys: last value wins
                merged[key] = value
                continue

            bucket = merged.setdefault(key, [])
            # Inspect the shape of the data: if the value is shallower than
            # required, wrap it once so it extends the array at the right depth
            if count_array_depth(value) < required_depth[key]:
                value = [value]
            bucket.extend(value)

    return merged
Example #3
0
    def ingest_workflow_input(self, wf: j.Workflow, inp):
        """Register a single CWL workflow input on the Janis workflow ``wf``."""
        identifier = self.get_tag_from_identifier(inp.id)
        datatype = self.ingest_cwl_type(inp.type,
                                        secondary_files=inp.secondaryFiles)
        return wf.input(
            identifier=identifier,
            datatype=datatype,
            default=inp.default,
            doc=inp.doc,
        )
Example #4
0
    def parse_workflow_source(self,
                              wf: j.Workflow,
                              step_input,
                              potential_prefix: Optional[str] = None):
        """Resolve a CWL step-input reference to the corresponding node
        (or node output) of the Janis workflow ``wf``.

        Lists are resolved element-wise; ``None`` passes through unchanged.
        Raises if the reference is not a string or contains a CWL expression.
        """
        if step_input is None:
            return None

        if isinstance(step_input, list):
            return [
                self.parse_workflow_source(wf, el,
                                           potential_prefix=potential_prefix)
                for el in step_input
            ]

        if not isinstance(step_input, str):
            raise Exception(f"Can't parse step_input {step_input}")

        ref = self.get_source_from_identifier(step_input)

        # Strip a leading "<workflow-id>/", then a leading "<prefix>/" if given
        wf_prefix = wf.id() + "/"
        if ref.startswith(wf_prefix):
            ref = ref[len(wf_prefix):]
        if potential_prefix and ref.startswith(potential_prefix + "/"):
            ref = ref[len(potential_prefix) + 1:]

        if ref.startswith("$("):
            raise Exception(
                f"This script can't parse expressions in the step input {step_input}"
            )

        # "a/b/c" -> source "b", tag "c"; no slash -> whole ref, no tag
        if "/" in ref:
            parts = ref.split("/")
            source_str, tag_str = parts[-2], parts[-1]
        else:
            source_str, tag_str = ref, None

        tag_str = self.get_tag_from_identifier(tag_str)

        if source_str not in wf.nodes:
            raise KeyError(f"Couldn't find input / step {source_str} in nodes")
        source = wf[source_str]

        from janis_core.workflow.workflow import StepNode

        # A tagged reference to a step selects that step's named output
        if tag_str and isinstance(source, StepNode):
            source = source.get_item(tag_str)
        return source
Example #5
0
    def ingest_workflow_step(self, wf: j.Workflow, stp):
        """Translate one CWL WorkflowStep into a Janis step on ``wf``.

        Resolves the step's tool, its input sources, and any scatter
        description, then registers the step on the workflow.
        """
        import cwl_utils.parser_v1_2 as cwlgen

        stp: cwlgen.WorkflowStep = stp
        step_identifier = self.get_tag_from_identifier(stp.id)

        # Embedded tool / subworkflow documents are parsed directly; string
        # references are loaded relative to the base URI
        if isinstance(stp.run,
                      (self.cwlgen.CommandLineTool, self.cwlgen.Workflow)):
            tool = self.from_loaded_doc(stp.run)
        else:
            tool = CWlParser.from_doc(stp.run, base_uri=self.base_uri)

        inputs = {}
        for inp in stp.in_:
            inp: cwlgen.WorkflowStepInput = inp
            inp_identifier = self.get_tag_from_identifier(inp.id)

            # Precedence: explicit source, then valueFrom expression, then
            # a (truthy) default value
            if inp.source is not None:
                value = self.parse_workflow_source(
                    wf, inp.source, potential_prefix=step_identifier)
            elif inp.valueFrom is not None:
                value = self.parse_basic_expression(inp.valueFrom)
            elif inp.default:
                value = inp.default
            else:
                value = None

            if value is None:
                print(f"Source is None from object: {inp.save()}")
            inputs[inp_identifier] = value

        scatter = None
        if stp.scatter:
            raw_fields = stp.scatter
            if not isinstance(raw_fields, list):
                raw_fields = [raw_fields]

            # Only the trailing path component names the input to scatter over
            scatter_fields = [field.split("/")[-1] for field in raw_fields]

            scatter = j.ScatterDescription(
                fields=scatter_fields,
                method=self.ingest_scatter_method(stp.scatterMethod))

        return wf.step(
            identifier=step_identifier,
            tool=tool(**inputs),
            scatter=scatter,
            when=None,
            doc=stp.doc,
        )
Example #6
0
    def ingest_workflow_output(self, wf: j.Workflow, out):
        """Register a single CWL workflow output on the Janis workflow ``wf``."""
        import cwl_utils.parser_v1_2 as cwlgen

        out: cwlgen.WorkflowOutputParameter = out
        identifier = self.get_tag_from_identifier(out.id)
        source = self.parse_workflow_source(wf,
                                            out.outputSource,
                                            potential_prefix=identifier)
        datatype = self.ingest_cwl_type(out.type,
                                        secondary_files=out.secondaryFiles)
        return wf.output(
            identifier=identifier,
            datatype=datatype,
            source=source,
        )
def cascade_inputs(
    wf: Workflow,
    inputs: Optional[Union[Dict, List[Union[str, Dict]]]],
    required_inputs: Optional[Dict],
    batchrun_options: Optional[BatchRunRequirements],
):
    """Collect, validate and merge all provided inputs for ``wf``.

    ``inputs`` may be a dict, a path to an inputs file, or a list of either;
    keys in ``required_inputs`` are validated against the workflow's known
    input keys before being merged.
    """
    collected: List[Dict] = []

    if inputs:
        candidates = inputs if isinstance(inputs, list) else [inputs]
        for candidate in candidates:
            if isinstance(candidate, dict):
                collected.append(candidate)
                continue
            # Non-dict entries are treated as file paths to locate and parse
            found = get_file_from_searchname(candidate, ".")
            if found is None:
                raise FileNotFoundError(
                    "Couldn't find inputs file: " + str(candidate)
                )
            collected.append(parse_dict(found))

    if required_inputs:
        unknown = set(required_inputs.keys()) - set(wf.all_input_keys())
        if unknown:
            raise Exception(
                f"There were unrecognised inputs provided to the tool \"{wf.id()}\", keys: {', '.join(unknown)}"
            )
        collected.append(parse_known_inputs(wf, required_inputs))

    # Batch runs need the grouped-merge strategy; otherwise a plain cascade
    if batchrun_options:
        return cascade_batchrun_inputs(wf, collected, batchrun_options)
    return cascade_regular_inputs(collected)
Example #8
0
    def create_task_base(self, wf: Workflow, outdir=None, store_in_centraldb=True):
        """Create and register the on-disk base for a new run of ``wf``.

        Picks a task id not already used (in the central DB and/or the output
        directory), creates the task directory structure, optionally records
        the task in the central DB, and returns the new ``TaskRow``.
        """
        config = JanisConfiguration.manager()

        if not outdir and not config.outputdir:
            raise Exception(
                f"You must specify an output directory (or specify an '{JanisConfiguration.Keys.OutputDir.value}' "
                f"in your configuration)"
            )

        default_outdir = None
        if config.outputdir:
            default_outdir = os.path.join(config.outputdir, wf.id())

        # Collect ids that must not be reused for the new task
        forbiddenids = set()
        if store_in_centraldb:
            with self.with_cursor() as cursor:
                forbiddenids = {
                    row[0]
                    for row in cursor.execute("SELECT wid FROM tasks").fetchall()
                }

        # NOTE: this should theoretically scoop through all the ones in the
        # taskDB and add them to the forbidden ones, though this might cause
        # more issues for now.
        scan_dir = outdir if outdir else default_outdir
        if os.path.exists(scan_dir):
            forbiddenids = forbiddenids.union(set(os.listdir(scan_dir)))

        wid = generate_new_id(forbiddenids)

        task_path = outdir
        if not task_path:
            # No explicit outdir: derive a timestamped folder under the default
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            task_path = os.path.join(default_outdir, f"{timestamp}_{wid}/")

        task_path = fully_qualify_filename(task_path)

        Logger.info(f"Starting task with id = '{wid}'")

        row = TaskRow(wid, task_path)
        WorkflowManager.create_dir_structure(task_path)

        if store_in_centraldb:
            self.get_lazy_db_connection().insert_task(row)
        else:
            Logger.info(
                f"Not storing task '{wid}' in database. To watch, use: 'janis watch {task_path}'"
            )

        # Flush and release any open central-DB connection before handing back
        if self._connection:
            self._connection.commit()
            self._connection.close()
            self._taskDB = None
            self._connection = None
        return row
Example #9
0
def prepare_workflow_page(workflow: Workflow, versions: List[str]):
    """Assemble the pieces of a reStructuredText docs page for ``workflow``.

    NOTE(review): the returned template literal is truncated in this copy of
    the source, so the body is documented but otherwise left untouched.
    """
    if not workflow:
        return None

    metadata: WorkflowMetadata = workflow.bind_metadata() or workflow.metadata

    # A friendly name is mandatory for docs generation
    if not workflow.friendly_name():
        raise Exception(
            f"Tool '{type(workflow).__name__}' ({workflow.id()}) did not provide the required 'friendly_name' for the docs"
        )

    # Page title: friendly name, with the raw id appended when they differ
    fn = workflow.friendly_name() if workflow.friendly_name() else workflow.id(
    )
    en = f" ({workflow.id()})" if fn != workflow.id() else ""
    tn = fn + en

    onelinedescription = prepare_byline(metadata.short_documentation,
                                        metadata.contributors, versions)

    # Citation block: citation and DOI, skipping missing entries
    citation = "\n\n".join(
        [el for el in [metadata.citation, metadata.doi] if el])

    formatted_url = (format_rst_link(metadata.documentationUrl,
                                     metadata.documentationUrl)
                     if metadata.documentationUrl else
                     "*No URL to the documentation was provided*")

    # (label, value) pairs for the page's metadata table
    toolmetadata = [
        ("ID", f"``{workflow.id()}``"),
        ("URL", formatted_url),
        ("Versions", ", ".join(str(s)
                               for s in versions[::-1]) if versions else ""),
        ("Authors", ", ".join(metadata.contributors)),
        ("Citations", citation),
        ("Created", str(metadata.dateCreated)),
        ("Updated", str(metadata.dateUpdated)),
    ]

    # Tools used by the workflow's steps, keyed "tool-id/version" (deduplicated)
    embeddedtoolsraw = {
        f"{s.tool.id()}/{s.tool.version()}": s.tool
        for s in workflow.step_nodes.values()
    }
    embeddedtools = tabulate(
        [[tool.friendly_name(), f"``{key}``"]
         for key, tool in embeddedtoolsraw.items()],
        tablefmt="rst",
    )

    input_headers = ["name", "type", "documentation"]

    # Required inputs are listed before optional ones
    required_input_tuples = [[
        i.id(), i.intype.id(), i.doc.doc if i.doc else ""
    ] for i in workflow.tool_inputs() if not i.intype.optional]
    optional_input_tuples = [[
        i.id(), i.intype.id(), i.doc.doc if i.doc else ""
    ] for i in workflow.tool_inputs() if i.intype.optional]

    formatted_inputs = tabulate(required_input_tuples + optional_input_tuples,
                                input_headers,
                                tablefmt="rst")

    # Version list: the current version is plain text, others link to their page
    formatted_toolversions_array = []
    formatted_toolincludes_array = []
    for v in versions:
        link = get_tool_url(workflow.id(), v)
        formatted_toolincludes_array.append(".. include:: " + link)
        if v == workflow.version():
            formatted_toolversions_array.append(
                f"- {v} (current)"
            )  # + format_rst_link(v + " (current)", link))
        else:
            formatted_toolversions_array.append(
                "- " + format_rst_link(v, link + ".html"))

    output_headers = ["name", "type", "documentation"]
    output_tuples = [[o.id(), o.outtype.id(), o.doc.doc]
                     for o in workflow.tool_outputs()]
    formatted_outputs = tabulate(output_tuples, output_headers, tablefmt="rst")

    # Provider suffix for the page path; warn when none was declared
    tool_prov = ""
    if workflow.tool_provider() is None:
        print("Tool :" + workflow.id() + " has no company")
    else:
        tool_prov = "." + workflow.tool_provider().lower()

    # Optional embedded CWL viewer; the {{ }} pairs survive a later .format()
    # pass — presumably this string is formatted downstream (TODO confirm)
    workflow_image = ("" if not SHOW_WORKFLOW_IMAGE else """
Workflow
--------

.. raw:: html

   <script src="https://cdnjs.cloudflare.com/ajax/libs/vue/2.6.10/vue.min.js"></script>
   <script src="https://unpkg.com/vue-cwl/dist/index.js"></script>
   <div id="vue" style="width: 800px; height: 500px; border-radius: 5px; overflow: hidden;">
          <cwl cwl-url="https://unpkg.com/[email protected]/cwl-samples/fastqc.json"></cwl>
   </div>
   <script>
   new Vue({{
       el: '#vue',
       components: {{
           cwl: vueCwl.default
       }}
   }});
   </script>
    """)

    nl = "\n"

    # NOTE(review): the template below is cut off in this source view
    return f"""\
Example #10
0
def prepare_workflow_page(workflow: Workflow, versions: List[str]):
    if not workflow:
        return None

    metadata: WorkflowMetadata = workflow.bind_metadata() or workflow.metadata

    if not workflow.friendly_name():
        raise Exception(
            f"Tool '{type(workflow).__name__}' ({workflow.id()}) did not provide the required 'friendly_name' for the docs"
        )

    fn = workflow.friendly_name() if workflow.friendly_name() else workflow.id(
    )
    en = f" ({workflow.id()})" if fn != workflow.id() else ""
    tn = fn + en

    onelinedescription = prepare_byline(workflow.id(),
                                        metadata.short_documentation,
                                        metadata.contributors, versions)

    citation = "\n\n".join(
        [el for el in [metadata.citation, metadata.doi] if el])

    formatted_url = (format_rst_link(metadata.documentationUrl,
                                     metadata.documentationUrl)
                     if metadata.documentationUrl else
                     "*No URL to the documentation was provided*")

    toolmetadata = [
        ("ID", f"``{workflow.id()}``"),
        ("URL", formatted_url),
        ("Versions", ", ".join(str(s)
                               for s in versions[::-1]) if versions else ""),
        ("Authors", ", ".join(metadata.contributors)),
        ("Citations", citation),
        ("Created", str(metadata.dateCreated)),
        ("Updated", str(metadata.dateUpdated)),
    ]

    embeddedtoolsraw = {
        f"{s.tool.id()}/{s.tool.version()}": s.tool
        for s in workflow.step_nodes.values()
    }
    embeddedtools = tabulate(
        [[tool.friendly_name(), f"``{key}``"]
         for key, tool in embeddedtoolsraw.items()],
        tablefmt="rst",
    )

    input_headers = ["name", "type", "documentation"]

    required_input_tuples = [[
        i.id(), i.intype.id(), i.doc.doc if i.doc else ""
    ] for i in workflow.tool_inputs() if not i.intype.optional]
    optional_input_tuples = [[
        i.id(), i.intype.id(), i.doc.doc if i.doc else ""
    ] for i in workflow.tool_inputs() if i.intype.optional]

    formatted_inputs = tabulate(required_input_tuples + optional_input_tuples,
                                input_headers,
                                tablefmt="rst")

    formatted_toolversions_array = []
    formatted_toolincludes_array = []
    for v in versions:
        link = get_tool_url(workflow.id(), v)
        formatted_toolincludes_array.append(".. include:: " + link)
        if v == workflow.version():
            formatted_toolversions_array.append(
                f"- {v} (current)"
            )  # + format_rst_link(v + " (current)", link))
        else:
            formatted_toolversions_array.append(
                "- " + format_rst_link(v, link + ".html"))

    output_headers = ["name", "type", "documentation"]
    output_tuples = [[o.id(), o.outtype.id(), o.doc.doc]
                     for o in workflow.tool_outputs()]
    formatted_outputs = tabulate(output_tuples, output_headers, tablefmt="rst")

    cwl = workflow.translate("cwl",
                             to_console=False,
                             allow_empty_container=True)[0]
    wdl = workflow.translate("wdl",
                             to_console=False,
                             allow_empty_container=True)[0]

    tool_prov = ""
    if workflow.tool_provider() is None:
        print("Tool :" + workflow.id() + " has no company")
    else:
        tool_prov = "." + workflow.tool_provider().lower()

    workflow_image = requote_uri(workflow.versioned_id()) + ".dot.png"

    nl = "\n"

    return f"""\