def get_engine_from_eng(
    eng,
    wid,
    logfile,
    confdir,
    execdir: str,
    cromwell_url: Optional[str],
    cromwell_jar: Optional[str],
):

    engid = str(eng)
    if engid == EngineType.cromwell.value:
        url = cromwell_url or PreparedJob.instance().cromwell.url
        if url:
            Logger.info("Found cromwell_url: " + url)
        return Cromwell(
            identifier=f"cromwell-{wid}",
            logfile=logfile,
            confdir=confdir,
            host=url,
            cromwelljar=cromwell_jar,
            execution_dir=execdir,
        )

    return get_engine_type(eng)(logfile=logfile, execution_dir=execdir)
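
# --- Hedged usage sketch (not from the original source) ---
# Assumes the janis-assistant names used above (EngineType, Cromwell,
# PreparedJob, get_engine_type) are in scope; all paths and IDs below are
# illustrative placeholders.
engine = get_engine_from_eng(
    eng="cromwell",
    wid="wid123",
    logfile="/tmp/janis/logs/engine.log",
    confdir="/tmp/janis/configuration",
    execdir="/tmp/janis/execution",
    cromwell_url="localhost:8000",  # truthy, so the PreparedJob lookup is skipped
    cromwell_jar=None,              # jar resolution is left to the Cromwell wrapper (assumption)
)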
def abort_wids(sids: List[str], wait=True):
    cm = ConfigManager(db_path=None)
    for sid in sids:
        try:
            row = cm.get_row_for_submission_id_or_path(sid)
            WorkflowManager.mark_aborted(row.execution_dir, row.submission_id)
        except Exception as e:
            Logger.critical(f"Couldn't abort '{sid}': " + str(e))
            raise e

    if wait:
        Logger.info(
            "Waiting until completely aborted. This can take up to a few minutes to complete."
        )
        for sid in sids:
            try:
                wm = ConfigManager.get_from_path_or_submission_lazy(
                    sid, readonly=True)
                check_attempts = 0
                while not wm.database.get_uncached_status().is_in_final_state():
                    time.sleep(1)
                    check_attempts += 1
                    if check_attempts % 5 == 0:
                        Logger.info(
                            f"Still waiting for '{sid}' to move to final state"
                        )
            except Exception as e:
                Logger.critical(
                    f"Couldn't watch '{sid}' until aborted: {str(e)}")
        Logger.info(f"Jobs {' '.join(sids)} should be completely aborted now")
Example 3
def resolve_tool(
    tool: Union[str, j.CommandTool, Type[j.CommandTool], j.Workflow,
                Type[j.Workflow]],
    name=None,
    from_toolshed=False,
    force=False,
    only_toolbox=False,
):
    if isinstance(tool, j.Tool):
        return tool
    elif isclass(tool) and issubclass(tool, (j.Workflow, j.Tool)):
        return tool()

    if not isinstance(tool, str):
        raise TypeError(
            f"Janis is not sure how to resolve a workflow of type: '{type(tool)}'"
        )

    if not only_toolbox:
        fileschemewherelocated = FileScheme.get_type_by_prefix(tool.lower())
        if fileschemewherelocated:
            Logger.info(
                f"Detected remote workflow to localise from '{fileschemewherelocated.__name__}'"
            )
            # Get some unique name for the workflow
            import hashlib

            fn = hashlib.md5(tool.lower().encode()).hexdigest() + ".py"
            outdir = os.path.join(JanisConfiguration.manager().configdir,
                                  "cached")
            os.makedirs(outdir, exist_ok=True)
            dest = os.path.join(outdir, fn)
            Logger.log(f"Localising '{tool}' to '{dest}'")

            fileschemewherelocated("internal").cp_from(
                source=tool.lower(),
                dest=dest,
                report_progress=lambda progress: print(
                    f"Download progress: {progress}"),
                force=force,
            )
            tool = dest

        wf = get_janis_workflow_from_searchname(tool,
                                                ".",
                                                name=name,
                                                include_commandtools=True)

        if wf:
            return wf

    if from_toolshed:
        v = None
        if ":" in tool:
            # split "name:version" so the version is passed to the shed separately
            tool, v = tool.split(":", 1)

        return j.JanisShed.get_tool(tool, v)
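
# Hedged usage sketch of the resolution paths above; MyWorkflow is a
# hypothetical janis.Workflow subclass, and the string forms are illustrative.
t1 = resolve_tool(MyWorkflow())                         # instance: returned unchanged
t2 = resolve_tool(MyWorkflow)                           # class: instantiated, then returned
t3 = resolve_tool("hello", from_toolshed=True)          # searched locally, then in the toolshed
t4 = resolve_tool("hello:v1.0.0", from_toolshed=True)   # "name:version" toolshed lookup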
Example 4
    def remove_by_id(self, id_: str) -> None:
        Logger.info(f"Removing '{id_}' from database")
        with self.with_cursor() as cursor:
            cursor.execute(f"DELETE FROM {self.table_name} WHERE id = ?",
                           (id_, ))

        self.commit()
Example 5
    def remove_by_id(self, wid: str) -> None:
        Logger.info(f"Removing '{wid}' from database")
        with self.with_cursor() as cursor:
            cursor.execute(
                f"DELETE FROM {TasksDbProvider.table_name} WHERE wid = ?",
                (wid, ))

        self.commit()
Example 6
    def remove_by_ids(self, ids: List[str]) -> None:
        if not isinstance(ids, list):
            ids = [ids]

        Logger.info("Removing ids: " + ", ".join(ids))
        seq = ", ".join(["?"] * len(ids))
        with self.with_cursor() as cursor:
            cursor.execute(
                f"DELETE FROM {self.table_name} WHERE id in ({seq})", ids)

        self.commit()
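
# Self-contained illustration (standard library only) of the placeholder
# technique used above: one "?" per id, so values are bound by the driver
# rather than interpolated into the SQL string.
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE tasks (id TEXT PRIMARY KEY)")
conn.executemany("INSERT INTO tasks VALUES (?)", [("a",), ("b",), ("c",)])

ids = ["a", "c"]
seq = ", ".join(["?"] * len(ids))  # "?, ?"
conn.execute(f"DELETE FROM tasks WHERE id IN ({seq})", ids)
conn.commit()

print([r[0] for r in conn.execute("SELECT id FROM tasks")])  # ['b']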
Example 7
def abort_wids(wids: List[str]):
    for wid in wids:
        try:
            row = ConfigManager.manager().get_lazy_db_connection().get_by_wid(
                wid)
            if row:
                WorkflowManager.mark_aborted(row.outputdir, row.wid)
            else:
                WorkflowManager.mark_aborted(wid, None)
        except Exception as e:
            Logger.critical(f"Couldn't abort '{wid}': " + str(e))
            raise e
Example 8
    def get_config():
        """
        This is here to lazily instantiate the config
        """
        nonlocal cached_outd
        if not cached_outd:

            outd = JanisConfiguration.default()

            if templatename:
                tmpl = janistemplates.get_template(templatename)
                schema = janistemplates.get_schema_for_template(tmpl)

                mapped_schema_to_default = {
                    s.identifier: s.default
                    for s in schema if s.default is not None
                }

                # parse extra params
                description = dedent(tmpl.__doc__) if tmpl.__doc__ else None

                parser = InitArgParser(templatename,
                                       schema,
                                       description=description)
                parsed = parser.parse_args(unparsed_init_args)

                try:
                    # "easier to ask for forgiveness than permission" https://stackoverflow.com/a/610923
                    keys_to_skip = set(tmpl.ignore_init_keys)
                except AttributeError:
                    Logger.log(
                        f"Template '{templatename}' didn't have 'ignore_init_keys'"
                    )
                    keys_to_skip = set()

                outd[JanisConfiguration.Keys.Engine] = EngineType.cromwell
                outd[JanisConfiguration.Keys.Template] = {
                    s.id(): parsed.get(s.id(),
                                       mapped_schema_to_default.get(s.id()))
                    for s in schema if (s.identifier in parsed) or (
                        s.identifier in mapped_schema_to_default
                        and s.identifier not in keys_to_skip)
                }
                outd[JanisConfiguration.Keys.Template][
                    JanisConfiguration.JanisConfigurationTemplate.Keys.
                    Id] = templatename

            cached_outd = stringify_dict_keys_or_return_value(outd)
        return cached_outd
Example 9
def get_engine_from_eng(eng, wid, logfile, confdir, execdir: str, **kwargs):

    if eng == "cromwell":
        url = kwargs.get(
            "cromwell_url") or JanisConfiguration.manager().cromwell.url
        if url:
            Logger.info("Found cromwell_url: " + url)
        return Cromwell(
            identifier=f"cromwell-{wid}",
            logfile=logfile,
            confdir=confdir,
            host=url,
            cromwelljar=kwargs.get("cromwell_jar"),
            execution_dir=execdir,
        )

    return get_engine_type(eng)(logfile=logfile, execution_dir=execdir)
def generate_inputs(
    jc: JanisConfiguration,
    tool: Union[str, j.CommandTool, j.Workflow],
    all=False,
    name=None,
    force=False,
    additional_inputs=None,
    with_resources=False,
    quality_type: List[InputQualityType] = None,
    recipes: List[str] = None,
    hints: dict = None,
):
    toolref, _ = resolve_tool(tool, name, from_toolshed=True, force=force)
    inputsdict = None
    if additional_inputs:
        inputsfile = get_file_from_searchname(additional_inputs, ".")
        inputsdict = parse_dict(inputsfile)

    values_to_ignore = set()
    if recipes:
        if not jc:
            Logger.warn(
                "An internal error occurred when getting recipes from your config, please raise an issue on GitHub."
            )
        else:
            for k in jc.recipes.get_recipe_for_keys(recipes):
                values_to_ignore.add(k)

    if not toolref:
        raise Exception("Couldn't find workflow with name: " + str(tool))

    d = toolref.generate_inputs_override(
        additional_inputs=inputsdict,
        with_resource_overrides=with_resources,
        include_defaults=all,
        values_to_ignore=values_to_ignore,
        quality_type=quality_type,
        hints=hints,
    )

    return d
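
# Hedged usage sketch: dump the generated inputs dictionary as YAML for
# editing. `jc` is assumed to be a loaded JanisConfiguration and "hello" is
# an illustrative tool name.
import sys
import ruamel.yaml

d = generate_inputs(jc, tool="hello", all=True)
ruamel.yaml.dump(d, sys.stdout, default_flow_style=False)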
def run_with_outputs(
    tool: Union[j.CommandTool, j.Workflow],
    inputs: Dict[str, any],
    output_dir: str,
    config: JanisConfiguration = None,
    engine: Optional[str] = None,
    workflow_reference: Optional[str] = None,
):
    """
    Run and WAIT for a Janis workflow to complete. This helper method runs a workflow,
    and returns a dictionary of output values to their output tag. This method MAY throw,
    so ensure it's try-catch wrapped.
    :param tool: An INSTANTIATED tool definition, e.g. one resolved earlier via 'get_janis_workflow_from_searchname'
    :param inputs: A dictionary of pure input values, not file paths.
    :param output_dir: Where to run the execution
    :param config: Optional config, else choose the default at $HOME/.janis/janis.conf
    :param engine: Optional engine identifier; if None, the engine from the configuration is used
    :param workflow_reference: A reference to the workflow being run; this gets used to write a run.sh file
    :return: A dictionary of output values by the output tag
    """

    job = prepare_job(
        tool=tool,
        output_dir=output_dir,
        required_inputs=inputs,
        jc=config or JanisConfiguration.initial_configuration(None),
        # params to be automatically evaluated
        execution_dir=None,
        inputs={},
        allow_empty_container=False,
        check_files=True,
        container_override={},
        skip_digest_cache=False,
        skip_digest_lookup=False,
        batchrun_reqs=None,
        validation_reqs=None,
        engine=engine,
        hints={},
        keep_intermediate_files=False,
        max_cores=None,
        max_memory=None,
        max_duration=None,
        no_store=True,
        recipes=[],
        run_in_background=None,
        run_in_foreground=None,
        strict_inputs=False,
        watch=False,
        workflow_reference=workflow_reference,
        # don't do extra preprocessing steps
        run_prepare_processing=False,
        localise_all_files=True,
    )

    wm = run_from_jobfile(tool, jobfile=job, wait=True)
    if not wm:
        Logger.critical(f"An error occurred when running workflow {tool.id()}")
        return None

    if not wm.database:
        Logger.critical(
            f"An error occurred when getting the outputs for workflow {tool.id()}"
        )
        return None

    status = wm.database.get_uncached_status()
    if status != TaskStatus.COMPLETED:
        error = ""
        if wm.database.submission_metadata and wm.database.submission_metadata.metadata:
            error = wm.database.submission_metadata.metadata.error or ""
        Logger.critical(
            f"The workflow {tool.id()} ended with status {status}, and hence won't return outputs. Error: {error}"
        )
        return None

    outs = wm.database.outputsDB.get()
    return {
        o.id_: o.value or o.new_path
        for o in outs if o.value or o.new_path
    }
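
# Hedged usage sketch: the docstring above warns this helper MAY throw, so
# wrap the call. `my_tool` is a hypothetical instantiated workflow and the
# inputs/paths are illustrative.
try:
    outputs = run_with_outputs(
        tool=my_tool,
        inputs={"sample_name": "NA12878"},
        output_dir="/tmp/janis-run",
    )
    if outputs is not None:
        for tag, value in outputs.items():
            print(f"{tag} -> {value}")
except Exception as e:
    print(f"run_with_outputs failed: {e}")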
def prepare_job(
    tool: Union[str, j.Tool, Type[j.Tool]],
    # workflow search options
    workflow_reference: Optional[str],  # if None, no jobfile will be written
    jc: JanisConfiguration,
    engine: Optional[str],
    batchrun_reqs: Optional[BatchRunRequirements],
    validation_reqs: Optional[ValidationRequirements],
    hints: Optional[Dict[str, str]],
    output_dir: Optional[str],
    execution_dir: Optional[str],
    inputs: Union[str, dict],
    required_inputs: dict,
    watch,
    max_cores,
    max_memory,
    max_duration,
    keep_intermediate_files,
    recipes,
    run_in_background,
    run_in_foreground,
    no_store,
    allow_empty_container,
    check_files,
    container_override: dict,
    strict_inputs,
    skip_digest_lookup,
    skip_digest_cache,
    run_prepare_processing,
    db_type: DatabaseTypeToUse = None,
    source_hints: List[str] = None,
    post_run_script: str = None,
    localise_all_files: bool = False,
):

    # organise inputs
    inputsdict = {}

    if recipes:
        valuesfromrecipe = jc.recipes.get_recipe_for_keys(recipes)
        inputsdict.update(valuesfromrecipe)

    inputsdict.update(
        cascade_inputs(
            wf=tool,
            inputs=inputs,
            required_inputs=required_inputs,
            batchrun_options=batchrun_reqs,
            strict_inputs=strict_inputs,
        ))

    output_dir = generate_output_dir_from(tool.id(), output_dir, jc.output_dir)

    if isinstance(tool, DynamicWorkflow):
        tool.constructor(inputsdict, hints)
        inputsdict = tool.modify_inputs(inputsdict, hints)

    should_run_in_background = (
        run_in_background is True
        or jc.run_in_background is True) and not (run_in_foreground is True)

    if post_run_script:
        intermediate_prs = get_file_from_searchname(post_run_script, cwd=".")
        if not intermediate_prs or not os.path.exists(intermediate_prs):
            raise Exception(
                f"Couldn't find file for post_run_script '{post_run_script}'")
        post_run_script = intermediate_prs

    # Download remote files to cache directory
    if localise_all_files:
        cache_dir = os.path.join(jc.config_dir, "remote_file_cache")
        m = RemoteFileLocatorModifier(cache_dir=cache_dir)
        inputsdict = m.inputs_modifier(tool, inputsdict, hints)

    if run_prepare_processing:
        cache_dir = os.path.join(output_dir, "janis/prepare")

        Logger.debug(f"Running janis prepare steps at {cache_dir}")

        os.makedirs(cache_dir, exist_ok=True)
        processors = [
            CwlInputObjectUnwrapperModifier(),
            FileFinderLocatorModifier(cache_dir=cache_dir,
                                      source_hints=source_hints),
            InputFileQualifierModifier(),
            InputTransformerModifier(cache_dir=cache_dir),
            InputChecker(check_file_existence=True),
            ContigChecker(),
        ]

        tool_to_evaluate, new_inputs = PipelineModifierBase.apply_many(
            processors, tool, inputsdict, hints=hints)
        inputsdict = new_inputs

    submission = PreparedJob(
        # job stuff
        workflow_reference=workflow_reference,
        config_dir=jc.config_dir,
        db_path=jc.db_path,
        execution_dir=execution_dir,
        engine=engine or jc.engine,
        cromwell=jc.cromwell,
        template=jc.template,
        notifications=jc.notifications,
        environment=JanisConfigurationEnvironment(
            max_cores=max_cores or jc.environment.max_cores,
            max_memory=max_memory or jc.environment.max_memory,
            max_duration=max_duration or jc.environment.max_duration,
        ),
        run_in_background=should_run_in_background,
        digest_cache_location=jc.digest_cache_location,
        # job information
        inputs=inputsdict,
        output_dir=output_dir,
        keep_intermediate_files=keep_intermediate_files,
        recipes=recipes,
        hints=hints,
        allow_empty_container=allow_empty_container,
        container_override=container_override,
        skip_digest_lookup=skip_digest_lookup,
        skip_digest_cache=skip_digest_cache,
        batchrun=batchrun_reqs,
        store_in_central_db=not no_store,
        skip_file_check=not check_files,
        strict_inputs=strict_inputs,
        validation=validation_reqs,
        # config stuff
        should_watch_if_background=watch,
        call_caching_enabled=jc.call_caching_enabled,
        container_type=jc.container.get_container_type(),
        post_run_script=post_run_script,
    )

    if db_type:
        submission.cromwell = submission.cromwell or JanisConfigurationCromwell()
        submission.cromwell.db_type = db_type

    return submission
def run_from_jobfile(
    workflow: Union[str, j.Tool, Type[j.Tool]],
    jobfile: PreparedJob,
    engine: Union[str, Engine, None] = None,
    wait: bool = False,
    # specific engine args
    cromwell_jar: Optional[str] = None,
    cromwell_url: Optional[str] = None,
):

    cm = ConfigManager(db_path=jobfile.db_path)

    if not workflow:
        raise Exception("Couldn't find workflow with name: " + str(workflow))

    row = cm.create_task_base(
        wf=workflow,
        job=jobfile,
    )

    jobfile.execution_dir = row.execution_dir
    jobfile.output_dir = row.output_dir

    # set logger for submit
    Logger.set_write_level(Logger.CONSOLE_LEVEL)
    logpath = os.path.join(
        WorkflowManager.get_path_for_component_and_dir(
            row.execution_dir, WorkflowManager.WorkflowManagerPath.logs),
        "janis-submit.log",
    )
    Logger.WRITE_LEVELS = {Logger.CONSOLE_LEVEL: (logpath, open(logpath, "a"))}
    Logger.debug(f"Set submission logging to '{logpath}'")
    print(row.submission_id, file=sys.stdout)

    eng = get_engine_from_eng(
        engine or jobfile.engine,
        wid=row.submission_id,
        execdir=WorkflowManager.get_path_for_component_and_dir(
            row.execution_dir, WorkflowManager.WorkflowManagerPath.execution),
        confdir=WorkflowManager.get_path_for_component_and_dir(
            row.execution_dir,
            WorkflowManager.WorkflowManagerPath.configuration),
        logfile=os.path.join(
            WorkflowManager.get_path_for_component_and_dir(
                row.execution_dir, WorkflowManager.WorkflowManagerPath.logs),
            "engine.log",
        ),
        cromwell_jar=cromwell_jar,
        cromwell_url=cromwell_url,
    )

    wm = None
    try:
        wm = WorkflowManager.from_janis(
            submission_id=row.submission_id,
            tool=workflow,
            engine=eng,
            prepared_submission=jobfile,
            wait=wait,
        )
        Logger.log("Finished starting task")
        return wm

    except KeyboardInterrupt:
        Logger.info("Exiting...")
        try:
            if wm:
                wm.abort()
        except Exception:
            pass

    except Exception as e:
        # Have to make sure we stop the engine if something happens when creating the task that causes
        # janis to exit early
        eng.stop_engine()
        raise e

    return wm
def init_template(
    templatename,
    stream=None,
    unparsed_init_args=None,
    output_location=None,
    force=False,
):
    """
    :param templatename:
    :param force:
    :return:
    """
    import ruamel.yaml

    outpath = fully_qualify_filename(output_location
                                     or EnvVariables.config_path.resolve(True))

    cached_outd = None

    def get_config():
        """
        This is here to lazily instantiate the config
        """
        nonlocal cached_outd
        if not cached_outd:

            outd = JanisConfiguration.default()

            if templatename:
                tmpl = janistemplates.get_template(templatename)
                schema = janistemplates.get_schema_for_template(tmpl)

                mapped_schema_to_default = {
                    s.identifier: s.default
                    for s in schema if s.default is not None
                }

                # parse extra params
                description = dedent(tmpl.__doc__) if tmpl.__doc__ else None

                parser = InitArgParser(templatename,
                                       schema,
                                       description=description)
                parsed = parser.parse_args(unparsed_init_args)

                try:
                    # "easier to ask for forgiveness than permission" https://stackoverflow.com/a/610923
                    keys_to_skip = set(tmpl.ignore_init_keys)
                except AttributeError:
                    Logger.log(
                        f"Template '{templatename}' didn't have 'ignore_init_keys'"
                    )
                    keys_to_skip = set()

                outd["engine"] = EngineType.cromwell
                outd["template"] = {
                    s.id(): parsed.get(s.id(),
                                       mapped_schema_to_default.get(s.id()))
                    for s in schema if (s.identifier in parsed) or (
                        s.identifier in mapped_schema_to_default
                        and s.identifier not in keys_to_skip)
                }
                outd["template"]["id"] = templatename

            cached_outd = stringify_dict_keys_or_return_value(outd)
        return cached_outd

    if unparsed_init_args and any(k in unparsed_init_args for k in ("-h", "--help")):
        get_config()

    does_exist = os.path.exists(outpath)
    if does_exist and not force:
        Logger.info(
            f"Janis will skip writing config as file exists at: '{outpath}'")
    else:
        if does_exist:
            Logger.info(f"Overwriting template at '{outpath}'")
        else:
            Logger.info(f"Saving Janis config to '{outpath}'")

        os.makedirs(os.path.dirname(outpath), exist_ok=True)
        val = get_config()
        with open(outpath, "w+") as configpath:
            ruamel.yaml.dump(val, configpath, default_flow_style=False)

    if stream:
        ruamel.yaml.dump(get_config(), sys.stdout, default_flow_style=False)
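
# Hedged usage sketch: write a default (template-less) config to an
# illustrative path, overwriting any existing file.
init_template(
    templatename=None,
    unparsed_init_args=[],
    output_location="/tmp/janis.conf",
    force=True,
)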
Example 15
def fromjanis(
    workflow: Union[str, j.Tool, Type[j.Tool]],
    name: str = None,
    engine: Union[str, Engine] = None,
    filescheme: Union[str, FileScheme] = LocalFileScheme(),
    validation_reqs=None,
    batchrun_reqs=None,
    hints: Optional[Dict[str, str]] = None,
    output_dir: Optional[str] = None,
    dryrun: bool = False,
    inputs: Union[str, dict] = None,
    required_inputs: dict = None,
    watch=True,
    max_cores=None,
    max_memory=None,
    force=False,
    keep_intermediate_files=False,
    recipes=None,
    run_in_background=True,
    run_in_foreground=None,
    dbconfig=None,
    only_toolbox=False,
    no_store=False,
    allow_empty_container=False,
    check_files=True,
    container_override: dict = None,
    **kwargs,
):
    cm = ConfigManager.manager()
    jc = JanisConfiguration.manager()

    wf: Optional[Tool] = resolve_tool(
        tool=workflow,
        name=name,
        from_toolshed=True,
        only_toolbox=only_toolbox,
        force=force,
    )
    if not wf:
        raise Exception("Couldn't find workflow with name: " + str(workflow))

    # if isinstance(tool, j.CommandTool):
    #     tool = tool.wrapped_in_wf()
    # elif isinstance(tool, j.CodeTool):
    #     tool = tool.wrapped_in_wf()

    # organise inputs
    inputsdict = {}

    if recipes:
        valuesfromrecipe = jc.recipes.get_recipe_for_keys(recipes)
        inputsdict.update(valuesfromrecipe)

    inputsdict.update(
        cascade_inputs(
            wf=wf,
            inputs=inputs,
            required_inputs=required_inputs,
            batchrun_options=batchrun_reqs,
        ))

    row = cm.create_task_base(wf,
                              outdir=output_dir,
                              store_in_centraldb=not no_store)
    print(row.wid, file=sys.stdout)

    engine = engine or jc.engine

    eng = get_engine_from_eng(
        engine,
        wid=row.wid,
        execdir=WorkflowManager.get_path_for_component_and_dir(
            row.outputdir, WorkflowManager.WorkflowManagerPath.execution),
        confdir=WorkflowManager.get_path_for_component_and_dir(
            row.outputdir, WorkflowManager.WorkflowManagerPath.configuration),
        logfile=os.path.join(
            WorkflowManager.get_path_for_component_and_dir(
                row.outputdir, WorkflowManager.WorkflowManagerPath.logs),
            "engine.log",
        ),
        watch=watch,
        **kwargs,
    )
    fs = get_filescheme_from_fs(filescheme, **kwargs)
    environment = Environment(f"custom_{wf.id()}", eng, fs)

    try:

        # Note: run_in_foreground can be None, so
        # (not (run_in_foreground is True)) != (run_in_foreground is False)

        should_run_in_background = (run_in_background is True
                                    or jc.run_in_background is True
                                    ) and not (run_in_foreground is True)

        tm = cm.start_task(
            wid=row.wid,
            tool=wf,
            environment=environment,
            validation_requirements=validation_reqs,
            batchrun_requirements=batchrun_reqs,
            task_path=row.outputdir,
            hints=hints,
            inputs_dict=inputsdict,
            dryrun=dryrun,
            watch=watch,
            max_cores=max_cores,
            max_memory=max_memory,
            keep_intermediate_files=keep_intermediate_files,
            run_in_background=should_run_in_background,
            dbconfig=dbconfig,
            allow_empty_container=allow_empty_container,
            container_override=container_override,
            check_files=check_files,
        )
        Logger.log("Finished starting task task")
        return tm

    except KeyboardInterrupt:
        Logger.info("Exiting...")

    except Exception as e:
        # Have to make sure we stop the engine if something happens when creating the task that causes
        # janis to exit early
        environment.engine.stop_engine()
        raise e