def abort_wids(sids: List[str], wait=True):
    """
    Mark each submission in ``sids`` as aborted, and (by default) block until
    every one of them has actually reached a final state.

    :param sids: submission ids or paths to abort
    :param wait: when True, poll each submission's database until it reports
        a final state (logging progress every ~5 seconds)
    :raises Exception: re-raises any failure while *marking* a submission
        aborted; failures while *waiting* are logged but swallowed
    """
    config_manager = ConfigManager(db_path=None)

    # First pass: flag every submission as aborted.
    for sid in sids:
        try:
            task_row = config_manager.get_row_for_submission_id_or_path(sid)
            WorkflowManager.mark_aborted(task_row.execution_dir, task_row.submission_id)
        except Exception as e:
            Logger.critical(f"Couldn't abort '{sid}': " + str(e))
            raise e

    # Second pass (optional): poll until each submission settles.
    if wait:
        Logger.info(
            "Waiting until completely aborted. This can take up to a few minutes to complete."
        )
        for sid in sids:
            try:
                manager = ConfigManager.get_from_path_or_submission_lazy(
                    sid, readonly=True
                )
                n_checks = 0
                while not manager.database.get_uncached_status().is_in_final_state():
                    time.sleep(1)
                    n_checks += 1
                    if n_checks % 5 == 0:
                        Logger.info(
                            f"Still waiting for '{sid}' to move to final state"
                        )
            except Exception as e:
                Logger.critical(f"Couldn't watch '{sid}' until aborted: {str(e)}")

    Logger.info(f"Jobs {' '.join(sids)} should be completely aborted now")
def create_task_base(self, wf: Workflow, job: PreparedJob):
    """
    Allocate a new submission: generate a fresh submission id, resolve the
    output / execution directories, create the on-disk directory structure,
    and (optionally) register the task in the central database.

    :param wf: the workflow being submitted
    :param job: the prepared job; ``job.execution_dir`` may be filled in here
    :return: the new ``TaskRow`` describing this submission
    """
    # IDs already in the central DB must not be re-issued.
    forbiddenids = set()
    if job.store_in_central_db:
        try:
            with self.with_cursor() as cursor:
                forbiddenids = set(
                    t[0] for t in cursor.execute("SELECT id FROM tasks").fetchall()
                )
        except sqlite3.OperationalError as e:
            if "no such column: id" in repr(e):
                # Old schema without an 'id' column: back up the old janis-db
                # file and retry against a freshly-created database.
                from shutil import move

                dt = datetime.utcnow()
                np = f"{job.db_path}.original-{dt.strftime('%Y%m%d')}"
                Logger.warn(f"Moving old janis-db to '{np}'")
                move(job.db_path, np)
                self._taskDB = None
                return self.create_task_base(wf=wf, job=job)
            raise
    submission_id = generate_new_id(forbiddenids)

    output_dir = fully_qualify_filename(job.output_dir)
    if not job.execution_dir:
        # Default the execution dir to '<output-dir>/janis'.
        job.execution_dir = os.path.join(output_dir, "janis")
        Logger.debug(
            f"No execution-dir was provided, constructed one from the output-dir: {job.execution_dir}"
        )
    job.execution_dir = fully_qualify_filename(job.execution_dir)

    Logger.info(
        f"Starting task with id = '{submission_id}' | output dir: {job.output_dir} | execution dir: {job.execution_dir}"
    )
    row = TaskRow(
        submission_id, execution_dir=job.execution_dir, output_dir=output_dir
    )
    WorkflowManager.create_dir_structure(job.execution_dir)

    if job.store_in_central_db:
        self.get_lazy_db_connection().insert_task(row)
    else:
        Logger.info(
            f"Not storing task '{submission_id}' in database. To watch, use: 'janis watch {output_dir}'"
        )

    # Commit and drop the central-db connection so the submission doesn't
    # keep the sqlite file locked.
    if self._connection:
        self._connection.commit()
        self._connection.close()
    self._taskDB = None
    self._connection = None
    return row
def abort_wids(wids: List[str]):
    """
    Mark each of the given workflow ids as aborted.

    Falls back to treating the id as a path when it isn't in the central DB.

    :raises Exception: re-raises the first failure after logging it
    """
    for wid in wids:
        try:
            task_row = (
                ConfigManager.manager().get_lazy_db_connection().get_by_wid(wid)
            )
            if task_row:
                WorkflowManager.mark_aborted(task_row.outputdir, task_row.wid)
            else:
                # Not registered centrally — interpret the id as a directory.
                WorkflowManager.mark_aborted(wid, None)
        except Exception as e:
            Logger.critical(f"Couldn't abort '{wid}': " + str(e))
            raise e
def from_wid(self, wid, readonly=False):
    """
    Resolve a WorkflowManager from a workflow id.

    Tries the central tasks table first; if the id is unknown, falls back to
    interpreting it as a path on disk.

    :raises Exception: when the id matches neither a task nor a directory
    """
    self.readonly = readonly
    with self.with_cursor() as cursor:
        query_result = cursor.execute(
            "SELECT outputdir FROM tasks where wid=?", (wid,)
        ).fetchone()
        if query_result:
            return WorkflowManager.from_path_with_wid(
                query_result[0], wid=wid, readonly=readonly
            )
        # Unknown wid — maybe the caller gave us a path instead.
        expanded_path = fully_qualify_filename(wid)
        if os.path.exists(expanded_path):
            return WorkflowManager.from_path_get_latest(
                expanded_path, readonly=readonly
            )
        raise Exception(f"Couldn't find task with id='{wid}'")
def do_prepare(args):
    """
    Prepare a job from CLI args, write the prepared-submission file, and
    print the resulting job config as YAML with a hint on how to run it.
    """
    job, wf = prepare_from_args(args, run_prepare_processing=True)
    job_as_dict = job.to_dict()

    WorkflowManager.write_prepared_submission_file(
        prepared_job=job, output_dir=job.output_dir, force_write=True
    )

    script_location = os.path.join(job.output_dir, "run.sh")
    Logger.info("Job prepared successfully, you can run your workflow with:")
    Logger.info(f"\tsh {script_location}")

    print(dict_to_yaml_string(job_as_dict))
def query_tasks(self, status, name) -> Dict[str, WorkflowModel]:
    """
    Build a WorkflowModel for every known task that matches the given
    status / name filters.

    :param status: status filter passed through to WorkflowManager.has
    :param name: name filter passed through to WorkflowManager.has
    :return: mapping of wid -> WorkflowModel for matching tasks; tasks whose
        output dir is missing or that error are collected and warned about
    """
    # Fix: '[TaskRow]' is a list literal, not a type annotation.
    rows: List[TaskRow] = self.get_lazy_db_connection().get_all_tasks()
    failed = []
    relevant = {}

    for row in rows:
        if not os.path.exists(row.outputdir):
            # Task directory was deleted out from under us.
            failed.append(row.wid)
            continue
        try:
            metadb = WorkflowManager.has(
                row.outputdir, wid=row.wid, name=name, status=status
            )
            if metadb:
                model = metadb.to_model()
                model.outdir = row.outputdir
                relevant[row.wid] = model
        except Exception as e:
            Logger.critical(f"Couldn't check workflow '{row.wid}': {e}")
            failed.append(row.wid)

    if failed:
        failedstr = ", ".join(failed)
        # Fix: the two adjacent f-strings previously concatenated to
        # "...run'janis cleanup'..." with no space after "run".
        Logger.warn(
            f"Couldn't get information for tasks: {failedstr}, run "
            f"'janis cleanup' to clean up your tasks."
        )

    return relevant
def cleanup_missing_tasks(self):
    """
    Find tasks whose output directory is missing or unreadable, show them,
    and (after interactive confirmation) remove them from the central DB.
    """
    from tabulate import tabulate

    rows: List[TaskRow] = self.get_lazy_db_connection().get_all_tasks()
    failed = []
    for row in rows:
        if not os.path.exists(row.outputdir):
            failed.append((row.wid, row.outputdir))
            continue
        try:
            # Only probing that the task dir can still be opened.
            _ = WorkflowManager.from_path_with_wid(
                row.outputdir, row.wid, readonly=True
            )
        except Exception:
            # Fix: exception variable was bound but never used.
            failed.append((row.wid, row.outputdir))

    if failed:
        Logger.warn("Removing the following tasks:\n" + tabulate(failed))
        # NOTE(review): pressing Enter (empty input) skips removal even though
        # the "(Y / n)" prompt suggests yes is the default — confirm intent.
        if "y" in str(input(f"Remove {len(failed)} tasks (Y / n)? ")).lower():
            self.get_lazy_db_connection().remove_by_ids([r[0] for r in failed])
            Logger.info("Cleaned up tasks")
        else:
            Logger.info("Skipping cleaning of tasks")
def test_basic_extension_override(self):
    """An output's explicit 'extension' should carry through evaluation."""
    workflow = j.WorkflowBuilder("wf")
    workflow.step("stp", ct)
    workflow.output("out", source=workflow.stp.out, extension="_fastqc.txt")

    evaluated = WorkflowManager.evaluate_output_params(
        wf=workflow, inputs={}, submission_id="SID", run_id="RID"
    )
    self.assertEqual("_fastqc.txt", evaluated[0].extension)
def get_from_path_or_submission(
    self, submission_id, readonly: bool, perform_path_check=True
):
    """
    Resolve a WorkflowManager from a submission id, optionally first checking
    whether the id is actually a path on disk.

    :raises Exception: when neither a submission row nor a directory matches
    """
    if perform_path_check:
        expanded_path = fully_qualify_filename(submission_id)
        if os.path.exists(expanded_path):
            return WorkflowManager.from_path_get_latest_manager(
                expanded_path, readonly=readonly
            )

    row = self.get_lazy_db_connection().get_by_id(submission_id)
    if not row:
        raise Exception(
            f"Couldn't find task with id='{submission_id}', and no directory was found "
        )
    return WorkflowManager.from_path_with_submission_id(
        row.execution_dir,
        submission_id=submission_id,
        readonly=readonly,
    )
def remove_task(self, task: Union[str, TaskRow], keep_output: bool):
    """
    Remove a task: its execution directory, its DB registration, and
    (unless keep_output) its output directory.

    :param task: either a workflow id or the TaskRow itself
    :param keep_output: when True, leave the output directory on disk
    :raises Exception: when a workflow id is given but not found in the DB
    """
    if isinstance(task, str):
        wid = task
        task = self.get_lazy_db_connection().get_by_wid(task)
        if task is None:
            raise Exception("Couldn't find workflow with ID = " + wid)

    tm = WorkflowManager.from_path_with_wid(task.outputdir, task.wid)
    tm.remove_exec_dir()
    tm.database.close()

    # Fix: the original folded 'keep_output=True' and 'directory missing'
    # into one else-branch, logging "can't find" even when the user had
    # explicitly asked to keep the output.
    if keep_output:
        Logger.info("Skipping output dir deletion (keep_output): " + task.outputdir)
    elif os.path.exists(task.outputdir):
        Logger.info("Removing " + task.outputdir)
        rmtree(task.outputdir)
    else:
        Logger.info("Skipping output dir deletion, can't find: " + task.outputdir)

    self.get_lazy_db_connection().remove_by_id(task.wid)
    Logger.info("Deleted task: " + task.wid)
def start_task(
    self,
    wid: str,
    tool: Tool,
    task_path: str,
    environment: Environment,
    hints: Dict[str, str],
    validation_requirements: Optional[ValidationRequirements],
    batchrun_requirements: Optional[BatchRunRequirements],
    inputs_dict: dict = None,
    dryrun=False,
    watch=True,
    max_cores=None,
    max_memory=None,
    keep_intermediate_files=False,
    run_in_background=True,
    dbconfig=None,
    allow_empty_container=False,
    container_override: dict = None,
    check_files=True,
) -> WorkflowManager:
    """
    Thin pass-through that starts a task via WorkflowManager.from_janis,
    forwarding every option unchanged (task_path becomes 'outdir').
    """
    from_janis_kwargs = dict(
        tool=tool,
        outdir=task_path,
        environment=environment,
        hints=hints,
        inputs_dict=inputs_dict,
        validation_requirements=validation_requirements,
        batchrun_requirements=batchrun_requirements,
        dryrun=dryrun,
        watch=watch,
        max_cores=max_cores,
        max_memory=max_memory,
        keep_intermediate_files=keep_intermediate_files,
        run_in_background=run_in_background,
        dbconfig=dbconfig,
        allow_empty_container=allow_empty_container,
        container_override=container_override,
        check_files=check_files,
    )
    return WorkflowManager.from_janis(wid, **from_janis_kwargs)
def get_from_path_or_submission_lazy(
    submission_id,
    readonly: bool,
    db_path: Optional[str] = None,
):
    """
    2020-10-01 mfranklin: Probably the method you want to get a WorkflowManager from submissionID:
    :return: WorkflowManager of the submission_id (or THROWS)
    """
    # If the id is actually a path on disk, resolve it directly.
    candidate_path = fully_qualify_filename(submission_id)
    if os.path.exists(candidate_path):
        return WorkflowManager.from_path_get_latest_manager(
            candidate_path, readonly=readonly
        )

    # Otherwise, look it up in the central DB; the path check above already
    # ran, so skip it inside get_from_path_or_submission.
    manager = ConfigManager(db_path=db_path, readonly=True)
    return manager.get_from_path_or_submission(
        submission_id=submission_id, readonly=readonly, perform_path_check=False
    )
def get_row_for_submission_id_or_path(self, submission_id) -> TaskRow:
    """
    Fetch the TaskRow for a submission id, falling back to interpreting the
    id as a path on disk (in which case a synthetic TaskRow is built).

    :raises Exception: when neither a DB row nor a directory matches
    """
    row = self.get_lazy_db_connection().get_by_id(submission_id)
    if row:
        return row

    candidate_path = fully_qualify_filename(submission_id)
    if not os.path.exists(candidate_path):
        raise Exception(
            f"Couldn't find task with id='{submission_id}', and no directory was found."
        )

    # Build a minimal row from whatever the path's latest submission says.
    execpath, sid = WorkflowManager.from_path_get_latest_submission_id(
        candidate_path
    )
    return TaskRow(
        execution_dir=execpath,
        submission_id=sid,
        output_dir=None,
        timestamp=None,
    )
def translate(
    config: JanisConfiguration,
    tool: Union[str, j.CommandTool, Type[j.CommandTool], j.Workflow, Type[j.Workflow]],
    translation: str,
    name: str = None,
    hints: Optional[Dict[str, str]] = None,
    output_dir: Optional[str] = None,
    inputs: Union[str, dict] = None,
    allow_empty_container=False,
    container_override=None,
    skip_digest_lookup=False,
    skip_digest_cache=False,
    recipes: List[str] = None,
    **kwargs,
):
    """
    Resolve a tool by name/reference and translate it to the requested
    language (e.g. CWL / WDL), printing the result to stdout.

    :param translation: target specification to translate to
    :param output_dir: when set, the translation is also written to disk
    :param skip_digest_lookup: when True, container_override is used verbatim
        instead of being resolved through the digest cache
    :return: the translated workflow/tool string
    :raises Exception: when the tool can't be found, a DynamicWorkflow is
        given without inputs, or the tool type is unsupported
    """
    toolref, _ = resolve_tool(tool, name, from_toolshed=True)
    if not toolref:
        raise Exception(f"Couldn't find tool: '{tool}'")

    # Collect inputs: recipes first, then explicit inputs override them.
    inputsdict = {}
    if recipes:
        valuesfromrecipe = config.recipes.get_recipe_for_keys(recipes)
        inputsdict.update(valuesfromrecipe)
    inputsdict.update(
        cascade_inputs(
            wf=None,
            inputs=inputs,
            required_inputs=None,
        )
    )

    if isinstance(toolref, DynamicWorkflow):
        # Dynamic workflows construct themselves from their inputs, so the
        # inputs must be known before translation.
        if not inputsdict:
            raise Exception("Dynamic workflows cannot be translated without the inputs")
        toolref.constructor(inputsdict, hints)
        inputsdict = toolref.modify_inputs(inputsdict, hints)

    # Resolve container digests unless explicitly skipped.
    container_overrides = container_override
    if not skip_digest_lookup:
        container_overrides = WorkflowManager.prepare_container_override(
            toolref,
            container_override,
            cache_location=config.digest_cache_location,
            skip_digest_cache=skip_digest_cache,
        )

    if isinstance(toolref, j.WorkflowBase):
        wfstr, _, _ = toolref.translate(
            translation,
            to_console=False,
            to_disk=bool(output_dir),
            export_path=output_dir or "./{language}",
            hints=hints,
            additional_inputs=inputsdict,
            allow_empty_container=allow_empty_container,
            container_override=container_overrides,
        )
    elif isinstance(toolref, (j.CommandTool, j.CodeTool)):
        wfstr = toolref.translate(
            translation=translation,
            to_console=False,
            to_disk=bool(output_dir),
            export_path=output_dir or "./{language}",
            allow_empty_container=allow_empty_container,
            container_override=container_overrides,
        )
    else:
        name = toolref.__name__ if isclass(toolref) else toolref.__class__.__name__
        raise Exception("Unsupported tool type: " + name)

    print(wfstr, file=sys.stdout)
    return wfstr
def fromjanis(
    workflow: Union[str, j.Tool, Type[j.Tool]],
    name: str = None,
    engine: Union[str, Engine] = None,
    # NOTE(review): this default is evaluated once at import time, so every
    # call shares the same LocalFileScheme instance — confirm it's stateless.
    filescheme: Union[str, FileScheme] = LocalFileScheme(),
    validation_reqs=None,
    batchrun_reqs=None,
    hints: Optional[Dict[str, str]] = None,
    output_dir: Optional[str] = None,
    dryrun: bool = False,
    inputs: Union[str, dict] = None,
    required_inputs: dict = None,
    watch=True,
    max_cores=None,
    max_memory=None,
    force=False,
    keep_intermediate_files=False,
    recipes=None,
    run_in_background=True,
    run_in_foreground=None,
    dbconfig=None,
    only_toolbox=False,
    no_store=False,
    allow_empty_container=False,
    check_files=True,
    container_override: dict = None,
    **kwargs,
):
    """
    Resolve a workflow, create its task directory/DB entry, build the engine
    and environment, and start the task (printing the new wid to stdout).

    :return: the WorkflowManager for the started task (None if interrupted)
    :raises Exception: when the workflow can't be resolved, or re-raises any
        start-up failure after stopping the engine
    """
    cm = ConfigManager.manager()
    jc = JanisConfiguration.manager()

    wf: Optional[Tool] = resolve_tool(
        tool=workflow,
        name=name,
        from_toolshed=True,
        only_toolbox=only_toolbox,
        force=force,
    )
    if not wf:
        raise Exception("Couldn't find workflow with name: " + str(workflow))

    # if isinstance(tool, j.CommandTool):
    #     tool = tool.wrapped_in_wf()
    # elif isinstance(tool, j.CodeTool):
    #     tool = tool.wrapped_in_wf()

    # organise inputs: recipes first, then explicit inputs override them
    inputsdict = {}
    if recipes:
        valuesfromrecipe = jc.recipes.get_recipe_for_keys(recipes)
        inputsdict.update(valuesfromrecipe)
    inputsdict.update(
        cascade_inputs(
            wf=wf,
            inputs=inputs,
            required_inputs=required_inputs,
            batchrun_options=batchrun_reqs,
        )
    )

    row = cm.create_task_base(wf, outdir=output_dir, store_in_centraldb=not no_store)
    # The wid on stdout is the scriptable output of this command.
    print(row.wid, file=sys.stdout)

    engine = engine or jc.engine
    eng = get_engine_from_eng(
        engine,
        wid=row.wid,
        execdir=WorkflowManager.get_path_for_component_and_dir(
            row.outputdir, WorkflowManager.WorkflowManagerPath.execution
        ),
        confdir=WorkflowManager.get_path_for_component_and_dir(
            row.outputdir, WorkflowManager.WorkflowManagerPath.configuration
        ),
        logfile=os.path.join(
            WorkflowManager.get_path_for_component_and_dir(
                row.outputdir, WorkflowManager.WorkflowManagerPath.logs
            ),
            "engine.log",
        ),
        watch=watch,
        **kwargs,
    )
    fs = get_filescheme_from_fs(filescheme, **kwargs)
    environment = Environment(f"custom_{wf.id()}", eng, fs)

    try:
        # Note: run_in_foreground can be None, so
        # (not (run_in_foreground is True)) != (run_in_foreground is False)
        should_run_in_background = (
            run_in_background is True or jc.run_in_background is True
        ) and not (run_in_foreground is True)
        tm = cm.start_task(
            wid=row.wid,
            tool=wf,
            environment=environment,
            validation_requirements=validation_reqs,
            batchrun_requirements=batchrun_reqs,
            task_path=row.outputdir,
            hints=hints,
            inputs_dict=inputsdict,
            dryrun=dryrun,
            watch=watch,
            max_cores=max_cores,
            max_memory=max_memory,
            keep_intermediate_files=keep_intermediate_files,
            run_in_background=should_run_in_background,
            dbconfig=dbconfig,
            allow_empty_container=allow_empty_container,
            container_override=container_override,
            check_files=check_files,
        )
        Logger.log("Finished starting task task")
        return tm
    except KeyboardInterrupt:
        Logger.info("Exiting...")
    except Exception as e:
        # Have to make sure we stop the engine if something happens when creating the task that causes
        # janis to exit early
        environment.engine.stop_engine()
        raise e
def create_task_base(self, wf: Workflow, outdir=None, store_in_centraldb=True):
    """
    Allocate a new workflow id and task directory, create the on-disk
    directory structure, and (optionally) register the task centrally.

    :param wf: the workflow being submitted (its id seeds the default outdir)
    :param outdir: explicit task directory; when omitted, one is generated
        under the configured output directory as '<timestamp>_<wid>/'
    :param store_in_centraldb: when False, the task isn't inserted into the
        central tasks DB (it can still be watched by path)
    :return: the new TaskRow
    :raises Exception: when neither outdir nor a configured output dir exists
    """
    # Fix: the original had a truncated string statement ("If you don't
    # spec") placed *after* the first statement — a dead no-op, not a
    # docstring. Replaced with the real docstring above.
    config = JanisConfiguration.manager()

    if not outdir and not config.outputdir:
        raise Exception(
            f"You must specify an output directory (or specify an '{JanisConfiguration.Keys.OutputDir.value}' "
            f"in your configuration)"
        )

    default_outdir = None
    if config.outputdir:
        default_outdir = os.path.join(config.outputdir, wf.id())

    # Collect ids that must not be re-issued.
    forbiddenids = set()
    if store_in_centraldb:
        with self.with_cursor() as cursor:
            forbiddenids = set(
                t[0] for t in cursor.execute("SELECT wid FROM tasks").fetchall()
            )
    if outdir:
        if os.path.exists(outdir):
            # this should theoretically scoop through all the ones in the taskDB and
            # add them to the forbidden ones, though this might cause more issues for now.
            forbiddenids = forbiddenids.union(set(os.listdir(outdir)))
    else:
        if os.path.exists(default_outdir):
            forbiddenids = forbiddenids.union(set(os.listdir(default_outdir)))
    wid = generate_new_id(forbiddenids)

    task_path = outdir
    if not task_path:
        od = default_outdir
        dt = datetime.now().strftime("%Y%m%d_%H%M%S")
        task_path = os.path.join(od, f"{dt}_{wid}/")
    task_path = fully_qualify_filename(task_path)

    Logger.info(f"Starting task with id = '{wid}'")

    row = TaskRow(wid, task_path)
    WorkflowManager.create_dir_structure(task_path)

    if store_in_centraldb:
        self.get_lazy_db_connection().insert_task(row)
    else:
        Logger.info(
            f"Not storing task '{wid}' in database. To watch, use: 'janis watch {task_path}'"
        )

    # Commit and drop the central-db connection so we don't hold the
    # sqlite lock while the task runs.
    if self._connection:
        self._connection.commit()
        self._connection.close()
    self._taskDB = None
    self._connection = None
    return row
def run_from_jobfile(
    workflow: Union[str, j.Tool, Type[j.Tool]],
    jobfile: PreparedJob,
    engine: Union[str, Engine, None] = None,
    wait: bool = False,
    # specific engine args
    cromwell_jar: Optional[str] = None,
    cromwell_url: Optional[str] = None,
):
    """
    Create the task base for a prepared job, wire up submit logging and the
    engine, then start the workflow (printing the submission id to stdout).

    :param wait: passed through to WorkflowManager.from_janis
    :return: the WorkflowManager, or None if interrupted before/around start
    :raises Exception: re-raises start-up failures after stopping the engine
    """
    cm = ConfigManager(db_path=jobfile.db_path)

    if not workflow:
        raise Exception("Couldn't find workflow with name: " + str(workflow))

    row = cm.create_task_base(
        wf=workflow,
        job=jobfile,
    )
    jobfile.execution_dir = row.execution_dir
    jobfile.output_dir = row.output_dir

    # set logger for submit
    Logger.set_write_level(Logger.CONSOLE_LEVEL)
    logpath = os.path.join(
        WorkflowManager.get_path_for_component_and_dir(
            row.execution_dir, WorkflowManager.WorkflowManagerPath.logs
        ),
        "janis-submit.log",
    )
    Logger.WRITE_LEVELS = {Logger.CONSOLE_LEVEL: (logpath, open(logpath, "a"))}
    Logger.debug(f"Set submission logging to '{logpath}'")
    # The submission id on stdout is the scriptable output of this command.
    print(row.submission_id, file=sys.stdout)

    eng = get_engine_from_eng(
        engine or jobfile.engine,
        wid=row.submission_id,
        execdir=WorkflowManager.get_path_for_component_and_dir(
            row.execution_dir, WorkflowManager.WorkflowManagerPath.execution
        ),
        confdir=WorkflowManager.get_path_for_component_and_dir(
            row.execution_dir, WorkflowManager.WorkflowManagerPath.configuration
        ),
        logfile=os.path.join(
            WorkflowManager.get_path_for_component_and_dir(
                row.execution_dir, WorkflowManager.WorkflowManagerPath.logs
            ),
            "engine.log",
        ),
        cromwell_jar=cromwell_jar,
        cromwell_url=cromwell_url,
    )

    # Fix: 'wm' was previously unbound when KeyboardInterrupt fired before
    # from_janis returned, so the trailing 'return wm' raised NameError.
    wm = None
    try:
        wm = WorkflowManager.from_janis(
            submission_id=row.submission_id,
            tool=workflow,
            engine=eng,
            prepared_submission=jobfile,
            wait=wait,
        )
        Logger.log("Finished starting task")
        return wm
    except KeyboardInterrupt:
        Logger.info("Exiting...")
        if wm is not None:
            # Best-effort abort; the user already asked to stop.
            try:
                wm.abort()
            except Exception:
                pass
    except Exception as e:
        # Have to make sure we stop the engine if something happens when creating the task that causes
        # janis to exit early
        eng.stop_engine()
        raise e
    return wm