def abort_wids(sids: List[str], wait=True):
    """
    Mark each submission in ``sids`` as aborted, and (by default) block until
    every one of them has actually reached a final state.

    :param sids: submission ids or paths to abort
    :param wait: when True, poll each submission's database until it reports
        a final state (logging progress every ~5 seconds)
    :raises Exception: re-raises any failure while *marking* a submission
        aborted; failures while *waiting* are logged but swallowed
    """
    config_manager = ConfigManager(db_path=None)

    # First pass: flag every submission as aborted.
    for sid in sids:
        try:
            task_row = config_manager.get_row_for_submission_id_or_path(sid)
            WorkflowManager.mark_aborted(task_row.execution_dir, task_row.submission_id)
        except Exception as e:
            Logger.critical(f"Couldn't abort '{sid}': " + str(e))
            raise e

    # Second pass (optional): poll until each submission settles.
    if wait:
        Logger.info(
            "Waiting until completely aborted. This can take up to a few minutes to complete."
        )
        for sid in sids:
            try:
                manager = ConfigManager.get_from_path_or_submission_lazy(
                    sid, readonly=True
                )
                n_checks = 0
                while not manager.database.get_uncached_status().is_in_final_state():
                    time.sleep(1)
                    n_checks += 1
                    if n_checks % 5 == 0:
                        Logger.info(
                            f"Still waiting for '{sid}' to move to final state"
                        )
            except Exception as e:
                Logger.critical(f"Couldn't watch '{sid}' until aborted: {str(e)}")

    Logger.info(f"Jobs {' '.join(sids)} should be completely aborted now")
def create_task_base(self, wf: Workflow, job: PreparedJob):
    """
    Allocate a new submission: generate a fresh submission id, resolve the
    output / execution directories, create the on-disk directory structure,
    and (optionally) register the task in the central database.

    :param wf: the workflow being submitted
    :param job: the prepared job; ``job.execution_dir`` may be filled in here
    :return: the new ``TaskRow`` describing this submission
    """
    # IDs already in the central DB must not be re-issued.
    forbiddenids = set()
    if job.store_in_central_db:
        try:
            with self.with_cursor() as cursor:
                forbiddenids = set(
                    t[0] for t in cursor.execute("SELECT id FROM tasks").fetchall()
                )
        except sqlite3.OperationalError as e:
            if "no such column: id" in repr(e):
                # Old schema without an 'id' column: back up the old janis-db
                # file and retry against a freshly-created database.
                from shutil import move

                dt = datetime.utcnow()
                np = f"{job.db_path}.original-{dt.strftime('%Y%m%d')}"
                Logger.warn(f"Moving old janis-db to '{np}'")
                move(job.db_path, np)
                self._taskDB = None
                return self.create_task_base(wf=wf, job=job)
            raise
    submission_id = generate_new_id(forbiddenids)

    output_dir = fully_qualify_filename(job.output_dir)
    if not job.execution_dir:
        # Default the execution dir to '<output-dir>/janis'.
        job.execution_dir = os.path.join(output_dir, "janis")
        Logger.debug(
            f"No execution-dir was provided, constructed one from the output-dir: {job.execution_dir}"
        )
    job.execution_dir = fully_qualify_filename(job.execution_dir)

    Logger.info(
        f"Starting task with id = '{submission_id}' | output dir: {job.output_dir} | execution dir: {job.execution_dir}"
    )
    row = TaskRow(
        submission_id, execution_dir=job.execution_dir, output_dir=output_dir
    )
    WorkflowManager.create_dir_structure(job.execution_dir)

    if job.store_in_central_db:
        self.get_lazy_db_connection().insert_task(row)
    else:
        Logger.info(
            f"Not storing task '{submission_id}' in database. To watch, use: 'janis watch {output_dir}'"
        )

    # Commit and drop the central-db connection so the submission doesn't
    # keep the sqlite file locked.
    if self._connection:
        self._connection.commit()
        self._connection.close()
    self._taskDB = None
    self._connection = None
    return row
def abort_wids(wids: List[str]):
    """
    Mark each of the given workflow ids as aborted.

    Falls back to treating the id as a path when it isn't in the central DB.

    :raises Exception: re-raises the first failure after logging it
    """
    for wid in wids:
        try:
            task_row = (
                ConfigManager.manager().get_lazy_db_connection().get_by_wid(wid)
            )
            if task_row:
                WorkflowManager.mark_aborted(task_row.outputdir, task_row.wid)
            else:
                # Not registered centrally — interpret the id as a directory.
                WorkflowManager.mark_aborted(wid, None)
        except Exception as e:
            Logger.critical(f"Couldn't abort '{wid}': " + str(e))
            raise e
def from_wid(self, wid, readonly=False):
    """
    Resolve a WorkflowManager from a workflow id.

    Tries the central tasks table first; if the id is unknown, falls back to
    interpreting it as a path on disk.

    :raises Exception: when the id matches neither a task nor a directory
    """
    self.readonly = readonly
    with self.with_cursor() as cursor:
        query_result = cursor.execute(
            "SELECT outputdir FROM tasks where wid=?", (wid,)
        ).fetchone()
        if query_result:
            return WorkflowManager.from_path_with_wid(
                query_result[0], wid=wid, readonly=readonly
            )
        # Unknown wid — maybe the caller gave us a path instead.
        expanded_path = fully_qualify_filename(wid)
        if os.path.exists(expanded_path):
            return WorkflowManager.from_path_get_latest(
                expanded_path, readonly=readonly
            )
        raise Exception(f"Couldn't find task with id='{wid}'")
def do_prepare(args):
    """
    Prepare a job from CLI args, write the prepared-submission file, and
    print the resulting job config as YAML with a hint on how to run it.
    """
    job, wf = prepare_from_args(args, run_prepare_processing=True)
    job_as_dict = job.to_dict()

    WorkflowManager.write_prepared_submission_file(
        prepared_job=job, output_dir=job.output_dir, force_write=True
    )

    script_location = os.path.join(job.output_dir, "run.sh")
    Logger.info("Job prepared successfully, you can run your workflow with:")
    Logger.info(f"\tsh {script_location}")

    print(dict_to_yaml_string(job_as_dict))
def query_tasks(self, status, name) -> Dict[str, WorkflowModel]:
    """
    Build a WorkflowModel for every known task that matches the given
    status / name filters.

    :param status: status filter passed through to WorkflowManager.has
    :param name: name filter passed through to WorkflowManager.has
    :return: mapping of wid -> WorkflowModel for matching tasks; tasks whose
        output dir is missing or that error are collected and warned about
    """
    # Fix: '[TaskRow]' is a list literal, not a type annotation.
    rows: List[TaskRow] = self.get_lazy_db_connection().get_all_tasks()
    failed = []
    relevant = {}

    for row in rows:
        if not os.path.exists(row.outputdir):
            # Task directory was deleted out from under us.
            failed.append(row.wid)
            continue
        try:
            metadb = WorkflowManager.has(
                row.outputdir, wid=row.wid, name=name, status=status
            )
            if metadb:
                model = metadb.to_model()
                model.outdir = row.outputdir
                relevant[row.wid] = model
        except Exception as e:
            Logger.critical(f"Couldn't check workflow '{row.wid}': {e}")
            failed.append(row.wid)

    if failed:
        failedstr = ", ".join(failed)
        # Fix: the two adjacent f-strings previously concatenated to
        # "...run'janis cleanup'..." with no space after "run".
        Logger.warn(
            f"Couldn't get information for tasks: {failedstr}, run "
            f"'janis cleanup' to clean up your tasks."
        )

    return relevant
def cleanup_missing_tasks(self):
    """
    Find tasks whose output directory is missing or unreadable, show them,
    and (after interactive confirmation) remove them from the central DB.
    """
    from tabulate import tabulate

    rows: List[TaskRow] = self.get_lazy_db_connection().get_all_tasks()
    failed = []
    for row in rows:
        if not os.path.exists(row.outputdir):
            failed.append((row.wid, row.outputdir))
            continue
        try:
            # Only probing that the task dir can still be opened.
            _ = WorkflowManager.from_path_with_wid(
                row.outputdir, row.wid, readonly=True
            )
        except Exception:
            # Fix: exception variable was bound but never used.
            failed.append((row.wid, row.outputdir))

    if failed:
        Logger.warn("Removing the following tasks:\n" + tabulate(failed))
        # NOTE(review): pressing Enter (empty input) skips removal even though
        # the "(Y / n)" prompt suggests yes is the default — confirm intent.
        if "y" in str(input(f"Remove {len(failed)} tasks (Y / n)? ")).lower():
            self.get_lazy_db_connection().remove_by_ids([r[0] for r in failed])
            Logger.info("Cleaned up tasks")
        else:
            Logger.info("Skipping cleaning of tasks")
def test_basic_extension_override(self):
    """An output's explicit 'extension' should carry through evaluation."""
    workflow = j.WorkflowBuilder("wf")
    workflow.step("stp", ct)
    workflow.output("out", source=workflow.stp.out, extension="_fastqc.txt")

    evaluated = WorkflowManager.evaluate_output_params(
        wf=workflow, inputs={}, submission_id="SID", run_id="RID"
    )
    self.assertEqual("_fastqc.txt", evaluated[0].extension)
def get_from_path_or_submission(
    self, submission_id, readonly: bool, perform_path_check=True
):
    """
    Resolve a WorkflowManager from a submission id, optionally first checking
    whether the id is actually a path on disk.

    :raises Exception: when neither a submission row nor a directory matches
    """
    if perform_path_check:
        expanded_path = fully_qualify_filename(submission_id)
        if os.path.exists(expanded_path):
            return WorkflowManager.from_path_get_latest_manager(
                expanded_path, readonly=readonly
            )

    row = self.get_lazy_db_connection().get_by_id(submission_id)
    if not row:
        raise Exception(
            f"Couldn't find task with id='{submission_id}', and no directory was found "
        )
    return WorkflowManager.from_path_with_submission_id(
        row.execution_dir,
        submission_id=submission_id,
        readonly=readonly,
    )
def remove_task(self, task: Union[str, TaskRow], keep_output: bool):
    """
    Remove a task: its execution directory, its DB registration, and
    (unless keep_output) its output directory.

    :param task: either a workflow id or the TaskRow itself
    :param keep_output: when True, leave the output directory on disk
    :raises Exception: when a workflow id is given but not found in the DB
    """
    if isinstance(task, str):
        wid = task
        task = self.get_lazy_db_connection().get_by_wid(task)
        if task is None:
            raise Exception("Couldn't find workflow with ID = " + wid)

    tm = WorkflowManager.from_path_with_wid(task.outputdir, task.wid)
    tm.remove_exec_dir()
    tm.database.close()

    # Fix: the original folded 'keep_output=True' and 'directory missing'
    # into one else-branch, logging "can't find" even when the user had
    # explicitly asked to keep the output.
    if keep_output:
        Logger.info("Skipping output dir deletion (keep_output): " + task.outputdir)
    elif os.path.exists(task.outputdir):
        Logger.info("Removing " + task.outputdir)
        rmtree(task.outputdir)
    else:
        Logger.info("Skipping output dir deletion, can't find: " + task.outputdir)

    self.get_lazy_db_connection().remove_by_id(task.wid)
    Logger.info("Deleted task: " + task.wid)
def start_task(
    self,
    wid: str,
    tool: Tool,
    task_path: str,
    environment: Environment,
    hints: Dict[str, str],
    validation_requirements: Optional[ValidationRequirements],
    batchrun_requirements: Optional[BatchRunRequirements],
    inputs_dict: dict = None,
    dryrun=False,
    watch=True,
    max_cores=None,
    max_memory=None,
    keep_intermediate_files=False,
    run_in_background=True,
    dbconfig=None,
    allow_empty_container=False,
    container_override: dict = None,
    check_files=True,
) -> WorkflowManager:
    """
    Thin pass-through that starts a task via WorkflowManager.from_janis,
    forwarding every option unchanged (task_path becomes 'outdir').
    """
    from_janis_kwargs = dict(
        tool=tool,
        outdir=task_path,
        environment=environment,
        hints=hints,
        inputs_dict=inputs_dict,
        validation_requirements=validation_requirements,
        batchrun_requirements=batchrun_requirements,
        dryrun=dryrun,
        watch=watch,
        max_cores=max_cores,
        max_memory=max_memory,
        keep_intermediate_files=keep_intermediate_files,
        run_in_background=run_in_background,
        dbconfig=dbconfig,
        allow_empty_container=allow_empty_container,
        container_override=container_override,
        check_files=check_files,
    )
    return WorkflowManager.from_janis(wid, **from_janis_kwargs)
def get_from_path_or_submission_lazy(
    submission_id,
    readonly: bool,
    db_path: Optional[str] = None,
):
    """
    2020-10-01 mfranklin: Probably the method you want to get a WorkflowManager from submissionID:
    :return: WorkflowManager of the submission_id (or THROWS)
    """
    # If the id is actually a path on disk, resolve it directly.
    candidate_path = fully_qualify_filename(submission_id)
    if os.path.exists(candidate_path):
        return WorkflowManager.from_path_get_latest_manager(
            candidate_path, readonly=readonly
        )

    # Otherwise, look it up in the central DB; the path check above already
    # ran, so skip it inside get_from_path_or_submission.
    manager = ConfigManager(db_path=db_path, readonly=True)
    return manager.get_from_path_or_submission(
        submission_id=submission_id, readonly=readonly, perform_path_check=False
    )
def get_row_for_submission_id_or_path(self, submission_id) -> TaskRow:
    """
    Fetch the TaskRow for a submission id, falling back to interpreting the
    id as a path on disk (in which case a synthetic TaskRow is built).

    :raises Exception: when neither a DB row nor a directory matches
    """
    row = self.get_lazy_db_connection().get_by_id(submission_id)
    if row:
        return row

    candidate_path = fully_qualify_filename(submission_id)
    if not os.path.exists(candidate_path):
        raise Exception(
            f"Couldn't find task with id='{submission_id}', and no directory was found."
        )

    # Build a minimal row from whatever the path's latest submission says.
    execpath, sid = WorkflowManager.from_path_get_latest_submission_id(
        candidate_path
    )
    return TaskRow(
        execution_dir=execpath,
        submission_id=sid,
        output_dir=None,
        timestamp=None,
    )
def translate(
    config: JanisConfiguration,
    tool: Union[str, j.CommandTool, Type[j.CommandTool], j.Workflow, Type[j.Workflow]],
    translation: str,
    name: str = None,
    hints: Optional[Dict[str, str]] = None,
    output_dir: Optional[str] = None,
    inputs: Union[str, dict] = None,
    allow_empty_container=False,
    container_override=None,
    skip_digest_lookup=False,
    skip_digest_cache=False,
    recipes: List[str] = None,
    **kwargs,
):
    """
    Resolve a tool by name/reference and translate it to the requested
    language (e.g. CWL / WDL), printing the result to stdout.

    :param translation: target specification to translate to
    :param output_dir: when set, the translation is also written to disk
    :param skip_digest_lookup: when True, container_override is used verbatim
        instead of being resolved through the digest cache
    :return: the translated workflow/tool string
    :raises Exception: when the tool can't be found, a DynamicWorkflow is
        given without inputs, or the tool type is unsupported
    """
    toolref, _ = resolve_tool(tool, name, from_toolshed=True)
    if not toolref:
        raise Exception(f"Couldn't find tool: '{tool}'")

    # Collect inputs: recipes first, then explicit inputs override them.
    inputsdict = {}
    if recipes:
        valuesfromrecipe = config.recipes.get_recipe_for_keys(recipes)
        inputsdict.update(valuesfromrecipe)
    inputsdict.update(
        cascade_inputs(
            wf=None,
            inputs=inputs,
            required_inputs=None,
        )
    )

    if isinstance(toolref, DynamicWorkflow):
        # Dynamic workflows construct themselves from their inputs, so the
        # inputs must be known before translation.
        if not inputsdict:
            raise Exception("Dynamic workflows cannot be translated without the inputs")
        toolref.constructor(inputsdict, hints)
        inputsdict = toolref.modify_inputs(inputsdict, hints)

    # Resolve container digests unless explicitly skipped.
    container_overrides = container_override
    if not skip_digest_lookup:
        container_overrides = WorkflowManager.prepare_container_override(
            toolref,
            container_override,
            cache_location=config.digest_cache_location,
            skip_digest_cache=skip_digest_cache,
        )

    if isinstance(toolref, j.WorkflowBase):
        wfstr, _, _ = toolref.translate(
            translation,
            to_console=False,
            to_disk=bool(output_dir),
            export_path=output_dir or "./{language}",
            hints=hints,
            additional_inputs=inputsdict,
            allow_empty_container=allow_empty_container,
            container_override=container_overrides,
        )
    elif isinstance(toolref, (j.CommandTool, j.CodeTool)):
        wfstr = toolref.translate(
            translation=translation,
            to_console=False,
            to_disk=bool(output_dir),
            export_path=output_dir or "./{language}",
            allow_empty_container=allow_empty_container,
            container_override=container_overrides,
        )
    else:
        name = toolref.__name__ if isclass(toolref) else toolref.__class__.__name__
        raise Exception("Unsupported tool type: " + name)

    print(wfstr, file=sys.stdout)
    return wfstr
def fromjanis(
    workflow: Union[str, j.Tool, Type[j.Tool]],
    name: str = None,
    engine: Union[str, Engine] = None,
    # NOTE(review): this default is evaluated once at import time, so every
    # call shares the same LocalFileScheme instance — confirm it's stateless.
    filescheme: Union[str, FileScheme] = LocalFileScheme(),
    validation_reqs=None,
    batchrun_reqs=None,
    hints: Optional[Dict[str, str]] = None,
    output_dir: Optional[str] = None,
    dryrun: bool = False,
    inputs: Union[str, dict] = None,
    required_inputs: dict = None,
    watch=True,
    max_cores=None,
    max_memory=None,
    force=False,
    keep_intermediate_files=False,
    recipes=None,
    run_in_background=True,
    run_in_foreground=None,
    dbconfig=None,
    only_toolbox=False,
    no_store=False,
    allow_empty_container=False,
    check_files=True,
    container_override: dict = None,
    **kwargs,
):
    """
    Resolve a workflow, create its task directory/DB entry, build the engine
    and environment, and start the task (printing the new wid to stdout).

    :return: the WorkflowManager for the started task (None if interrupted)
    :raises Exception: when the workflow can't be resolved, or re-raises any
        start-up failure after stopping the engine
    """
    cm = ConfigManager.manager()
    jc = JanisConfiguration.manager()

    wf: Optional[Tool] = resolve_tool(
        tool=workflow,
        name=name,
        from_toolshed=True,
        only_toolbox=only_toolbox,
        force=force,
    )
    if not wf:
        raise Exception("Couldn't find workflow with name: " + str(workflow))

    # if isinstance(tool, j.CommandTool):
    #     tool = tool.wrapped_in_wf()
    # elif isinstance(tool, j.CodeTool):
    #     tool = tool.wrapped_in_wf()

    # organise inputs: recipes first, then explicit inputs override them
    inputsdict = {}
    if recipes:
        valuesfromrecipe = jc.recipes.get_recipe_for_keys(recipes)
        inputsdict.update(valuesfromrecipe)
    inputsdict.update(
        cascade_inputs(
            wf=wf,
            inputs=inputs,
            required_inputs=required_inputs,
            batchrun_options=batchrun_reqs,
        )
    )

    row = cm.create_task_base(wf, outdir=output_dir, store_in_centraldb=not no_store)
    # The wid on stdout is the scriptable output of this command.
    print(row.wid, file=sys.stdout)

    engine = engine or jc.engine
    eng = get_engine_from_eng(
        engine,
        wid=row.wid,
        execdir=WorkflowManager.get_path_for_component_and_dir(
            row.outputdir, WorkflowManager.WorkflowManagerPath.execution
        ),
        confdir=WorkflowManager.get_path_for_component_and_dir(
            row.outputdir, WorkflowManager.WorkflowManagerPath.configuration
        ),
        logfile=os.path.join(
            WorkflowManager.get_path_for_component_and_dir(
                row.outputdir, WorkflowManager.WorkflowManagerPath.logs
            ),
            "engine.log",
        ),
        watch=watch,
        **kwargs,
    )
    fs = get_filescheme_from_fs(filescheme, **kwargs)
    environment = Environment(f"custom_{wf.id()}", eng, fs)

    try:
        # Note: run_in_foreground can be None, so
        # (not (run_in_foreground is True)) != (run_in_foreground is False)
        should_run_in_background = (
            run_in_background is True or jc.run_in_background is True
        ) and not (run_in_foreground is True)
        tm = cm.start_task(
            wid=row.wid,
            tool=wf,
            environment=environment,
            validation_requirements=validation_reqs,
            batchrun_requirements=batchrun_reqs,
            task_path=row.outputdir,
            hints=hints,
            inputs_dict=inputsdict,
            dryrun=dryrun,
            watch=watch,
            max_cores=max_cores,
            max_memory=max_memory,
            keep_intermediate_files=keep_intermediate_files,
            run_in_background=should_run_in_background,
            dbconfig=dbconfig,
            allow_empty_container=allow_empty_container,
            container_override=container_override,
            check_files=check_files,
        )
        Logger.log("Finished starting task task")
        return tm
    except KeyboardInterrupt:
        Logger.info("Exiting...")
    except Exception as e:
        # Have to make sure we stop the engine if something happens when creating the task that causes
        # janis to exit early
        environment.engine.stop_engine()
        raise e
def create_task_base(self, wf: Workflow, outdir=None, store_in_centraldb=True):
    """
    Allocate a new workflow id and task directory, create the on-disk
    directory structure, and (optionally) register the task centrally.

    :param wf: the workflow being submitted (its id seeds the default outdir)
    :param outdir: explicit task directory; when omitted, one is generated
        under the configured output directory as '<timestamp>_<wid>/'
    :param store_in_centraldb: when False, the task isn't inserted into the
        central tasks DB (it can still be watched by path)
    :return: the new TaskRow
    :raises Exception: when neither outdir nor a configured output dir exists
    """
    # Fix: the original had a truncated string statement ("If you don't
    # spec") placed *after* the first statement — a dead no-op, not a
    # docstring. Replaced with the real docstring above.
    config = JanisConfiguration.manager()

    if not outdir and not config.outputdir:
        raise Exception(
            f"You must specify an output directory (or specify an '{JanisConfiguration.Keys.OutputDir.value}' "
            f"in your configuration)"
        )

    default_outdir = None
    if config.outputdir:
        default_outdir = os.path.join(config.outputdir, wf.id())

    # Collect ids that must not be re-issued.
    forbiddenids = set()
    if store_in_centraldb:
        with self.with_cursor() as cursor:
            forbiddenids = set(
                t[0] for t in cursor.execute("SELECT wid FROM tasks").fetchall()
            )
    if outdir:
        if os.path.exists(outdir):
            # this should theoretically scoop through all the ones in the taskDB and
            # add them to the forbidden ones, though this might cause more issues for now.
            forbiddenids = forbiddenids.union(set(os.listdir(outdir)))
    else:
        if os.path.exists(default_outdir):
            forbiddenids = forbiddenids.union(set(os.listdir(default_outdir)))
    wid = generate_new_id(forbiddenids)

    task_path = outdir
    if not task_path:
        od = default_outdir
        dt = datetime.now().strftime("%Y%m%d_%H%M%S")
        task_path = os.path.join(od, f"{dt}_{wid}/")
    task_path = fully_qualify_filename(task_path)

    Logger.info(f"Starting task with id = '{wid}'")

    row = TaskRow(wid, task_path)
    WorkflowManager.create_dir_structure(task_path)

    if store_in_centraldb:
        self.get_lazy_db_connection().insert_task(row)
    else:
        Logger.info(
            f"Not storing task '{wid}' in database. To watch, use: 'janis watch {task_path}'"
        )

    # Commit and drop the central-db connection so we don't hold the
    # sqlite lock while the task runs.
    if self._connection:
        self._connection.commit()
        self._connection.close()
    self._taskDB = None
    self._connection = None
    return row
def run_from_jobfile(
    workflow: Union[str, j.Tool, Type[j.Tool]],
    jobfile: PreparedJob,
    engine: Union[str, Engine, None] = None,
    wait: bool = False,
    # specific engine args
    cromwell_jar: Optional[str] = None,
    cromwell_url: Optional[str] = None,
):
    """
    Create the task base for a prepared job, wire up submit logging and the
    engine, then start the workflow (printing the submission id to stdout).

    :param wait: passed through to WorkflowManager.from_janis
    :return: the WorkflowManager, or None if interrupted before/around start
    :raises Exception: re-raises start-up failures after stopping the engine
    """
    cm = ConfigManager(db_path=jobfile.db_path)

    if not workflow:
        raise Exception("Couldn't find workflow with name: " + str(workflow))

    row = cm.create_task_base(
        wf=workflow,
        job=jobfile,
    )
    jobfile.execution_dir = row.execution_dir
    jobfile.output_dir = row.output_dir

    # set logger for submit
    Logger.set_write_level(Logger.CONSOLE_LEVEL)
    logpath = os.path.join(
        WorkflowManager.get_path_for_component_and_dir(
            row.execution_dir, WorkflowManager.WorkflowManagerPath.logs
        ),
        "janis-submit.log",
    )
    Logger.WRITE_LEVELS = {Logger.CONSOLE_LEVEL: (logpath, open(logpath, "a"))}
    Logger.debug(f"Set submission logging to '{logpath}'")
    # The submission id on stdout is the scriptable output of this command.
    print(row.submission_id, file=sys.stdout)

    eng = get_engine_from_eng(
        engine or jobfile.engine,
        wid=row.submission_id,
        execdir=WorkflowManager.get_path_for_component_and_dir(
            row.execution_dir, WorkflowManager.WorkflowManagerPath.execution
        ),
        confdir=WorkflowManager.get_path_for_component_and_dir(
            row.execution_dir, WorkflowManager.WorkflowManagerPath.configuration
        ),
        logfile=os.path.join(
            WorkflowManager.get_path_for_component_and_dir(
                row.execution_dir, WorkflowManager.WorkflowManagerPath.logs
            ),
            "engine.log",
        ),
        cromwell_jar=cromwell_jar,
        cromwell_url=cromwell_url,
    )

    # Fix: 'wm' was previously unbound when KeyboardInterrupt fired before
    # from_janis returned, so the trailing 'return wm' raised NameError.
    wm = None
    try:
        wm = WorkflowManager.from_janis(
            submission_id=row.submission_id,
            tool=workflow,
            engine=eng,
            prepared_submission=jobfile,
            wait=wait,
        )
        Logger.log("Finished starting task")
        return wm
    except KeyboardInterrupt:
        Logger.info("Exiting...")
        if wm is not None:
            # Best-effort abort; the user already asked to stop.
            try:
                wm.abort()
            except Exception:
                pass
    except Exception as e:
        # Have to make sure we stop the engine if something happens when creating the task that causes
        # janis to exit early
        eng.stop_engine()
        raise e
    return wm