def poll_metadata(self):
    """
    Poll metadata for every tracked engine id, then schedule the next poll.

    Returns immediately (without rescheduling) when the timer flag is set or
    ``self.should_stop`` is truthy. Otherwise, for each engine id in
    ``self.progress_callbacks`` the metadata is fetched and, when truthy,
    handed to every callback registered for that id. If fetching or any
    callback raises, a critical message is logged and the callbacks of that
    id are invoked with a placeholder RunModel whose status is SUSPENDED.
    """
    stop_requested = self._timer_thread.is_set() or self.should_stop
    if stop_requested:
        return

    for engine_id in self.progress_callbacks:
        try:
            latest = self.metadata(engine_id)
            if latest:
                for notify in self.progress_callbacks[engine_id]:
                    notify(latest)
        except Exception as err:
            Logger.critical(
                f"Received a critical error ({repr(err)}) when getting metadata for "
                f"Cromwell task {engine_id}, hence terminating task with status=SUSPENDED"
            )
            # Tell the listeners that the task is suspended, using a
            # RunModel that only carries the engine id and the status.
            suspended = RunModel(
                id_=None,
                submission_id=None,
                status=TaskStatus.SUSPENDED,
                engine_id=engine_id,
                name=None,
                execution_dir=None,
            )
            for notify in self.progress_callbacks[engine_id]:
                notify(suspended)

    # Re-arm the timer so this method runs again after the poll interval.
    interval = self.get_poll_interval()
    threading.Timer(interval, self.poll_metadata).start()
def __init__(
    self,
    wf,
    identifier: str,
    datatype: DataType,
    source: ConnectionSource,
    doc: OutputDocumentation = None,
    output_folder: Union[str, InputSelector, List[Union[str, InputSelector]]] = None,
    output_name: Union[str, InputSelector] = None,
    skip_typecheck=False,
):
    """
    Create an output node fed by ``source``.

    :param wf: Passed through to the parent initialiser
    :param identifier: Identifier of this output node
    :param datatype: Declared type of the output
    :param source: Indexable pair; element 0 is the source node and element 1
        the tag of the output on that node
    :param doc: An OutputDocumentation, or a value that is wrapped in one
    :param output_folder: Stored unchanged on the node
    :param output_name: Stored unchanged on the node
    :param skip_typecheck: When truthy, the type compatibility check is skipped
    """
    super().__init__(wf, NodeType.OUTPUT, identifier)
    self.datatype = datatype

    # No restriction is placed on the node type of the source here.
    source_node = source[0]
    source_tag = source[1]
    stype = source_node.outputs()[source_tag].outtype

    # The output of a scattered step is seen as an array of its declared type.
    if isinstance(source_node, StepNode) and source_node.scatter:
        stype = Array(stype)

    if not skip_typecheck:
        if not datatype.can_receive_from(stype):
            # A mismatch is only logged; construction continues.
            Logger.critical(
                f"Mismatch of types when joining to output node '{source_node.id()}.{source_tag}' to '{identifier}' "
                f"({stype.id()} -/→ {datatype.id()})"
            )

    self.source = verify_or_try_get_source(source)

    if isinstance(doc, OutputDocumentation):
        self.doc = doc
    else:
        self.doc = OutputDocumentation(doc=doc)

    self.output_folder = output_folder
    self.output_name = output_name
def second_formatter(secs):
    """
    Format a number of seconds as a colon-separated duration string.

    Only the units from the largest non-zero one downwards are shown; every
    unit after the first is zero-padded to two digits, for example
    ``59 -> "59s"``, ``60 -> "1m:00s"``, ``3661 -> "1h:01m:01s"`` and
    ``90061 -> "1d:01h:01m:01s"``.

    :param secs: Value convertible with ``int()``
    :return: ``"N/A"`` for None, ``"0s"`` for any other falsy value, the
        formatted duration otherwise. If ``secs`` cannot be converted, a
        critical message is logged and ``str(secs)`` is returned.
    """
    if secs is None:
        return "N/A"
    if not secs:
        return "0s"

    try:
        remainder = int(secs)
    except Exception:
        # Best effort: report the problem and echo the value back. A bare
        # `except:` would also have swallowed KeyboardInterrupt/SystemExit.
        Logger.critical(f"second_formatter received non-int type: '{secs}'")
        return str(secs)

    # Largest unit first; once one unit has been emitted, every smaller unit
    # is emitted too (even when it is zero).
    intervals = []
    for unit_size in (86400, 3600, 60):
        if intervals or remainder >= unit_size:
            count, remainder = divmod(remainder, unit_size)
            intervals.append(count)
    intervals.append(remainder)

    # Suffixes for exactly the units that were emitted, e.g. "hms".
    suffixes = "dhms"[-len(intervals):]
    pieces = [str(intervals[0]) + suffixes[0]]
    pieces.extend(
        str(value).zfill(2) + suffix
        for value, suffix in zip(intervals[1:], suffixes[1:])
    )
    return ":".join(pieces)
def __init__(
    self,
    wf,
    identifier: str,
    datatype: DataType,
    source: ConnectionSource,
    doc: str = None,
    output_tag: Union[str, InputSelector] = None,
    output_prefix: Union[str, InputSelector, List[Union[str, InputSelector]]] = None,
):
    """
    Create an output node fed by the output of a step node.

    :param wf: Passed through to the parent initialiser
    :param identifier: Identifier of this output node
    :param datatype: Declared type of the output
    :param source: Indexable pair; element 0 is the source node and element 1
        the tag of the output on that node
    :param doc: Stored unchanged on the node
    :param output_tag: Stored unchanged on the node
    :param output_prefix: Stored unchanged on the node
    :raises Exception: If the source node is not of type NodeTypes.STEP
    """
    super().__init__(wf, NodeTypes.OUTPUT, identifier)
    self.datatype = datatype

    source_node = source[0]
    source_tag = source[1]

    if source_node.node_type != NodeTypes.STEP:
        raise Exception(
            f"Unsupported connection type: {NodeTypes.OUTPUT} → {source_node.node_type}"
        )

    stype = source_node.outputs()[source_tag].output_type

    # The output of a scattered step is seen as an array of its declared type.
    if isinstance(source_node, StepNode) and source_node.scatter:
        stype = Array(stype)

    compatible = datatype.can_receive_from(stype)
    if not compatible:
        # A mismatch is only logged; construction continues.
        Logger.critical(
            f"Mismatch of types when joining to output node '{source_node.id()}.{source_tag}' to '{identifier}' "
            f"({stype.id()} -/→ {datatype.id()})"
        )

    self.source = verify_or_try_get_source(source)
    self.doc = doc
    self.output_tag = output_tag
    self.output_prefix = output_prefix
def resolve(path, workflow_spec, workflow_name):
    """
    Turn an export path template into an absolute path.

    A falsy ``path`` is replaced by the working directory (with a critical
    log message). The user directory is expanded, ``.`` / ``./...`` and other
    paths not starting with ``/`` are anchored at the current working
    directory, and the workflow_spec / workflow_name keywords are substituted
    with the lower-cased arguments (or an empty string when falsy).

    :param path: Path template, may contain the ExportPathKeywords placeholders
    :param workflow_spec: Replacement for the workflow_spec keyword
    :param workflow_name: Replacement for the workflow_name keyword
    :raises Exception: If a keyword is present but its value is None
    :return: The resolved path
    """
    if not path:
        Logger.critical("Output path was invalid, changed to working directory")
        path = "."

    if ExportPathKeywords.workflow_spec in path and workflow_spec is None:
        raise Exception(
            f"path ('{path}') contained parameter {ExportPathKeywords.workflow_spec} "
            "but caller of .resolve did not pass language"
        )

    if ExportPathKeywords.workflow_name in path and workflow_name is None:
        raise Exception(
            f"path ('{path}') contained parameter {ExportPathKeywords.workflow_name} "
            "but caller of .resolve did not pass tool name"
        )

    path = p.expanduser(path)

    if path == ".":
        path = getcwd()
    elif path[:2] == "./":
        # Drop the leading dot, keep the slash: "<cwd>/rest".
        path = getcwd() + path[1:]
    elif path[0] != "/":
        path = p.join(getcwd(), path)

    spec_value = workflow_spec.lower() if workflow_spec else ""
    path = path.replace(ExportPathKeywords.workflow_spec, spec_value)

    name_value = workflow_name.lower() if workflow_name else ""
    return path.replace(ExportPathKeywords.workflow_name, name_value)
def get_workflow_metadatadb(execpath, wid, readonly=False):
    """
    Build the WorkflowMetadataDbProvider for a workflow in ``execpath``.

    When ``wid`` is falsy, the database is opened read-only to look up the
    latest id through RunDbProvider.

    :param execpath: Path passed to WorkflowDbManager.get_sql_path_base
    :param wid: Workflow id; looked up from the database when falsy
    :param readonly: Forwarded to WorkflowMetadataDbProvider
    :raises Exception: If no workflow id could be determined; errors from
        opening the database are logged and re-raised
    :return: WorkflowMetadataDbProvider for the resolved id
    """
    sqlpath = WorkflowDbManager.get_sql_path_base(execpath)
    connection = None

    try:
        if not wid:
            Logger.debug("Opening database connection to get wid from: " + sqlpath)
            try:
                connection = sqlite3.connect(f"file:{sqlpath}?mode=ro", uri=True)
            except Exception:
                Logger.critical("Error when opening DB connection to: " + sqlpath)
                raise

            wid = RunDbProvider(db=connection).get_latest()
            if not wid:
                raise Exception("Couldn't get WID in task directory")

        return WorkflowMetadataDbProvider(sqlpath, wid, readonly=readonly)
    finally:
        # Release the lookup connection on every exit path, including the
        # error paths that previously left it open.
        if connection is not None:
            connection.close()
def rm_dir(self, directory):
    """
    Recursively delete a local directory.

    :param directory: Directory to remove
    :return: The result of ``shutil.rmtree`` on success, ``False`` if the
        removal raised (the error is logged, not propagated)
    """
    Logger.info(f"Removing local directory '{directory}'")
    try:
        outcome = shutil.rmtree(directory)
    except Exception as err:
        Logger.critical(f"Error removing directory '{directory}': {err}")
        return False
    return outcome
def get_recipe_for_keys(self, keys: List[str]):
    """
    Merge the recipes registered under each of ``keys`` into one dictionary.

    Recipes are loaded first. For every key, values from the YAML files in
    ``self._files_by_key`` are applied before the entry in ``self.recipes``;
    later keys override earlier ones. Keys found in neither place are
    reported with a critical log message.

    :param keys: Recipe names to look up
    :return: Merged dictionary, empty when ``keys`` is falsy
    """
    self.load_recipes()

    if not keys:
        return {}

    merged = {}
    for key in keys:
        in_files = key in self._files_by_key
        if in_files:
            from_files = self.get_cascaded_dict_from_yamls(self._files_by_key[key])
            merged.update(from_files)

        in_recipes = key in self.recipes
        if in_recipes:
            # A recipe may be stored as a falsy value; treat it as empty.
            merged.update(self.recipes[key] or {})

        if not (in_files or in_recipes):
            Logger.critical(f"Couldn't find '{key}' in known recipes")

    return merged
def do_docs(args):
    """
    Open DOCS_URL in a web browser.

    Any failure (including a failing import of ``webbrowser``) is logged as
    critical instead of being raised.

    :param args: Unused
    """
    try:
        from webbrowser import open as open_in_browser

        open_in_browser(DOCS_URL)
    except Exception as err:
        Logger.critical(f"Failed to open {DOCS_URL} ({err})")
def do_rm(args):
    """
    Remove every task listed in ``args.wid``.

    A failure for one id is logged as critical and does not stop the
    remaining ids from being processed.

    :param args: Object with ``wid`` (iterable of ids) and ``keep`` (passed
        as ``keep_output``)
    """
    for wid in args.wid:
        try:
            manager = ConfigManager.manager()
            manager.remove_task(wid, keep_output=args.keep)
        except Exception as err:
            Logger.critical(f"Can't remove {wid}: " + str(err))
def add_tool(tool: Tool) -> bool:
    """
    Register ``tool`` in the tool shed under its lower-cased id and version.

    :param tool: Tool to register
    :return: ``False`` (after a critical log message) when the tool reports
        no version, otherwise the result of the registration
    """
    version: Optional[str] = tool.version()

    if version:
        return JanisShed._toolshed.register(tool.id().lower(), version.lower(), tool)

    message = f"The tool {tool.id()} did not have a version and will not be registered"
    Logger.critical(message)
    return False
def do_rm(args):
    """
    Remove every task listed in ``args.wid``.

    The manager for each id is obtained through
    ``ConfigManager.get_from_path_or_submission_lazy``. A failure for one id
    is logged as critical and does not stop the remaining ids.

    :param args: Object with ``wid`` (iterable of ids) and ``keep`` (passed
        as ``keep_output``)
    """
    for wid in args.wid:
        try:
            manager = ConfigManager.get_from_path_or_submission_lazy(
                wid, readonly=True
            )
            manager.remove_task(wid, keep_output=args.keep)
        except Exception as err:
            Logger.critical(f"Can't remove {wid}: " + str(err))
def get_cascaded_dict_from_yamls(files):
    """
    Load each YAML file in order and merge the results into one dictionary.

    Later files override keys of earlier ones. A file that cannot be opened,
    parsed or merged is reported with a critical log message and skipped.

    :param files: Iterable of file paths
    :return: The merged dictionary
    """
    merged = {}
    for filename in files:
        try:
            with open(filename) as handle:
                # NOTE(review): this uses the full (non-safe) ruamel Loader;
                # confirm these files always come from a trusted location.
                contents = ruamel.yaml.load(handle, Loader=ruamel.yaml.Loader)
            merged.update(contents)
        except Exception as err:
            Logger.critical(f"Couldn't parse file '{filename}': {err}")
    return merged
def add_tool(tool: Tool) -> bool:
    """
    Register ``tool`` by class name and by lower-cased id / version.

    :param tool: Tool to register
    :return: ``False`` (after a critical log message) when the tool reports
        no version, otherwise the result of the tool shed registration
    """
    version: Optional[str] = tool.version()

    if not version:
        message = f"The tool {tool.id()} did not have a version and will not be registered"
        Logger.critical(message)
        return False

    Logger.log("Adding tool: " + tool.id())

    class_name = tool.__class__.__name__
    JanisShed._byclassname.register(class_name, tool)

    tool_id = tool.id().lower()
    return JanisShed._toolshed.register(tool_id, version.lower(), tool)
def check_types(self):
    """
    Check that the type produced at the start of this edge can be received
    by the input at its finish, and store the verdict in
    ``self.compatible_types``.

    The source type is wrapped in an Array when the start node is a scattered
    step, and unwrapped to its subtype when this edge itself scatters. An
    Array input whose subtype can receive the source type also counts as
    compatible. An incompatibility is logged as critical, not raised.

    :raises Exception: If this edge scatters but the source type is not an
        Array
    """
    from janis_core.workflow.workflow import InputNode, StepNode

    # Use the tagged output / input when a tag is given, else the first one.
    stoolin: TOutput = (
        self.start.outputs()[self.stag]
        if self.stag is not None
        else first_value(self.start.outputs())
    )
    ftoolin: TInput = (
        self.finish.inputs()[self.ftag]
        if self.ftag is not None
        else first_value(self.finish.inputs())
    )

    stype = stoolin.outtype
    ftype = ftoolin.intype

    start_is_scattered = (
        isinstance(self.start, StepNode) and self.start.scatter is not None
    )

    if start_is_scattered:
        Logger.log(
            f"This edge merges the inputs from '{full_dot(self.start, self.stag)}' for "
            f"'{full_dot(self.finish, self.ftag)}'"
        )
        stype = Array(stype)

    if self.scatter:
        if not isinstance(stype, Array):
            # Fixed: the first quoted name was missing its closing quote.
            raise Exception(
                f"Scatter was required for '{self.start.id()}.{self.stag}' → '{self.finish.id()}.{self.ftag}' but "
                f"the input type was {type(stype).__name__} and not an array"
            )
        stype = stype.subtype()

    source_has_default = (
        isinstance(self.start, InputNode) and self.start.default is not None
    )

    # Scatters are handled automatically by the StepTagInput Array unwrapping
    # Merges are handled automatically by the `start_is_scattered` Array wrap
    self.compatible_types = ftype.can_receive_from(stype, source_has_default)
    if not self.compatible_types:
        # A single value is accepted where an array of that value is expected.
        if isinstance(ftype, Array) and ftype.subtype().can_receive_from(stype):
            self.compatible_types = True

    if not self.compatible_types:
        s = full_dot(self.start, self.stag)
        f = full_dot(self.finish, self.ftag)
        message = (
            f"Mismatch of types when joining '{s}' to '{f}': "
            f"{stoolin.outtype.id()} -/→ {ftoolin.intype.id()}"
        )
        # The elements would have fitted: hint at a missing scatter.
        if isinstance(stype, Array) and ftype.can_receive_from(stype.subtype()):
            message += " (did you forget to SCATTER?)"
        Logger.critical(message)
def link_copy_or_fail(source: str, dest: str, force=False):
    """
    Hard link ``source`` to ``dest``, falling back to a copy when linking
    fails. Directories are recreated at the destination and their entries
    are linked one by one.

    Eventually move this to some generic util class

    Errors are logged (critical) rather than raised to the caller.

    :param source: Source to link from
    :param dest: Place to link to
    :param force: Overwrite destination if it exists
    :return: None
    """
    try:
        # Work queue of (source, destination) pairs, processed first-in
        # first-out; directory entries are appended as they are discovered.
        to_copy = [
            (
                LocalFileScheme.prepare_path(source),
                LocalFileScheme.prepare_path(dest),
            )
        ]

        while to_copy:
            s, d = to_copy.pop(0)

            # Check if path is Null/None
            if not s:
                continue
            if not d:
                continue

            if os.path.exists(d) and force:
                Logger.debug(f"Destination exists, overwriting '{d}'")
                if os.path.isdir(d):
                    rmtree(d)
                else:
                    os.remove(d)

            Logger.log(f"Hard linking {s} → {d}")

            if os.path.isdir(s):
                os.makedirs(d, exist_ok=True)
                for f in os.listdir(s):
                    to_copy.append((os.path.join(s, f), os.path.join(d, f)))
                continue

            try:
                os.link(s, d)
            except FileExistsError:
                # Fixed: the message used to print the literal 'd' instead
                # of the destination path.
                Logger.critical(
                    f"The file '{d}' already exists. The force flag is required to overwrite."
                )
            except Exception as e:
                Logger.warn("Couldn't link file: " + str(e))

                # if this fails, it should error
                Logger.log(f"Copying file {s} → {d}")
                copyfile(s, d)
    except Exception as e:
        Logger.critical(
            f"An unexpected error occurred when link/copying {source} -> {dest}: {e}"
        )
def get_latest_workflow(path) -> str:
    """
    Return the latest entry reported by RunDbProvider for the database
    located through ``WorkflowDbManager.get_sql_path_base(path)``.

    The database is opened read-only and the connection is closed before
    returning, also when the lookup fails.

    :param path: Path passed to WorkflowDbManager.get_sql_path_base
    :raises Exception: Any error is logged as critical and re-raised
    :return: Result of ``RunDbProvider.get_latest()``
    """
    connection = None
    try:
        connection = sqlite3.connect(
            f"file:{WorkflowDbManager.get_sql_path_base(path)}?mode=ro", uri=True
        )
        return RunDbProvider(db=connection).get_latest()
    except Exception:
        Logger.critical("Error when opening DB connection to: " + path)
        raise
    finally:
        # Previously the connection was never closed.
        if connection is not None:
            connection.close()
def terminate(self):
    """
    Flag this object for termination and flush its log file to disk.

    Sets ``self.should_terminate`` and, when ``self.logfp`` is truthy,
    flushes and fsyncs it. A failure to flush is only logged.
    """
    self.should_terminate = True

    if not self.logfp:
        return

    try:
        self.logfp.flush()
        os.fsync(self.logfp.fileno())
    except Exception as err:
        # This isn't a proper error, there's nothing we could do
        # and doesn't prohibit the rest of the shutdown of Janis.
        Logger.critical("Couldn't flush engine stderr to disk: " + str(err))
def _get_tool_entrypoints():
    """
    Load every entry point registered in the EP.TOOLS group.

    Entry points that fail with an ImportError are reported with a critical
    log message and skipped.

    :return: List of the loaded entry point objects
    """
    loaded = []
    for entrypoint in pkg_resources.iter_entry_points(group=EP.TOOLS):
        try:
            loaded.append(entrypoint.load())
        except ImportError as e:
            # Fixed: the message previously said "data_type extension",
            # although this function loads the tools group.
            Logger.critical(
                f"Couldn't import janis tools extension {EP.TOOLS} '{entrypoint.name}': {e}"
            )
    return loaded
def db_connection(self):
    """
    Open a sqlite3 connection to the path returned by ``self.get_sql_path()``.

    When ``self.readonly`` is truthy the database is opened through a
    ``mode=ro`` URI, otherwise it is opened directly by path.

    :raises: Any error while connecting is logged as critical and re-raised
    :return: The sqlite3 connection
    """
    path = self.get_sql_path()
    try:
        if not self.readonly:
            Logger.debug("Opening database connection: " + path)
            return sqlite3.connect(path)

        Logger.debug("Opening database connection to in READONLY mode: " + path)
        return sqlite3.connect(f"file:{path}?mode=ro", uri=True)
    except BaseException:
        Logger.critical("Error when opening DB connection to: " + path)
        raise
def _get_datatype_entrypoints():
    """
    Load every entry point registered in the EP.DATATYPES group.

    Entry points that fail with an ImportError are reported with a critical
    log message and skipped.

    :return: List of the loaded entry point objects
    """
    import importlib_metadata

    loaded = []
    for entrypoint in importlib_metadata.entry_points().get(EP.DATATYPES, []):
        try:
            loaded.append(entrypoint.load())
        except ImportError as err:
            Logger.critical(
                f"Couldn't import janis data_type extension '{entrypoint.name}': {err}"
            )
    return loaded
def _get_tool_entrypoints():
    """
    Load every entry point registered in the EP.TOOLS group.

    Entry points that fail with an ImportError are reported with a critical
    log message and skipped.

    :return: List of the loaded entry point objects
    """
    import importlib_metadata

    loaded = []
    for entrypoint in importlib_metadata.entry_points().get(EP.TOOLS, []):
        try:
            loaded.append(entrypoint.load())
        except ImportError as err:
            Logger.critical(
                f"Couldn't import janis tools extension {EP.TOOLS} '{entrypoint.name}': {err}"
            )
    return loaded
def get_config_from_script(self, execution_dir: str):
    """
    Build a MySQL database configuration from the output of an external
    script.

    The script path is read from the environment variable named by
    ``EnvVariables.db_script_generator``. The script is run through the shell
    with ``execution_dir`` as its argument and must print a JSON object with
    the keys ``username``, ``password``, ``database`` and ``host``.

    :param execution_dir: Appended to the script path on the command line
    :raises Exception: If the environment variable is unset, the command
        fails, or required keys are missing; every failure is logged as
        critical and re-raised
    :return: Result of ``CromwellConfiguration.Database.mysql(...)``
    """
    try:
        import subprocess
        import os
        import json
        from janis_assistant.management.envvariables import EnvVariables
        from janis_assistant.engines.cromwell.cromwellconfiguration import (
            CromwellConfiguration,
        )

        file_path = os.getenv(EnvVariables.db_script_generator)
        Logger.debug(
            f"Found path '{EnvVariables.db_script_generator}' to generate database credentials"
        )
        if file_path is None:
            raise Exception(
                f"Couldn't get database credentials as couldn't find value in env var '{EnvVariables.db_script_generator}'"
            )

        # The existence of the script is not checked before running it.
        command = f"{file_path} {execution_dir}"
        try:
            raw_output = collect_output_from_command(
                command, stderr=Logger.guess_log, shell=True
            )
        except Exception as inner:
            Logger.critical(f"Failed to generate database credentials ({inner!r})")
            raise

        credentials = json.loads(raw_output)
        Logger.debug(
            "Received keys from database credentials script: "
            + ", ".join(credentials.keys())
        )

        required = {"username", "password", "database", "host"}
        missing_keys = {key for key in required if key not in credentials}
        if missing_keys:
            raise Exception(
                "The script to generate database credentials was missing the keys: "
                + ", ".join(missing_keys)
            )

        return CromwellConfiguration.Database.mysql(
            username=credentials["username"],
            password=credentials["password"],
            database=credentials["database"],
            url=credentials["host"],
        )
    except Exception as outer:
        Logger.critical(
            "Failed to get database configuration details from script: " + repr(outer)
        )
        raise
def cp_to(
    self,
    source,
    dest,
    force=False,
    report_progress: Optional[Callable[[float], None]] = None,
):
    """
    Copy a local file to the remote host with ``scp``.

    :param source: Local path to copy
    :param dest: Path on ``self.connectionstring`` to copy to
    :param force: Not supported; a critical message is logged when truthy
    :param report_progress: Unused
    :return: None (the exit status of scp is not inspected)
    """
    if force:
        Logger.critical("SSHFileScheme does not support the 'force' flag")

    Logger.info(
        f"Secure copying (SCP) from local:{source} to {self.connectionstring}:{dest}"
    )

    remote_target = self.connectionstring + ":" + dest
    subprocess.call(["scp", source, remote_target])
def stop_engine(self):
    """
    Ask the engine process to stop by sending it SIGTERM.

    A missing process id, or a failure to signal the process, is logged as
    critical rather than raised.

    :return: self
    """
    # we're going to abort!
    if not self.process_id:
        Logger.critical("Couldn't terminate CWLTool as there was no process ID")
        return self

    try:
        import signal

        os.kill(self.process_id, signal.SIGTERM)
    except Exception as err:
        Logger.critical("Couldn't terminate CWLTool as " + str(err))

    return self
def translate_to_cwl_glob(glob, inputsdict, **debugkwargs):
    """
    Translate an output selector into the value used for a CWL glob.

    :param glob: Selector to translate; a falsy value gives None, and a
        non-Selector value is returned unchanged after a critical log message
    :param inputsdict: Mapping of input name to tool input, consulted when an
        InputSelector names an input
    :param debugkwargs: Only appended to the log message about string globs
    :raises Exception: If an InputSelector names an input that is not in
        ``inputsdict``, or the selector type is not handled
    :return: The translated glob
    """
    if not glob:
        return None

    if not isinstance(glob, Selector):
        Logger.critical(
            "String globs are being phased out from tool output selections, please use the provided "
            "Selector (InputSelector or WildcardSelector) classes. "
            + str(debugkwargs)
        )
        return glob

    if isinstance(glob, InputSelector):
        selected = glob.input_to_select
        if not selected:
            return translate_input_selector(glob, code_environment=False)

        if inputsdict is None or selected not in inputsdict:
            raise Exception(
                "An internal error has occurred when generating the output glob for "
                + selected
            )

        tool_input = inputsdict[selected]
        intype = tool_input.input_type

        if isinstance(intype, Filename):
            # Generated filenames are resolved here rather than through a
            # JS expression on the input.
            if not isinstance(intype.prefix, InputSelector):
                return intype.generated_filename()
            replacements = prepare_filename_replacements_for(
                intype.prefix, inputsdict=inputsdict
            )
            return intype.generated_filename(inputs=replacements)

        expr = f"inputs.{selected}"
        if isinstance(intype, (File, Directory)):
            expr = expr + ".basename"
        if tool_input.default:
            # Fall back to the declared default when the input is null.
            expr = f"(inputs.{selected} != null) ? {expr} : {tool_input.default}"
        return f"$({expr})"

    if isinstance(glob, StringFormatter):
        return translate_string_formatter(glob)

    if isinstance(glob, WildcardSelector):
        return glob.wildcard

    raise Exception("Unimplemented selector type: " + glob.__class__.__name__)
def start_from_paths(self, wid, source_path: str, input_path: str, deps_path: str):
    """
    Run ``toil-cwl-runner`` on a workflow and relay its output to the log.

    :param wid: Unused
    :param source_path: Path of the workflow, passed to toil-cwl-runner
    :param input_path: Path of the inputs, passed to toil-cwl-runner
    :param deps_path: Unused
    :return: None
    """
    # Formatting (rather than concatenating) keeps this from raising a
    # TypeError when TMPDIR is not set.
    print(f"TMP: {os.getenv('TMPDIR')}")

    scale = ["--scale", str(self.scale)] if self.scale else []
    loglevel = ["--logLevel=" + self.loglevel] if self.loglevel else []
    cmd = ["toil-cwl-runner", "--stats", *loglevel, *scale, source_path, input_path]
    Logger.debug("Running command: '" + " ".join(cmd) + "'")

    # os.setsid puts the child in its own session.
    process = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, preexec_fn=os.setsid, stderr=subprocess.PIPE
    )
    Logger.info("CWLTool has started with pid=" + str(process.pid))

    jobstore_marker = "Path to job store directory is"
    for line in read_stdout(process):
        if jobstore_marker in line:
            # Report what follows the marker; the previous `idx + 1` slice
            # started one character into the marker text itself.
            idx = line.index(jobstore_marker)
            Logger.critical(
                "JOBSTORE DIR: " + line[idx + len(jobstore_marker):].strip()
            )
        Logger.debug("toil: " + line)

    print("finished")
def parse_if_dict(T: Type, val, path: str, skip_if_empty=True):
    """
    Coerce ``val`` into an instance of ``T``.

    :param T: Target type; called as ``T(**val)`` when ``val`` is a dict
    :param val: None, an instance of ``T``, or a dict of keyword arguments
    :param path: Name of the key being parsed, used in the error message
    :param skip_if_empty: When truthy, an empty dict yields None
    :raises SystemExit: With code 1 (after a critical log message) when
        ``T(**val)`` raises a TypeError
    :raises Exception: If ``val`` is none of the accepted kinds
    :return: None, ``val`` itself, or the new ``T`` instance
    """
    import sys

    if val is None:
        return None
    if isinstance(val, T):
        return val

    if isinstance(val, dict):
        if skip_if_empty and len(val) == 0:
            return None
        try:
            return T(**val)
        except TypeError as e:
            Logger.critical(
                f"Couldn't initialise key {path} of type '{T.__name__}', {str(e)}"
            )
            # sys.exit raises the same SystemExit(1) as the interactive
            # `exit` helper, which only exists when the site module is loaded.
            sys.exit(1)

    raise Exception(
        f"Couldn't parse for type '{T.__name__}', "
        f"expected dict but received '{val.__class__.__name__}' for {str(val)}"
    )
def translate_to_cwl_glob(glob, **debugkwargs):
    """
    Translate an output selector into the value used for a CWL glob.

    :param glob: Selector to translate; a falsy value gives None, and a
        non-Selector value is returned unchanged after a critical log message
    :param debugkwargs: Only appended to the log message about string globs
    :raises Exception: If the selector type is not handled
    :return: The translated glob
    """
    if not glob:
        return None

    if not isinstance(glob, Selector):
        Logger.critical(
            "String globs are being phased out from tool output selections, please use the provided "
            "Selector (InputSelector or WildcardSelector) classes. "
            + str(debugkwargs)
        )
        return glob

    if isinstance(glob, InputSelector):
        return translate_input_selector(glob, code_environment=False)

    if isinstance(glob, StringFormatter):
        return translate_string_formatter(glob)

    if isinstance(glob, WildcardSelector):
        return glob.wildcard

    raise Exception("Unimplemented selector type: " + glob.__class__.__name__)
def _get_datatype_transformations_from_entrypoints():
    """
    Collect the transformations exported by the EP.TRANSFORMATIONS group.

    Each entry point is expected to load to a list, whose items are added to
    the result. Anything else is reported with a warning, and entry points
    failing with an ImportError are reported as critical; both are skipped.

    :return: Flat list of all collected transformations
    """
    import importlib_metadata

    collected = []
    for entrypoint in importlib_metadata.entry_points().get(EP.TRANSFORMATIONS, []):
        try:
            m = entrypoint.load()
            if isinstance(m, list):
                collected.extend(m)
            else:
                # Fixed: the entrypoint name was missing its opening quote.
                Logger.warn(
                    f"Janis transformation entrypoint '{entrypoint.name}' was not a list (type {type(m)}). "
                    f"Only export a single list of transformations, for example: "
                    f"`janis_bioinformatics.transformations:transformations`"
                )
        except ImportError as e:
            Logger.critical(
                f"Couldn't import janis datatype_transformation extension '{entrypoint.name}': {e}"
            )
    return collected