def __init__(
    self, input_to_select, remove_file_extension=None, type_hint=File, **kwargs
):
    """
    :param input_to_select: The name of the input to select
    :param remove_file_extension: Call basename() and remove the file extension
    :param type_hint: Janis can't determine the type of the input to select until
        translation time, so providing a hint type might suppress false warnings.
        This is similar to using .as_type(dt)
    """
    if not isinstance(input_to_select, str):
        raise Exception(
            f"Expected input_to_select to be string, not {type(input_to_select)}: {str(input_to_select)}"
        )

    # maybe worth validating the input_to_select identifier
    self.input_to_select = input_to_select
    self.type_hint = get_instantiated_type(type_hint) or File()

    if "use_basename" in kwargs:
        use_basename = kwargs["use_basename"]
        if remove_file_extension is None:
            remove_file_extension = use_basename
        Logger.warn(
            f"The 'use_basename' key is deprecated, please use 'remove_file_extension' instead: "
            f'InputSelector("{self.input_to_select}", remove_file_extension={str(use_basename)})'
        )

    self.remove_file_extension = remove_file_extension
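# Hedged usage sketch for the constructor above (assumes this is janis_core's
# InputSelector; the input name "bam" is illustrative):
#
#   InputSelector("bam", remove_file_extension=True)
#
# The deprecated keyword is still accepted but logs a warning and is mapped
# onto remove_file_extension:
#
#   InputSelector("bam", use_basename=True)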
def stop_engine(self):
    if self._logger:
        self._logger.terminate()

    self.should_stop = True

    if self._timer_thread:
        self._timer_thread.set()

    if self._logfp:
        self._logfp.flush()
        os.fsync(self._logfp.fileno())
        self._logfp.close()

    if not self.process_id:
        Logger.warn("Could not find a cromwell process to end, SKIPPING")
        return

    Logger.info("Stopping cromwell")
    try:
        process = os.getpgid(int(self.process_id))
        os.killpg(process, signal.SIGTERM)
        Logger.info("Stopped cromwell")
    except Exception as e:
        # can't do much more than warn here
        Logger.warn("Couldn't stop Cromwell process: " + str(e))

    self.is_started = False
def get_workflow_metadatadb(execpath, wid, readonly=False):
    connection = None
    sqlpath = WorkflowDbManager.get_sql_path_base(execpath)

    if not wid:
        Logger.debug("Opening database connection to get wid from: " + sqlpath)
        try:
            connection = sqlite3.connect(f"file:{sqlpath}?mode=ro", uri=True)
        except Exception:
            Logger.critical("Error when opening DB connection to: " + sqlpath)
            raise

        wid = RunDbProvider(db=connection).get_latest()
        if not wid:
            raise Exception("Couldn't get WID in task directory")

    retval = WorkflowMetadataDbProvider(sqlpath, wid, readonly=readonly)
    if connection:
        connection.close()
    return retval
def __new__(cls, *args, **kwargs):
    multiplier = None
    src, *otherargs = args

    if len(otherargs) == 1:
        f = otherargs[0].lower()
        # checked in order; the first matching prefix wins
        multiplier_hierarchy = [
            ("ki" in f, 1024),
            ("k" in f, 1000),
            ("mi" in f, 1.024),
            ("gi" in f, 0.001024),
            ("g" in f, 0.001),
        ]
        if not any(m[0] for m in multiplier_hierarchy):
            Logger.warn(
                f"Couldn't determine prefix {f} for FileSizeOperator, defaulting to MB"
            )
        else:
            multiplier = [m[1] for m in multiplier_hierarchy if m[0] is True][0]

    instance = super(FileSizeOperator, cls).__new__(cls)
    instance.__init__(args[0])

    if multiplier is not None and multiplier != 1:
        return instance * multiplier
    return instance
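# Sketch of the prefix handling above (assumes FileSizeOperator wraps a file
# source such as an InputSelector; "bam" is illustrative). The second argument
# picks the multiplier from the hierarchy, first match winning:
#
#   FileSizeOperator(InputSelector("bam"))          # default unit (MB)
#   FileSizeOperator(InputSelector("bam"), "KiB")   # "ki" matches -> × 1024
#   FileSizeOperator(InputSelector("bam"), "G")     # "g" matches  -> × 0.001
#   FileSizeOperator(InputSelector("bam"), "XB")    # no match     -> warns, MB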
def __init__(
    self,
    wf,
    identifier: str,
    datatype: DataType,
    source: ConnectionSource,
    doc: OutputDocumentation = None,
    output_folder: Union[str, InputSelector, List[Union[str, InputSelector]]] = None,
    output_name: Union[str, InputSelector] = None,
    skip_typecheck=False,
):
    super().__init__(wf, NodeType.OUTPUT, identifier)
    self.datatype = datatype

    # if source[0].node_type != NodeType.STEP:
    #     raise Exception(
    #         f"Unsupported connection type: 'Output' → {source[0].node_type}"
    #     )

    stype = source[0].outputs()[source[1]].outtype
    snode = source[0]
    if isinstance(snode, StepNode) and snode.scatter:
        stype = Array(stype)

    if not skip_typecheck and not datatype.can_receive_from(stype):
        Logger.critical(
            f"Mismatch of types when joining '{source[0].id()}.{source[1]}' to output node '{identifier}' "
            f"({stype.id()} -/→ {datatype.id()})"
        )

    self.source = verify_or_try_get_source(source)
    self.doc = (
        doc if isinstance(doc, OutputDocumentation) else OutputDocumentation(doc=doc)
    )
    self.output_folder = output_folder
    self.output_name = output_name
def second_formatter(secs):
    if secs is None:
        return "N/A"

    if not secs:
        return "0s"

    try:
        remainder = int(secs)
    except (ValueError, TypeError):
        Logger.critical(f"second_formatter received non-int type: '{secs}'")
        return str(secs)

    intervals = []
    ranges = [60, 3600, 86400]
    extensions = ["s", "m", "h", "d"]

    under_first_interval = False
    for r in ranges[::-1]:
        if under_first_interval or remainder >= r:
            under_first_interval = True
            val = remainder // r
            remainder -= val * r
            intervals.append(val)
    intervals.append(remainder)

    maxintervals = len(intervals) - 1
    outp = str(intervals[0]) + extensions[maxintervals]
    for i in range(1, len(intervals)):
        ivl = intervals[i]
        outp += ":" + str(ivl).zfill(2) + extensions[maxintervals - i]

    return outp
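# Expected outputs of second_formatter, derived from the 60 / 3600 / 86400
# second ranges above:
#
#   second_formatter(59)     -> "59s"
#   second_formatter(61)     -> "1m:01s"
#   second_formatter(3661)   -> "1h:01m:01s"
#   second_formatter(90061)  -> "1d:01h:01m:01s"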
def do_docs(args):
    try:
        import webbrowser

        webbrowser.open(DOCS_URL)
    except Exception as e:
        Logger.critical(f"Failed to open {DOCS_URL} ({e})")
def rm_dir(self, directory):
    Logger.info(f"Removing local directory '{directory}'")
    try:
        shutil.rmtree(directory)
        return True
    except Exception as e:
        Logger.critical(f"Error removing directory '{directory}': {e}")
        return False
def __init__(self, format: str, **kwargs):
    self._format: str = format

    keywords, balance = get_keywords_between_braces(self._format)

    if balance > 0:
        Logger.warn(
            "There was an imbalance of braces in the string _format, this might cause issues with concatenation"
        )

    skwargs = set(kwargs.keys())

    if not keywords == skwargs:
        # what's the difference
        if not keywords.issubset(skwargs):
            raise IncorrectArgsException(
                "The _format required additional arguments to be provided by "
                "**kwargs, requires the keys: " + ", ".join(keywords - skwargs)
            )
        else:
            raise TooManyArgsException(
                "The **kwargs contained unrecognised keys: "
                + ", ".join(skwargs - keywords)
            )

    self.kwargs = kwargs
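# Hedged usage sketch (this looks like janis_core's StringFormatter; the key
# names are illustrative). Every brace placeholder must be matched by exactly
# one kwarg:
#
#   StringFormatter("{sample}.sorted.bam", sample=InputSelector("sampleName"))
#
# A placeholder without a kwarg raises IncorrectArgsException; an extra kwarg
# raises TooManyArgsException.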
def translate_tool(
    self,
    tool,
    to_console=True,
    to_disk=False,
    export_path=None,
    with_docker=True,
    with_resource_overrides=False,
    max_cores=None,
    max_mem=None,
):
    tool_out = self.stringify_translated_tool(
        self.translate_tool_internal(
            tool,
            with_docker=with_docker,
            with_resource_overrides=with_resource_overrides,
        )
    )

    if to_console:
        print(tool_out)

    if to_disk:
        d = ExportPathKeywords.resolve(
            export_path, workflow_spec=self.name, workflow_name=tool.id()
        )
        if not os.path.exists(d):
            os.makedirs(d)

        fn_tool = self.tool_filename(tool)
        with open(os.path.join(d, fn_tool), "w+") as wf:
            Logger.log(f"Writing {fn_tool} to disk")
            wf.write(tool_out)
            Logger.log(f"Wrote {fn_tool} to disk")

    return tool_out
def __init__(
    self,
    wf,
    identifier: str,
    datatype: DataType,
    source: ConnectionSource,
    doc: str = None,
    output_tag: Union[str, InputSelector] = None,
    output_prefix: Union[str, InputSelector, List[Union[str, InputSelector]]] = None,
):
    super().__init__(wf, NodeTypes.OUTPUT, identifier)
    self.datatype = datatype

    if source[0].node_type != NodeTypes.STEP:
        raise Exception(
            f"Unsupported connection type: {NodeTypes.OUTPUT} → {source[0].node_type}"
        )

    stype = source[0].outputs()[source[1]].output_type
    snode = source[0]
    if isinstance(snode, StepNode) and snode.scatter:
        stype = Array(stype)

    if not datatype.can_receive_from(stype):
        Logger.critical(
            f"Mismatch of types when joining '{source[0].id()}.{source[1]}' to output node '{identifier}' "
            f"({stype.id()} -/→ {datatype.id()})"
        )

    self.source = verify_or_try_get_source(source)
    self.doc = doc
    self.output_tag = output_tag
    self.output_prefix = output_prefix
def __init__(self, **connections):
    super().__init__(metadata_class=WorkflowMetadata)

    self.connections = connections

    Logger.log(f"Creating workflow with identifier: '{self.id()}'")

    if not Validators.validate_identifier(self.id()):
        raise Exception(
            f"The identifier '{self.id()}' was invalid because {Validators.reason_for_failure(self.id())}"
        )

    # The following variables allow us to quickly check data about the graph
    self.nodes: Dict[str, Node] = {}

    self.input_nodes: Dict[str, InputNode] = {}
    self.step_nodes: Dict[str, StepNode] = {}
    self.output_nodes: Dict[str, OutputNode] = {}

    # Flags for different requirements that a workflow might need
    self.has_scatter = False
    self.has_subworkflow = False
    self.has_multiple_inputs = False

    # Now that we've initialised everything, we can "construct" the workflow
    # for subclasses of this class; for a WorkflowBuilder this does nothing
    # and nodes are added later
    self.constructor()
def get_recipe_for_keys(self, keys: List[str]):
    self.load_recipes()

    if not keys:
        return {}

    rec = {}
    for key in keys:
        found_key = False

        if key in self._files_by_key:
            found_key = True
            rec.update(self.get_cascaded_dict_from_yamls(self._files_by_key[key]))

        if key in self.recipes:
            found_key = True
            rec.update(self.recipes[key] or {})

        if not found_key:
            Logger.critical(f"Couldn't find '{key}' in known recipes")

    return rec
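# Illustrative behaviour of get_recipe_for_keys (keys and values are
# hypothetical): with self.recipes = {"hg38": {"reference": "hg38.fasta"}},
#
#   self.get_recipe_for_keys(["hg38"])     -> {"reference": "hg38.fasta"}
#   self.get_recipe_for_keys(["unknown"])  -> {} (and logs a critical message)
#
# Later keys override earlier ones on collision because of the dict.update calls.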
def map_to_wdl(t: NativeType):
    import wdlgen as wdl

    if t == NativeTypes.kBool:
        return wdl.PrimitiveType.kBoolean
    elif t == NativeTypes.kInt:
        return wdl.PrimitiveType.kInt
    elif t in (NativeTypes.kLong, NativeTypes.kFloat, NativeTypes.kDouble):
        return wdl.PrimitiveType.kFloat
    elif t == NativeTypes.kStr:
        return wdl.PrimitiveType.kString
    elif t in (NativeTypes.kFile, NativeTypes.kStdout, NativeTypes.kStderr):
        return wdl.PrimitiveType.kFile
    elif t == NativeTypes.kDirectory:
        Logger.log(
            "Using data_type 'Directory' for wdl, this requires cromwell>=37 and language=development"
        )
        return wdl.PrimitiveType.kDirectory
    elif t == NativeTypes.kArray:
        return wdl.ArrayType.kArray

    raise Exception(
        f"Unhandled primitive type {t}, expected one of {', '.join(NativeTypes.all)}"
    )
def get_value_for_key(d, key, default):
    val = d.get(key)
    if not val:
        return default.get(key) if default else None

    Logger.log(f"Got value '{val}' for key '{key}'")
    return val
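# Fallback behaviour of get_value_for_key, following directly from the code:
#
#   get_value_for_key({"engine": "cromwell"}, "engine", {"engine": "cwltool"})
#   -> "cromwell"
#   get_value_for_key({}, "engine", {"engine": "cwltool"})  -> "cwltool"
#   get_value_for_key({}, "engine", None)                   -> None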
def resolve(path, workflow_spec, workflow_name):
    if not path:
        Logger.critical("Output path was invalid, changed to working directory")
        path = "."

    if ExportPathKeywords.workflow_spec in path and workflow_spec is None:
        raise Exception(
            f"path ('{path}') contained parameter {ExportPathKeywords.workflow_spec} "
            "but caller of .resolve did not pass language"
        )

    if ExportPathKeywords.workflow_name in path and workflow_name is None:
        raise Exception(
            f"path ('{path}') contained parameter {ExportPathKeywords.workflow_name} "
            "but caller of .resolve did not pass tool name"
        )

    path = p.expanduser(path)

    if path == "." or path[:2] == "./":
        path = getcwd() + (path[1:] if len(path) > 1 else "")
    elif path[0] != "/":
        path = p.join(getcwd(), path)

    return path.replace(
        ExportPathKeywords.workflow_spec,
        workflow_spec.lower() if workflow_spec else "",
    ).replace(
        ExportPathKeywords.workflow_name,
        workflow_name.lower() if workflow_name else "",
    )
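# Hedged sketch of the substitution above (assumes the keyword constants are
# "{language}" and "{name}"; their real values live on ExportPathKeywords and
# aren't shown in this snippet):
#
#   resolve("~/janis/{language}/{name}", "wdl", "BwaMem")
#   -> "/home/<user>/janis/wdl/bwamem"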
def poll_metadata(self):
    if self._timer_thread.is_set() or self.should_stop:
        return

    for engine_id_to_poll in self.progress_callbacks:
        try:
            meta = self.metadata(engine_id_to_poll)
            if meta:
                for callback in self.progress_callbacks[engine_id_to_poll]:
                    callback(meta)
        except Exception as e:
            Logger.critical(
                f"Received a critical error ({repr(e)}) when getting metadata for "
                f"Cromwell task {engine_id_to_poll}, hence terminating task with status=SUSPENDED"
            )
            meta = RunModel(
                id_=None,
                submission_id=None,
                status=TaskStatus.SUSPENDED,
                engine_id=engine_id_to_poll,
                name=None,
                execution_dir=None,
            )
            for callback in self.progress_callbacks[engine_id_to_poll]:
                callback(meta)

    # call timer again
    interval = self.get_poll_interval()
    threading.Timer(interval, self.poll_metadata).start()
def do_rm(args):
    wids = args.wid
    for wid in wids:
        try:
            ConfigManager.manager().remove_task(wid, keep_output=args.keep)
        except Exception as e:
            Logger.critical(f"Can't remove {wid}: " + str(e))
def rm_dir(self, directory):
    import urllib.request

    Logger.info(f"Issuing HTTP.DELETE request for directory '{directory}'")
    req = urllib.request.Request(directory, method="DELETE")
    return urllib.request.urlopen(req)
def do_version(_):
    from tabulate import tabulate
    import importlib_metadata

    from janis_assistant.__meta__ import __version__ as jr_version
    from janis_core.__meta__ import __version__ as jc_version
    import janis_core.toolbox.entrypoints as EP

    fields = [["janis-core", jc_version], ["janis-assistant", jr_version]]

    # eps = pkg_resources.iter_entry_points(group=EP.EXTENSIONS)
    eps = importlib_metadata.entry_points().get(EP.EXTENSIONS, [])
    skip_eps = {"assistant"}
    for entrypoint in eps:
        if entrypoint.name in skip_eps:
            continue
        try:
            version = entrypoint.load().__version__
            if version:
                fields.append(["janis-" + entrypoint.name, version])
                skip_eps.add(entrypoint.name)
        except Exception as e:
            Logger.log_ex(e)

    print(tabulate(fields))
def add_tool(tool: Tool) -> bool:
    v: Optional[str] = tool.version()
    if not v:
        t = f"The tool {tool.id()} did not have a version and will not be registered"
        Logger.critical(t)
        return False

    return JanisShed._toolshed.register(tool.id().lower(), v.lower(), tool)
def do_rm(args):
    wids = args.wid
    for wid in wids:
        try:
            ConfigManager.get_from_path_or_submission_lazy(
                wid, readonly=True
            ).remove_task(wid, keep_output=args.keep)
        except Exception as e:
            Logger.critical(f"Can't remove {wid}: " + str(e))
def get_file_size(self, path) -> Optional[int]:
    try:
        stat = os.stat(path)
        if not stat:
            return None
        return stat.st_size
    except Exception as e:
        Logger.warn(f"Couldn't get file size of path '{path}': {repr(e)}")
        return None
def add_tool(tool: Tool) -> bool:
    v: Optional[str] = tool.version()
    if not v:
        t = f"The tool {tool.id()} did not have a version and will not be registered"
        Logger.critical(t)
        return False

    Logger.log("Adding tool: " + tool.id())
    JanisShed._byclassname.register(tool.__class__.__name__, tool)
    return JanisShed._toolshed.register(tool.id().lower(), v.lower(), tool)
def cwl_type(self, has_default=False):
    inner_types = [a.cwl_type(has_default=has_default) for a in self.subtypes]

    try:
        # deduplicate; may fail if the inner CWL types aren't hashable
        inner_types = list(set(inner_types))
    except Exception as e:
        Logger.debug(f"Error creating set from ({inner_types}): {e}")

    if len(inner_types) == 1:
        return inner_types[0]
    return inner_types
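# Illustrative effect of the dedup above (hedged: assumes this method sits on a
# union-like janis type such as UnionType, and that File/String map to the CWL
# types "File"/"string"):
#
#   UnionType(File, File).cwl_type()    -> "File"
#   UnionType(File, String).cwl_type()  -> ["File", "string"]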
def get_cascaded_dict_from_yamls(files):
    d = {}
    for f in files:
        try:
            with open(f) as rl:
                adr = ruamel.yaml.load(rl, Loader=ruamel.yaml.Loader)
                d.update(adr)
        except Exception as e:
            Logger.critical(f"Couldn't parse file '{f}': {e}")
    return d
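# Cascading behaviour, following from dict.update: later files win on key
# collisions. With (hypothetical) files
#   a.yaml: {queue: default, cores: 4}
#   b.yaml: {cores: 8}
#
#   get_cascaded_dict_from_yamls(["a.yaml", "b.yaml"])
#   -> {"queue": "default", "cores": 8}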
def convert_generic_class(
    t, ignore_fields=None, get_string_repr_func=None, workflow_id: str = None
):
    options = []

    def get_string_repr_func2(obj):
        return (get_string_repr_func or JanisTranslator.get_string_repr)(
            obj, workflow_id=workflow_id
        )

    try:
        has_init_dict = not isinstance(
            t, (Tool, WorkflowBase, PythonTool, StepNode)
        ) and hasattr(t, "init_dictionary")
    except KeyError:
        has_init_dict = False

    if has_init_dict:
        options.extend(
            f"{k}={get_string_repr_func2(v)}" for k, v in t.init_dictionary().items()
        )
    else:
        ignore_fields = set(
            (ignore_fields if ignore_fields else []) + ["self", "args", "kwargs"]
        )

        params = inspect.signature(type(t).__init__).parameters
        param_map = {}
        if not isinstance(t, (StepNode, WorkflowBase)) and hasattr(t, "init_key_map"):
            param_map = t.init_key_map
        # fields = fields_to_check if fields_to_check \
        #     else [f for f in dict(params).keys() if f not in ignore_fields]

        for fkey in params:
            if fkey in ignore_fields:
                continue

            opts = params[fkey]

            t_key = param_map.get(fkey, fkey)
            if t_key is None:
                continue

            if hasattr(t, t_key):
                v = t.__getattribute__(t_key)
            else:
                Logger.warn(
                    f"Object '{t.__class__.__name__}' didn't have attribute {t_key}, setting to None and it might get skipped"
                )
                v = None
            if (v is None and opts.default is None) or v == opts.default:
                continue

            options.append(fkey + "=" + get_string_repr_func2(v))

    return f"{t.__class__.__name__}({', '.join(options)})"
def check_types(self):
    from janis_core.workflow.workflow import InputNode, StepNode

    stoolin: TOutput = (
        self.start.outputs()[self.stag]
        if self.stag is not None
        else first_value(self.start.outputs())
    )
    ftoolin: TInput = (
        self.finish.inputs()[self.ftag]
        if self.ftag is not None
        else first_value(self.finish.inputs())
    )

    stype = stoolin.outtype
    ftype = ftoolin.intype

    start_is_scattered = (
        isinstance(self.start, StepNode) and self.start.scatter is not None
    )

    if start_is_scattered:
        Logger.log(
            f"This edge merges the inputs from '{full_dot(self.start, self.stag)}' for "
            f"'{full_dot(self.finish, self.ftag)}'"
        )
        stype = Array(stype)

    if self.scatter:
        if not isinstance(stype, Array):
            raise Exception(
                f"Scatter was required for '{self.start.id()}.{self.stag}' → '{self.finish.id()}.{self.ftag}' but "
                f"the input type was {type(stype).__name__} and not an array"
            )
        stype = stype.subtype()

    source_has_default = (
        isinstance(self.start, InputNode) and self.start.default is not None
    )

    # Scatters are handled automatically by the StepTagInput Array unwrapping
    # Merges are handled automatically by the `start_is_scattered` Array wrap
    self.compatible_types = ftype.can_receive_from(stype, source_has_default)
    if not self.compatible_types:
        if isinstance(ftype, Array) and ftype.subtype().can_receive_from(stype):
            self.compatible_types = True

    if not self.compatible_types:
        s = full_dot(self.start, self.stag)
        f = full_dot(self.finish, self.ftag)
        message = (
            f"Mismatch of types when joining '{s}' to '{f}': "
            f"{stoolin.outtype.id()} -/→ {ftoolin.intype.id()}"
        )
        if isinstance(stype, Array) and ftype.can_receive_from(stype.subtype()):
            message += " (did you forget to SCATTER?)"
        Logger.critical(message)
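# Worked example of the checks above (types illustrative): joining a step
# output of Array(String) to an input expecting String fails can_receive_from,
# and since stype is an Array whose subtype (String) would be receivable, the
# logged message gains the "(did you forget to SCATTER?)" hint. With scatter
# set on this edge, stype is unwrapped to String first and the join passes.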
def terminate(self):
    self.should_terminate = True
    if self.logfp:
        try:
            self.logfp.flush()
            os.fsync(self.logfp.fileno())
        except Exception as e:
            # This isn't a proper error: there's nothing we can do about it,
            # and it doesn't prohibit the rest of the shutdown of Janis.
            Logger.critical("Couldn't flush engine stderr to disk: " + str(e))
def __init__(self, d: dict, default: dict):
    d = d if d else {}

    self.id = JanisConfiguration.get_value_for_key(d, self.Keys.Id, default)
    Logger.log("Got template ID: " + str(self.id))

    # remove this id from the dictionary: https://stackoverflow.com/a/15411146/
    d.pop(self.Keys.Id.value, None)

    self.template = from_template(self.id, d)