def send_email(subject: str, body: str):

    nots = JanisConfiguration.manager().notifications
    mail_program = nots.mail_program

    if not mail_program:
        return Logger.log("Skipping email send as no mail program is configured")

    if not nots.email or nots.email.lower() == "none":
        Logger.log("Skipping notify status change as no email")
        return

    emails: List[str] = (
        nots.email if isinstance(nots.email, list) else nots.email.split(",")
    )

    email_template = f"""\
Content-Type: text/html
To: {"; ".join(emails)}
From: [email protected]
Subject: {subject}

{body}"""

    command = f"echo '{email_template}' | {mail_program}"
    Logger.log("Sending email with command: " + str(command.replace("\n", "\\n")))
    try:
        subprocess.call(command, shell=True)
    except Exception as e:
        Logger.critical(f"Couldn't send email '{subject}' to {emails}: {e}")

def exec_command(self, command): cmd = ["docker", "exec", "-i", self.dockerid] requiresshell = not isinstance(command, list) cmd.extend(command) if isinstance(command, list) else cmd.append(command) try: Logger.info("Executing command: " + " ".join(cmd)) val = (subprocess.check_output( cmd, shell=requiresshell).decode("ascii").strip()) except subprocess.CalledProcessError as e: Logger.critical( f"Docker exec_command failed '{e}': {e.output or e.stderr}") # check the logs try: logs_command = ["docker", "logs", self.dockerid] Logger.info("Checking docker logs: " + " ".join(logs_command)) Logger.debug(subprocess.check_output(logs_command)) except: Logger.critical( f"Failed to get logs for container {self.dockerid}") return (str(e), e.returncode) return val.strip() if val else val, 0
def get_digest(self, info: ContainerInfo) -> Optional[str]:
    try:
        token = self.get_token(info)
    except Exception as e:
        Logger.critical(
            f"Couldn't get digest for container (couldn't get token): '{str(info)}': {str(e)}"
        )
        return None

    if token:
        Logger.debug(f"Got token for '{info}': {token[: min(5, len(token) - 1)]}...")

    try:
        req = self.build_request(info, token)
        Logger.debug(f"Requesting digest from: {req.full_url}")
        with request.urlopen(req) as response:
            rheaders = response.headers
            digest = rheaders.get("etag", rheaders.get("Docker-Content-Digest"))
        if digest is not None:
            digest = digest.replace("'", "").replace('"', "")
        return digest
    except Exception as e:
        Logger.critical(f"Couldn't get digest for container '{str(info)}': {str(e)}")

def prepare_tool(
    tool: Tool,
    toolversions: List[str],
    isorphan: bool,
    is_published_pipeline: bool = False,
):
    # Stuff to list on the documentation page:
    #   - Versions of tools
    #   - Generated command
    #   - Cool if it grouped the tools by vendor
    if not tool:
        return None
    try:
        if is_published_pipeline:
            return ""
        if tool.type() == ToolType.CommandTool:
            return prepare_commandtool_page(tool, toolversions)
        elif tool.type() == ToolType.Workflow:
            return prepare_workflow_page(tool, toolversions)
        elif tool.type() == ToolType.CodeTool:
            return prepare_code_tool_page(tool, toolversions)
    except Exception as e:
        traceback.print_exc()
        Logger.critical(
            "Couldn't generate documentation for " + tool.id() + " " + str(e)
        )

def cli_logging(name: str, result: Dict):
    Logger.info(f"Test Case: {name}")
    Logger.info(f"Output: {result['output']}")

    if result["execution_error"]:
        Logger.critical(result["execution_error"])

    if len(result["succeeded"]) > 0:
        Logger.info(f"{len(result['succeeded'])} expected output PASSED")
        Logger.info("Succeeded expected output:")
        for s in result["succeeded"]:
            Logger.info(s)

    if len(result["failed"]) > 0:
        Logger.critical(f"{len(result['failed'])} expected output FAILED")
        Logger.critical("Failed expected output:")
        for f in result["failed"]:
            Logger.critical(f)

    if len(result["failed"]) == 0 and not result["execution_error"]:
        Logger.info(f"Test SUCCEEDED: {name}")
    else:
        Logger.critical(f"Test FAILED: {name}")

def submit_detatched_resume(
    self,
    wid: str,
    command: List[str],
    scriptdir: str,
    logsdir: str,
    config,
    capture_output: bool = False,
):
    import subprocess

    Logger.info(
        "Starting Janis in the background with: "
        + (" ".join(command) if isinstance(command, list) else str(command))
    )
    try:
        if capture_output:
            out = subprocess.check_output(
                command, close_fds=True, stderr=subprocess.STDOUT
            )
            Logger.info(out.decode().strip())
        else:
            subprocess.Popen(
                command,
                close_fds=True,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
    except subprocess.CalledProcessError as e:
        Logger.critical(
            f"Couldn't submit janis-monitor, non-zero exit code ({e.returncode})"
        )
        raise e

def determine_appropriate_source_from_hints(
    self,
    tool: Tool,
    inpid: str,
    source: Union[str, List[str], Dict[str, Union[str, List[str]]]],
) -> Optional[Union[str, List[str]]]:
    if isinstance(source, str) or isinstance(source, list):
        return source
    elif not isinstance(source, dict):
        Logger.critical(
            f"The input to the tool '{tool.id()}'.'{inpid}' did not have the correct format for doc.source, "
            f"expected Union[str, List[str], Dict[str, Union[str, List[str]]]], received '{type(source)}'"
        )
        return None

    tishj = ", ".join(source.keys())

    if not self.source_hints or len(self.source_hints) == 0:
        Logger.warn(
            f"There were no source hints specified to find an input for {tool.id()}.{inpid}, expected one "
            f"or more of {tishj}. You can specify source hints with --source-hint (in janis prepare)."
        )
        return None

    for hint in self.source_hints:
        if hint in source:
            return source[hint]

    shj = ", ".join(self.source_hints)
    Logger.warn(
        f"Couldn't find any of the specified source_hints ({shj}) in the tool input "
        f"{tool.id()}.{inpid}'s source fields ({tishj})"
    )
    return None

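# A hedged illustration (an added example, not from the original source) of the dict-shaped
# `source` the method above selects from: keys are source hints, values are a single path or
# a list of paths. With source_hints=["hg38"], the "hg38" entry would be returned. The hint
# names and URLs below are purely illustrative.
example_source = {
    "hg38": "https://example.org/references/hg38/reference.fasta",
    "hg19": [
        "https://example.org/references/hg19/reference.fasta",
        "https://example.org/references/hg19/reference.fasta.fai",
    ],
}
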
def query_tasks(self, status, name) -> Dict[str, WorkflowModel]:

    rows: List[TaskRow] = self.get_lazy_db_connection().get_all_tasks()

    failed = []
    relevant = {}

    for row in rows:
        if not os.path.exists(row.outputdir):
            failed.append(row.wid)
            continue
        try:
            metadb = WorkflowManager.has(
                row.outputdir, wid=row.wid, name=name, status=status
            )
            if metadb:
                model = metadb.to_model()
                model.outdir = row.outputdir
                relevant[row.wid] = model
        except Exception as e:
            Logger.critical(f"Couldn't check workflow '{row.wid}': {e}")
            failed.append(row.wid)

    if failed:
        failedstr = ", ".join(failed)
        Logger.warn(
            f"Couldn't get information for tasks: {failedstr}, run "
            f"'janis cleanup' to clean up your tasks."
        )

    return relevant

def execute(args):
    output = None
    if args.output:
        output = ast.literal_eval(args.output)

    try:
        available_test_cases = find_test_cases(args.tool)
        if args.test_case:
            if args.test_case not in available_test_cases:
                raise TestCasesNotFound(
                    f"Test case with name `{args.test_case}` NOT found."
                )
            test_cases = [args.test_case]
        else:
            test_cases = available_test_cases
    except Exception as e:
        Logger.critical("Unexpected error occurred when searching for test cases")
        Logger.critical(str(e))
        exit()

    for tc_name in test_cases:
        result = run_test_case(
            tool_id=args.tool,
            test_case=tc_name,
            engine=args.engine,
            output=output,
            config=args.config,
        )
        result["test_case"] = tc_name
        cli_logging(tc_name, result)

        try:
            # send output to test framework API
            if args.test_manager_url and args.test_manager_token:
                option = UpdateStatusOption(
                    url=args.test_manager_url, token=args.test_manager_token
                )
                update_status(result, option)
        except Exception as e:
            Logger.warn(
                f"Failed to update test status to {args.test_manager_url}: {e}"
            )

        try:
            # Send notification to Slack
            if args.slack_notification_url:
                option = NotificationOption(
                    url=args.slack_notification_url,
                    tool_name=args.tool,
                    test_case=tc_name,
                    test_id=args.test_id,
                )
                send_slack_notification(result=result, option=option)
        except Exception as e:
            Logger.warn(
                f"Failed to send notifications to Slack {args.slack_notification_url}: {e}"
            )

def exec_command(self, command): cmd = ["singularity", "run", "instance://" + self.instancename] cmd.extend(command) if isinstance(command, list) else cmd.append(command) try: val = subprocess.check_output(cmd) return val.strip().decode() if val else val, 0 except subprocess.CalledProcessError as e: Logger.critical("Singularity exec_command failed") return str(e), e.returncode
def __setattr__(self, name, value):
    if name in self.attributes_to_persist:
        try:
            self.kvdb[name] = value
        except Exception as e:
            Logger.critical(f"Failed to write {name}={value} due to: {e}")

    return super().__setattr__(name, value)

def stop_container(self): try: Logger.info(f"Stopping mysql container '{self.instancename}'") if self.run_logger: self.run_logger.terminate() cmd = ["singularity", "instance", "stop", self.instancename] return subprocess.check_output(cmd) except subprocess.CalledProcessError as e: Logger.critical( f"Couldn't stop singularity instance '{self.instancename}': {e}" )
def mark_paused(self):
    try:
        self.database.workflowmetadata.please_pause = True
        Logger.info(
            "Marked workflow as paused, this may take some time to properly pause"
        )
        self.database.workflowmetadata.commit()
        return True
    except Exception as e:
        Logger.critical("Couldn't mark paused: " + str(e))
        return False

def db_connection(self):
    try:
        if self.readonly:
            Logger.debug(
                "Opening database connection in READONLY mode: " + self.db_path
            )
            return sqlite3.connect(f"file:{self.db_path}?mode=ro", uri=True)

        Logger.debug("Opening database connection: " + self.db_path)
        return sqlite3.connect(self.db_path)
    except:
        Logger.critical("Error when opening DB connection to: " + self.db_path)
        raise

def get_tag_and_cleanup_prefix(
    prefix,
) -> Optional[Tuple[str, str, bool, Optional[DataType]]]:
    """
    :param prefix:
    :return: (raw_element, potentialID, hasSeparator, potentialType)
    """
    # cases:
    #   -a ADAPTER
    #   --adapter=ADAPTER
    #   --quality-cutoff=[5'CUTOFF,]3'CUTOFF
    el = prefix.lstrip()
    has_equals = False
    potential_type = None

    # if the prefix is split by ':' or '=', take the tag part and try to guess the value's type
    if ":" in el or "=" in el:
        parts = None
        if ":" in el:
            parts = el.split(":")
        elif "=" in el:
            parts = el.split("=")
            has_equals = True

        if len(parts) > 2:
            Logger.warn(
                f"Unexpected number of components in the tag '{el}' to guess the type, "
                f"using '{parts[0]}' and skipping type inference"
            )
            el = parts[0]
        else:
            el, pt = parts[0], guess_type(parts[1])
            if not potential_type and pt:
                potential_type = pt

    if " " in el:
        el = el.split(" ")[0]

    titleComponents = [l.strip().lower() for l in el.split("-") if l]
    if len(titleComponents) == 0:
        Logger.critical(f"Couldn't derive any title components for tag '{prefix}'")
        return None
    tag = "_".join(titleComponents)

    if tag.lower() in common_replacements:
        tag = common_replacements[tag.lower()]

    if tag.lower() == "outputfilename":
        potential_type = Filename

    return el, tag, has_equals, potential_type

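# A rough illustration (an added example, not from the original source) of the tuple the
# function above returns. The final element depends on guess_type, so it isn't asserted here.
example = get_tag_and_cleanup_prefix("--adapter=ADAPTER")
# example -> ("--adapter", "adapter", True, <whatever guess_type("ADAPTER") returns, possibly None>)
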
def submit_detatched_resume(
    self,
    wid: str,
    command: List[str],
    scriptdir: str,
    logsdir: str,
    config,
    capture_output: bool = False,
    log_output_to_stdout: bool = False,
):
    import subprocess

    Logger.info(
        "Starting Janis in the background with: "
        + (
            " ".join(f"'{c}'" for c in command)
            if isinstance(command, list)
            else str(command)
        )
    )
    try:
        if capture_output:
            out = (
                subprocess.check_output(
                    command, close_fds=True, stderr=subprocess.PIPE
                )
                .decode()
                .strip()
            )
            Logger.info(out)
            if log_output_to_stdout:
                print(out, file=sys.stdout)
            return out
        else:
            # This is important for when Janis submits itself in the foreground,
            # and we don't want the stderr to carry back through.
            # capture_output should be true when submitting to a slurm cluster
            # (or a similar scheduler).
            subprocess.Popen(
                command,
                close_fds=True,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
    except subprocess.CalledProcessError as e:
        Logger.critical(
            f"Couldn't submit janis-monitor, non-zero exit code ({e.returncode}): {e.stderr}"
        )
        raise e

def db_connection(self):
    config = JanisConfiguration.manager()
    try:
        if self.readonly:
            Logger.debug(
                "Opening database connection in READONLY mode: " + config.dbpath
            )
            return sqlite3.connect(f"file:{config.dbpath}?mode=ro", uri=True)

        Logger.debug("Opening database connection: " + config.dbpath)
        return sqlite3.connect(config.dbpath)
    except:
        Logger.critical("Error when opening DB connection to: " + config.dbpath)
        raise

def get_list_of_contigs_from_fastafai(fai_idx: str) -> Set[str]:
    # Structure: contig, size, location, basesPerLine and bytesPerLine
    try:
        contigs = set()
        with open_potentially_compressed_file(fai_idx) as f:
            for l in f:
                contigs.add(l.split("\t")[0])
        return contigs
    except Exception as e:
        Logger.critical(f"Couldn't get contigs from reference {fai_idx}: {str(e)}")
        return set()

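# A hedged illustration (an added example) of the tab-separated .fai lines the function
# above parses; only the first column (the contig name) is kept. The offset and line-width
# values below are illustrative.
example_fai_line = "chr1\t248956422\t112\t70\t71"
# example_fai_line.split("\t")[0] -> "chr1"
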
def get_list_of_contigs_from_bed(bedfile: str) -> Set[str]:
    try:
        contigs = set()
        with open_potentially_compressed_file(bedfile) as fp:
            for l in fp:
                contig: str = l.split("\t")[0]
                if contig:
                    contigs.add(contig.strip())
        return contigs
    except Exception as e:
        Logger.critical(f"Couldn't get contigs from bedfile {bedfile}: {str(e)}")
        return set()

def load_templates_if_required():
    import importlib_metadata

    global additional_templates

    if additional_templates is None:
        additional_templates = {}
        eps = importlib_metadata.entry_points().get(TEMPLATE_EP, [])
        # previously: pkg_resources.iter_entry_points(group=TEMPLATE_EP)
        for entrypoint in eps:
            try:
                additional_templates[entrypoint.name] = entrypoint.load()
            except ImportError as e:
                Logger.critical(
                    f"Couldn't import janis template '{entrypoint.name}': {e}"
                )
                continue

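# A hedged sketch (an added example, not from the original source) of how an external
# package could register a template under the entry-point group named by TEMPLATE_EP
# (defined elsewhere in this module). The package, module and class names are illustrative.
#
#     # in the external package's setup.py
#     setup(
#         ...,
#         entry_points={
#             TEMPLATE_EP: ["mycluster = my_package.templates:MyClusterTemplate"],
#         },
#     )
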
def start(self):
    self.prepare_mysql_dirs()
    self.container.ensure_downloaded()

    # Before we start, we want to create a database for Cromwell. We can do this by
    # binding a directory of scripts to /docker-entrypoint-initdb.d (runs *.sh, *.sql, *.sql.gz)
    # Source: https://hub.docker.com/_/mysql#initializing-a-fresh-instance
    self.container.bindpoints = {
        "/var/lib/mysql": self.datadirectory,
        "/var/run/mysqld": self.mysqldoverride,
        "/etc/mysql/": self.sqlconfdir,
        "/docker-entrypoint-initdb.d": self.startupscriptsdir,
    }

    if isinstance(self.container, Docker):
        self.container.exposedports = {self.forwardedport: None}

    self.container.environment_variables["MYSQL_ALLOW_EMPTY_PASSWORD"] = 1
    self.container.environment_variables["MYSQL_INITDB_SKIP_TZINFO"] = 1

    self.container.start_container()

    # Have to wait for it to initialise
    sleep(10)

    cmd = [
        "mysqladmin",
        "ping",
        "-h",
        "127.0.0.1",
        "-P",
        str(self.forwardedport),
        "-u",
        "root",
        "--wait=60",
    ]
    while True:
        (response, rc) = self.container.exec_command(cmd)
        if response == "mysqld is alive":
            return
        if response:
            Logger.critical("MySQL error: " + str(response))
            raise Exception(response)
        else:
            Logger.critical(rc)

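# A hedged sketch (an added example, not from the original source) of the kind of startup
# script that could be placed in the directory bound to /docker-entrypoint-initdb.d above,
# so the MySQL image creates Cromwell's database on first initialisation. The filename and
# database name are illustrative assumptions.
def _write_example_init_script(startupscriptsdir: str):
    import os

    with open(os.path.join(startupscriptsdir, "01-create-cromwell-db.sql"), "w") as f:
        f.write("CREATE DATABASE IF NOT EXISTS cromwell;\n")
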
def execute(args):
    output = None
    if args.output:
        output = ast.literal_eval(args.output)

    try:
        available_test_cases = find_test_cases(args.tool)
        if args.test_case:
            if args.test_case not in available_test_cases:
                raise TestCasesNotFound(
                    f"Test case with name `{args.test_case}` NOT found."
                )
            test_cases = [args.test_case]
        else:
            test_cases = available_test_cases
    except TestCasesNotFound as e:
        Logger.critical(str(e))
        exit()

    for tc_name in test_cases:
        result = run_test_case(
            tool_id=args.tool,
            test_case=tc_name,
            engine=args.engine,
            output=output,
            config=args.config,
        )
        cli_logging(tc_name, result)

        # Send notification to Slack
        if args.slack_notification_url:
            option = NotificationOption(
                url=args.slack_notification_url,
                tool_name=args.tool,
                test_case=tc_name,
                test_id=args.test_id,
            )
            send_slack_notification(result=result, option=option)

        # send output to test framework API
        if args.test_manager_url and args.test_manager_token:
            option = UpdateStatusOption(
                url=args.test_manager_url, token=args.test_manager_token
            )
            update_status(result, option)

def abort(self) -> bool:
    self.set_status(TaskStatus.ABORTED, force_notification=True)

    status = False

    engine = self.get_engine()
    try:
        status = bool(engine.terminate_task(self.get_engine_wid()))
    except Exception as e:
        Logger.critical("Couldn't abort task from engine: " + str(e))
    try:
        engine.stop_engine()
        if self.dbcontainer:
            self.dbcontainer.stop()
    except Exception as e:
        Logger.critical("Couldn't stop engine: " + str(e))

    return status

def send_email(subject: str, body: str):
    import tempfile, os

    nots = PreparedJob.instance().notifications
    mail_program = nots.mail_program

    if not mail_program:
        return Logger.debug("Skipping email send as no mail program is configured")

    if not nots.email or nots.email.lower() == "none":
        Logger.debug("Skipping notify status change as no email")
        return

    emails: List[str] = (
        nots.email if isinstance(nots.email, list) else nots.email.split(",")
    )
    Logger.debug(f"Sending email with subject {subject} to {emails}")

    email_template = f"""\
Content-Type: text/html
To: {"; ".join(emails)}
From: {nots.from_email}
Subject: {subject}

{body}"""

    # 2020-08-24 mfranklin: Write to disk and cat, because some emails are just too big
    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, "w") as tmp:
            # write the email (headers + body) to the temp file
            tmp.write(email_template)

        command = f"cat '{path}' | {mail_program}"
        Logger.log("Sending email with command: " + str(command.replace("\n", "\\n")))
        try:
            subprocess.call(command, shell=True)
            Logger.debug("Sent email successfully")
        except Exception as e:
            Logger.critical(f"Couldn't send email '{subject}' to {emails}: {e}")
    finally:
        os.remove(path)

def mark_aborted(outputdir, wid: Optional[str]) -> bool:
    try:
        if not wid:
            db = WorkflowManager.from_path_get_latest(
                outputdir, readonly=False
            ).database.workflowmetadata
        else:
            db = WorkflowDbManager.get_workflow_metadatadb(
                outputdir, wid, readonly=False
            )
        db.please_abort = True
        db.kvdb.commit()
        db.close()
        Logger.info(
            "Marked workflow as aborted, this may take some time to fully exit"
        )
        return True
    except Exception as e:
        Logger.critical("Couldn't mark aborted: " + str(e))
        return False

def stop_computation(self):
    try:
        # reset pause flag
        self.database.commit()

        self.get_engine().stop_engine()
        if self.dbcontainer:
            self.dbcontainer.stop()

        self.database.close()

    except Exception as e:
        Logger.critical(
            "An error occurred while trying to pause Janis state: "
            + str(e)
            + "\n\nSee the logfile for more information: "
            + Logger.WRITE_LOCATION
        )

    Logger.close_file()

def stop_container(self):
    if not self.dockerid:
        return Logger.info(
            f"Skipping stopping docker container '{self.container}' as no dockerID was found"
        )

    cmd = f"docker stop {self.dockerid}; docker rm -f {self.dockerid}"
    try:
        Logger.info("Stopping docker with command: " + str(cmd))
        subprocess.check_output(cmd, shell=True)
        self.running = False
    except subprocess.CalledProcessError as e:
        Logger.critical(
            f"An error occurred when trying to stop the container '{self.container}'. "
            f"You may need to stop this manually with: '{cmd}'. Error: " + str(e)
        )
        raise e

def from_path_with_wid(path, wid, readonly=False):
    """
    :param path: Path to workflow
    :param wid: Workflow ID
    :return: TaskManager after resuming (might include a wait)
    """
    # get everything and pass to constructor
    # database path

    path = WorkflowManager.get_task_path_for(path)
    if not os.path.exists(path):
        raise FileNotFoundError(f"Couldn't find path '{path}'")

    db = WorkflowDbManager.get_workflow_metadatadb(path, wid, readonly=readonly)

    if not wid:
        wid = db.wid  # .get_meta_info(InfoKeys.taskId)

    if not wid:
        raise Exception(f"Couldn't find workflow with id '{wid}'")

    envid = db.environment  # .get_meta_info(InfoKeys.environment)
    eng = db.engine
    fs = db.filescheme
    env = Environment(envid, eng, fs)

    try:
        JanisConfiguration._managed = db.configuration
    except Exception as e:
        Logger.critical(
            "The JanisConfiguration could not be loaded from the DB, this might be due to "
            "an older version; we'll load your current config instead. Error: " + str(e)
        )
        JanisConfiguration.initial_configuration(None)

    db.close()

    tm = WorkflowManager(outdir=path, wid=wid, environment=env, readonly=readonly)
    return tm

def try_get_outputs_for(self, inpid, wf, inputs, output_dir, description):

    from janis_assistant.main import WorkflowManager, run_with_outputs

    if os.path.exists(output_dir):
        try:
            wm = WorkflowManager.from_path_get_latest_manager(
                output_dir, readonly=True
            )
            outs_raw = wm.database.outputsDB.get()
            outs = {
                o.id_: o.value or o.new_path
                for o in outs_raw
                if o.value or o.new_path
            }
            if len(outs) > 0:
                out_val = first_value(outs)
                Logger.info(
                    f"Using cached value of transformation ({description}) for {inpid}: {out_val}"
                )
                return out_val
            Logger.log(
                f"Didn't get any outputs from previous workflow manager when deriving input {inpid} ({description})"
            )

        except Exception as e:
            Logger.debug(
                f"Couldn't get outputs from existing output_path for {inpid}, '{output_dir}' ({description}): {e}"
            )

    outs = run_with_outputs(wf, inputs=inputs, output_dir=output_dir)
    if not outs or len(outs) < 1:
        Logger.critical(
            f"Couldn't get outputs from transformation ({description}) for '{inpid}'"
        )
        return None

    return first_value(outs)

def prepare_all_tools():
    JanisShed.hydrate(modules=[janis_unix, janis_bioinformatics])

    data_types = JanisShed.get_all_datatypes()
    tools = {
        ts[0].id(): {t.version(): t for t in ts} for ts in JanisShed.get_all_tools()
    }

    Logger.info(f"Preparing documentation for {len(tools)} tools")
    Logger.info(f"Preparing documentation for {len(data_types)} data_types")

    tool_module_index = {}
    dt_module_index = {}
    ROOT_KEY = "root"

    for toolname, toolsbyversion in tools.items():
        # tool = tool_vs[0][0]()
        tool_versions = sort_tool_versions(list(toolsbyversion.keys()))
        default_version = tool_versions[0]
        Logger.log(
            f"Preparing {toolname}, found {len(tool_versions)} version[s] ({','.join(tool_versions)})"
        )

        defaulttool = toolsbyversion[default_version]
        if isclass(defaulttool):
            defaulttool = defaulttool()

        try:
            tool_path_components = list(
                filter(
                    lambda a: bool(a),
                    [defaulttool.tool_module(), defaulttool.tool_provider()],
                )
            )
        except Exception as e:
            Logger.critical(f"Failed to generate docs for {toolname}: {e}")
            continue

        # (toolURL, tool, isPrimary)
        toolurl_to_tool = [(toolname.lower(), defaulttool, True)] + [
            (get_tool_url(toolname, v), toolsbyversion[v], False)
            for v in tool_versions
        ]

        path_components = "/".join(tool_path_components)
        output_dir = f"{tools_dir}/{path_components}/".lower()

        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        for (toolurl, tool, isprimary) in toolurl_to_tool:
            output_str = prepare_tool(tool, tool_versions, not isprimary)
            output_filename = output_dir + toolurl + ".rst"
            with open(output_filename, "w+") as tool_file:
                tool_file.write(output_str)

        nested_keys_append_with_root(
            tool_module_index, tool_path_components, toolname, root_key=ROOT_KEY
        )

        Logger.log("Prepared " + toolname)

    for d in data_types:
        # tool = tool_vs[0][0]()
        if issubclass(d, Array):
            Logger.info("Skipping Array DataType")
            continue
        try:
            dt = d()
        except:
            print(d.__name__ + " failed to instantiate")
            continue

        did = dt.name().lower()
        Logger.log("Preparing " + dt.name())
        output_str = prepare_data_type(dt)

        dt_path_components = []
        # dt_path_components = list(filter(
        #     lambda a: bool(a),
        #     [, tool.tool_provider()]
        # ))

        path_components = "/".join(dt_path_components)
        output_dir = f"{dt_dir}{path_components}/"
        output_filename = output_dir + did + ".rst"

        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        nested_keys_append_with_root(
            dt_module_index, dt_path_components, did, root_key=ROOT_KEY
        )

        with open(output_filename, "w+") as dt_file:
            dt_file.write(output_str)

        Logger.log("Prepared " + did)

    def prepare_modules_in_index(contents, title, dir, max_depth=1):
        module_filename = dir + "/index.rst"
        module_tools = sorted(set(contents[ROOT_KEY] if ROOT_KEY in contents else []))
        submodule_keys = sorted(m for m in contents.keys() if m != ROOT_KEY)
        indexed_submodules_tools = [m.lower() for m in submodule_keys]

        with open(module_filename, "w+") as module_file:
            module_file.write(
                get_tool_toc(
                    alltoolsmap=tools,
                    title=title,
                    intro_text=f"Automatically generated index page for {title}:",
                    subpages=indexed_submodules_tools,
                    tools=module_tools,
                    max_depth=max_depth,
                )
            )

        for submodule in submodule_keys:
            prepare_modules_in_index(
                contents=contents[submodule],
                title=submodule,
                dir=f"{dir}/{submodule}/",
            )

    def prepare_dtmodules_in_index(contents, title, dir, max_depth=1):
        module_filename = dir + "/index.rst"
        module_tools = sorted(set(contents[ROOT_KEY] if ROOT_KEY in contents else []))
        submodule_keys = sorted(m for m in contents.keys() if m != ROOT_KEY)
        indexed_submodules_tools = [m.lower() + "/index" for m in submodule_keys]

        with open(module_filename, "w+") as module_file:
            module_file.write(
                get_toc(
                    title=title,
                    intro_text=f"Automatically generated index page for {title}:",
                    subpages=indexed_submodules_tools + module_tools,
                    max_depth=max_depth,
                )
            )

        for submodule in submodule_keys:
            prepare_modules_in_index(
                contents=contents[submodule],
                title=submodule,
                dir=f"{dir}/{submodule}/",
            )

    prepare_modules_in_index(tool_module_index, title="Tools", dir=tools_dir)
    prepare_dtmodules_in_index(
        dt_module_index, title="Data Types", dir=dt_dir, max_depth=1
    )