def test_available_by_getting_version(command: Optional[str] = None) -> str:
    try:
        version = subprocess.check_output(["singularity", "--version"]).decode()

        import re

        # require Singularity 3.x.x
        match = re.search(r"(\d+\.\d+\.\d+)", version)
        if not match:
            raise Exception(f"Couldn't interpret singularity version {version}")
        parsed_version = match.group(0)
        # compare the major version component (not just the first character)
        if int(parsed_version.split(".")[0]) < 3:
            raise Exception(
                f"Unsupported singularity version {version}, expected 3.x.x"
            )

        return version

    # except subprocess.CalledProcessError as e:
    #     raise Container.ContainerEnvNotFound("singularity", e)

    except Exception as e:
        if command is not None:
            try:
                Logger.info("Trying to load singularity")
                subprocess.run(command, shell=True)
            except Exception as e:
                raise Container.ContainerEnvNotFound("singularity", e)
        else:
            raise Container.ContainerEnvNotFound("singularity", e)

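# Hedged, standalone sketch (not from the source): the same major-version check in
# isolation, using only the standard library, so it can be exercised without
# singularity installed. The helper name is hypothetical.
import re

def _parse_singularity_major_version(version_output: str) -> int:
    """Extract the major version from e.g. 'singularity version 3.5.2'."""
    match = re.search(r"(\d+)\.(\d+)\.(\d+)", version_output)
    if not match:
        raise ValueError(f"Couldn't interpret singularity version: {version_output!r}")
    return int(match.group(1))

# _parse_singularity_major_version("singularity version 3.5.2")  -> 3
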
def start_container(self): command = ["docker", "run", "-d"] if self.environment_variables: command.extend(f"-e{k}={v}" for k, v in self.environment_variables.items()) if self.bindpoints: command.extend(f"-v{v}:{k}" for k, v in self.bindpoints.items()) if self.exposedports: for k, v in self.exposedports.items(): command.extend(["-p", f"{k}:{v or k}"]) if self.instancename: command.extend(["--name", self.instancename]) command.append(self.container) Logger.info("Starting docker with command: " + str(" ".join(command))) try: self.dockerid = subprocess.check_output(command).decode( "ascii").strip() self.running = True except subprocess.CalledProcessError as e: raise Exception( f"An error occurred while starting a docker container: {e} ({e.output or e.stderr})" )
def start_or_submit(self, run_in_background, watch=False):
    # check container environment is loaded
    metadb = self.database.workflowmetadata

    jc = metadb.configuration
    metadb.containertype = jc.container.__name__
    metadb.containerversion = jc.container.test_available_by_getting_version()

    # this happens for all workflows no matter what type
    self.set_status(TaskStatus.QUEUED)

    wid = metadb.wid

    # resubmit the engine
    if not run_in_background:
        return self.resume()

    loglevel = LogLevel.get_str(Logger.CONSOLE_LEVEL)
    command = ["janis", "--logLevel", loglevel, "resume", "--foreground", wid]
    scriptdir = self.get_path_for_component(self.WorkflowManagerPath.configuration)
    logdir = self.get_path_for_component(self.WorkflowManagerPath.logs)

    jc.template.template.submit_detatched_resume(
        wid=wid, command=command, scriptdir=scriptdir, logsdir=logdir, config=jc
    )

    Logger.info("Submitted detached engine")

    if watch:
        Logger.log("Watching submitted workflow")
        self.show_status_screen()

def submit_detatched_resume(
    self,
    wid: str,
    command: List[str],
    scriptdir: str,
    logsdir: str,
    config,
    capture_output: bool = False,
):
    import subprocess

    Logger.info(
        "Starting Janis in the background with: "
        + (" ".join(command) if isinstance(command, list) else str(command))
    )
    try:
        if capture_output:
            out = subprocess.check_output(
                command, close_fds=True, stderr=subprocess.STDOUT
            )
            Logger.info(out.decode().strip())
        else:
            subprocess.Popen(
                command,
                close_fds=True,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
    except subprocess.CalledProcessError as e:
        Logger.critical(
            f"Couldn't submit janis-monitor, non-zero exit code ({e.returncode})"
        )
        raise e

def exec_command(self, command):
    cmd = ["docker", "exec", "-i", self.dockerid]
    requiresshell = not isinstance(command, list)
    if isinstance(command, list):
        cmd.extend(command)
    else:
        cmd.append(command)

    try:
        Logger.info("Executing command: " + " ".join(cmd))
        val = (
            subprocess.check_output(cmd, shell=requiresshell).decode("ascii").strip()
        )
    except subprocess.CalledProcessError as e:
        Logger.critical(f"Docker exec_command failed '{e}': {e.output or e.stderr}")
        # check the logs
        try:
            logs_command = ["docker", "logs", self.dockerid]
            Logger.info("Checking docker logs: " + " ".join(logs_command))
            Logger.debug(subprocess.check_output(logs_command))
        except:
            Logger.critical(f"Failed to get logs for container {self.dockerid}")
        return (str(e), e.returncode)

    return val.strip() if val else val, 0

def cleanup_missing_tasks(self):
    from tabulate import tabulate

    rows: List[TaskRow] = self.get_lazy_db_connection().get_all_tasks()

    failed = []
    for row in rows:
        if not os.path.exists(row.outputdir):
            failed.append((row.wid, row.outputdir))
            continue
        try:
            _ = WorkflowManager.from_path_with_wid(
                row.outputdir, row.wid, readonly=True
            )
        except Exception:
            failed.append((row.wid, row.outputdir))

    if failed:
        Logger.warn("Removing the following tasks:\n" + tabulate(failed))
        if "y" in str(input(f"Remove {len(failed)} tasks (Y / n)? ")).lower():
            self.get_lazy_db_connection().remove_by_ids([r[0] for r in failed])
            Logger.info("Cleaned up tasks")
        else:
            Logger.info("Skipping cleaning of tasks")

def send_slack_notification(result: Dict, option: NotificationOption):
    Logger.info("sending notification to Slack")

    if len(result["failed"]) == 0 and not result["execution_error"]:
        failed = False
        status = "Test Succeeded"
        icon = ":white_check_mark:"
    else:
        failed = True
        status = "Test Failed"
        icon = ":x:"

    test_description = ""
    if option.test_id:
        test_description = f" *{option.test_id}*"

    summary_block = {
        "type": "section",
        "text": {
            "type": "mrkdwn",
            "text": f"{icon} {status}{test_description}: {option.tool_name} - {option.test_case}",
        },
    }

    blocks = [summary_block]

    if failed and result["failed"]:
        failed_expected_output = []
        for f in result["failed"]:
            failed_expected_output.append(f":black_small_square: {f}")

        failed_block = {
            "type": "section",
            "text": {"type": "mrkdwn", "text": "\n".join(failed_expected_output)},
        }
        blocks.append(failed_block)

    if result["execution_error"]:
        execution_error_block = {
            "type": "section",
            "text": {"type": "mrkdwn", "text": f"{result['execution_error']}"},
        }
        blocks.append(execution_error_block)

    request = {"blocks": blocks}
    resp = requests.post(url=option.url, json=request)

    if resp.status_code == requests.codes.ok:
        Logger.info("Notification sent")
    else:
        Logger.warn("Failed to send slack notification")
        Logger.warn(f"{resp.status_code}: {resp.text}")

    return resp.status_code, resp.text

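# Hedged illustration with hypothetical values (tool name, test id and failure text are made
# up): for a result containing one failed expectation and no execution error, the payload
# POSTed to the Slack webhook above has roughly this shape.
example_slack_payload = {
    "blocks": [
        {
            "type": "section",
            "text": {
                "type": "mrkdwn",
                "text": ":x: Test Failed *test-1234*: BwaMem - basic",
            },
        },
        {
            "type": "section",
            "text": {
                "type": "mrkdwn",
                "text": ":black_small_square: output_bam: md5 checksum mismatch",
            },
        },
    ]
}
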
def start_container(self): command = ["singularity", "instance", "start"] # if self.environment_variables: # command.extend(f"-e{k}={v}" for k, v in self.environment_variables.items()) if self.bindpoints: command.extend(f"-B{v}:{k}" for k, v in self.bindpoints.items()) if self.exposedports: command.extend([ "--net", "--network=none", "--network-args", *[ f"portmap={v}:{k}/tcp" for k, v in self.exposedports.items() ], ]) try: newenv = os.environ for k in self.environment_variables: newenv["SINGULARITYENV_" + k] = str( self.environment_variables[k]) if not self.instancename: self.instancename = generate_new_id(set()) command.extend([self.container_path(), self.instancename]) Logger.info("Starting singularity with command: " + " ".join(command)) out = subprocess.check_output(command, env=newenv, stderr=subprocess.STDOUT) Logger.debug(f"Singularity output: {out}") singrun = ["singularity", "run", "instance://" + self.instancename] Logger.debug("Started container, now running " + str(singrun)) startprocess = subprocess.Popen(singrun, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) self.run_logger = ProcessLogger( startprocess, prefix="mysql: ", logfp=None, exit_function=self.runlogger_didexit, ) except subprocess.CalledProcessError as e: raise Exception( f"An error occurred while starting a singularity container: {str(e)}.\n\tOutput: {str(e.output)}" )
def mark_paused(self):
    try:
        self.database.workflowmetadata.please_pause = True
        Logger.info(
            "Marked workflow as paused, this may take some time to properly pause"
        )
        self.database.workflowmetadata.commit()
        return True
    except Exception as e:
        Logger.critical("Couldn't mark paused: " + str(e))
        return False

def stop_container(self):
    try:
        Logger.info(f"Stopping mysql container '{self.instancename}'")
        if self.run_logger:
            self.run_logger.terminate()
        cmd = ["singularity", "instance", "stop", self.instancename]
        return subprocess.check_output(cmd)

    except subprocess.CalledProcessError as e:
        Logger.critical(
            f"Couldn't stop singularity instance '{self.instancename}': {e}"
        )

def create_task_base(self, wf: Workflow, job: PreparedJob):
    forbiddenids = set()
    if job.store_in_central_db:
        try:
            with self.with_cursor() as cursor:
                forbiddenids = set(
                    t[0] for t in cursor.execute("SELECT id FROM tasks").fetchall()
                )
        except sqlite3.OperationalError as e:
            if "no such column: id" in repr(e):
                from shutil import move

                dt = datetime.utcnow()
                np = f"{job.db_path}.original-{dt.strftime('%Y%m%d')}"
                Logger.warn(f"Moving old janis-db to '{np}'")
                move(job.db_path, np)
                self._taskDB = None
                return self.create_task_base(wf=wf, job=job)
            raise

    submission_id = generate_new_id(forbiddenids)

    output_dir = fully_qualify_filename(job.output_dir)

    if not job.execution_dir:
        job.execution_dir = os.path.join(output_dir, "janis")
        Logger.debug(
            f"No execution-dir was provided, constructed one from the output-dir: {job.execution_dir}"
        )
    job.execution_dir = fully_qualify_filename(job.execution_dir)

    Logger.info(
        f"Starting task with id = '{submission_id}' | output dir: {job.output_dir} | execution dir: {job.execution_dir}"
    )

    row = TaskRow(
        submission_id, execution_dir=job.execution_dir, output_dir=output_dir
    )
    WorkflowManager.create_dir_structure(job.execution_dir)

    if job.store_in_central_db:
        self.get_lazy_db_connection().insert_task(row)
    else:
        Logger.info(
            f"Not storing task '{submission_id}' in database. To watch, use: 'janis watch {output_dir}'"
        )

    if self._connection:
        self._connection.commit()
        self._connection.close()
    self._taskDB = None
    self._connection = None

    return row

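# Illustrative stand-in only -- this is NOT the real janis generate_new_id()
# implementation, just a sketch of the contract create_task_base relies on: produce a
# short submission id that does not collide with any id already in the central database.
import random
import string

def _generate_unique_id_sketch(forbidden_ids: set, length: int = 6) -> str:
    alphabet = string.ascii_lowercase + string.digits
    while True:
        candidate = "".join(random.choices(alphabet, k=length))
        if candidate not in forbidden_ids:
            return candidate
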
def set_status(self, status: TaskStatus, force_notification=False):
    prev = self.database.workflowmetadata.status

    if prev == status and not force_notification:
        return

    Logger.info("Status changed to: " + str(status))
    self.database.workflowmetadata.status = status
    self.database.commit()
    # send an email here
    NotificationManager.notify_status_change(status, self.database.get_metadata())

def submit_detatched_resume(
    self,
    wid: str,
    command: List[str],
    scriptdir: str,
    logsdir: str,
    config,
    capture_output: bool = False,
    log_output_to_stdout: bool = False,
):
    import subprocess

    Logger.info(
        "Starting Janis in the background with: "
        + (
            " ".join(f"'{c}'" for c in command)
            if isinstance(command, list)
            else str(command)
        )
    )
    try:
        if capture_output:
            out = (
                subprocess.check_output(
                    command, close_fds=True, stderr=subprocess.PIPE
                )
                .decode()
                .strip()
            )
            Logger.info(out)
            if log_output_to_stdout:
                print(out, file=sys.stdout)
            return out
        else:
            # This is important for when Janis submits itself in the foreground,
            # and we don't want the stderr to carry back through.
            # capture_output should be true when submitting to a slurm cluster or similar.
            subprocess.Popen(
                command,
                close_fds=True,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
    except subprocess.CalledProcessError as e:
        Logger.critical(
            f"Couldn't submit janis-monitor, non-zero exit code ({e.returncode}): {e.stderr}"
        )
        raise e

def remove_exec_dir(self):
    status = self.database.workflowmetadata.status

    keep_intermediate = self.database.workflowmetadata.keepexecutiondir
    if (
        not keep_intermediate
        and status is not None
        and status == TaskStatus.COMPLETED
    ):
        execdir = self.get_path_for_component(self.WorkflowManagerPath.execution)
        if execdir and execdir != "None":
            Logger.info("Cleaning up execution directory")
            self.environment.filescheme.rm_dir(execdir)
            self.database.progressDB.set(ProgressKeys.cleanedUp)

def guess_type(potential_type: str):
    if not potential_type:
        return None

    l = potential_type.lower()
    hopeful_type = JanisShed.get_datatype(l)
    if not hopeful_type:
        if "st" in potential_type:
            hopeful_type = String

    if hopeful_type:
        Logger.info(
            f"Found type {hopeful_type.__name__} from tag: {potential_type}"
        )

    return hopeful_type

def run_test_case(
    tool_id: str,
    test_case: str,
    engine: EngineType,
    output: Optional[Dict] = None,
    config: str = None,
) -> Dict[str, Any]:
    tool = test_helpers.get_one_tool(tool_id)

    if not tool:
        raise Exception(f"Tool {tool_id} not found")

    runner = ToolTestSuiteRunner(tool, config=config)
    tests_to_run = [
        tc for tc in tool.tests() if tc.name.lower() == test_case.lower()
    ]

    if not tests_to_run:
        raise Exception(f"Test case {test_case} not found")

    if len(tests_to_run) > 1:
        raise Exception(
            f"There is more than one test case with the same name {test_case}"
        )

    if output is not None:
        Logger.info(
            "Dryrun: validating test using provided output data without running the workflow"
        )

    failed = set()
    succeeded = set()
    execution_error = ""

    try:
        failed, succeeded, output = runner.run_one_test_case(
            t=tests_to_run[0], engine=engine, output=output
        )
    except Exception as e:
        execution_error = str(e)
    except SystemExit as e:
        execution_error = f"Workflow execution failed (exit code: {e.code})"

    return {
        "failed": list(failed),
        "succeeded": list(succeeded),
        "output": output,
        "execution_error": execution_error,
    }

def ensure_downloaded(self):
    pathed_container = self.container_path()

    if os.path.exists(pathed_container):
        return True

    command = self.get_build_instructions_for(pathed_container, self.container)
    Logger.info(
        "Couldn't find singularity container, building with: " + " ".join(command)
    )
    try:
        build_result = subprocess.check_output(command)
        Logger.debug(build_result)
    except subprocess.CalledProcessError as e:
        Logger.log_ex(e)

def do_fromwdl(args):
    from janis_core import WdlParser, Logger

    Logger.info(f"Loading WDL file: {args.wdlfile}")
    tool = WdlParser.from_doc(args.wdlfile)
    Logger.info(f"Loaded {tool.type()}: {tool.versioned_id()}")

    translated = tool.translate(
        args.translation,
        to_console=args.output is None,
        to_disk=args.output is not None,
        export_path=args.output,
    )

    return translated

def _download_remote_files(self, test_logic: TTestExpectedOutput):
    """
    Download remote test files (only expected output files) to a cache directory

    :param test_logic: an object that holds information about an expected output
    :type test_logic: TTestExpectedOutput

    :return: None
    :rtype: None
    """
    file_attributes = ["expected_file", "file_diff_source"]

    for att in file_attributes:
        if not hasattr(test_logic, att):
            raise Exception(f"{test_logic.__class__} has no attribute {att}")

        source = getattr(test_logic, att)
        if source:
            test_helpers.verify_janis_assistant_installed()
            from janis_assistant.management.filescheme import (
                FileScheme,
                LocalFileScheme,
            )

            # f = FileScheme(source)
            if not FileScheme.is_local_path(source):
                fs = FileScheme.get_filescheme_for_url(source)
                last_modified = fs.last_modified(source)

                local_file_path = os.path.join(
                    self.cached_input_files_dir,
                    f"{test_helpers.hash_filename(source, last_modified)}_{os.path.basename(source)}",
                )

                # Only download if the file does not already exist
                if not os.path.exists(local_file_path):
                    Logger.info(f"Downloading remote file to {local_file_path}")
                    os.makedirs(self.cached_input_files_dir, exist_ok=True)
                    fs.cp_from(source, local_file_path)
                else:
                    Logger.info(
                        f"Skip downloading remote file. File {source} already exists in {local_file_path}"
                    )

                setattr(test_logic, att, local_file_path)

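# Illustrative stand-in only -- NOT the real test_helpers.hash_filename(): a sketch of the
# cache-naming idea used above, where the local filename encodes both the remote URL and its
# last-modified timestamp, so an updated remote file produces a new cache entry.
import hashlib

def _cache_key_sketch(source_url: str, last_modified: str) -> str:
    return hashlib.md5(f"{source_url}:{last_modified}".encode()).hexdigest()[:10]

# e.g. local name: f"{_cache_key_sketch(url, mtime)}_{os.path.basename(url)}"
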
def process_completed_task(self):
    Logger.info(
        f"Task has finished with status: {self.database.workflowmetadata.status}"
    )
    self.save_metadata_if_required()
    self.copy_logs_if_required()
    self.copy_outputs_if_required()

    if self.database.workflowmetadata.status == TaskStatus.COMPLETED:
        self.remove_exec_dir()

    self.get_engine().stop_engine()
    if self.dbcontainer:
        self.dbcontainer.stop()

    Logger.info(f"Finished managing task '{self.wid}'.")

def copy_outputs_if_required(self):
    if self.database.progressDB.has(ProgressKeys.copiedOutputs):
        return Logger.debug(f"Workflow '{self.wid}' has copied outputs, skipping")

    if self.database.workflowmetadata.status != TaskStatus.COMPLETED:
        return Logger.warn(
            f"Skipping copying outputs as workflow "
            f"status was not completed ({self.database.workflowmetadata.status})"
        )

    wf_outputs = self.database.outputsDB.get_all()
    engine_outputs = self.get_engine().outputs_task(self.get_engine_wid())
    eoutkeys = engine_outputs.keys()
    fs = self.environment.filescheme

    for out in wf_outputs:
        eout = engine_outputs.get(out.tag)

        if eout is None:
            Logger.warn(
                f"Couldn't find expected output with tag {out.tag}, found outputs ({', '.join(eoutkeys)})"
            )
            continue
        originalfile, newfilepath = self.copy_output(
            fs=fs,
            outputid=out.tag,
            prefix=out.prefix,
            tag=out.tags,
            secondaries=out.secondaries,
            extension=out.extension,
            engine_output=eout,
            iscopyable=out.iscopyable,
        )

        if isinstance(originalfile, list):
            originalfile = recursively_join(originalfile, "|")

        if isinstance(newfilepath, list):
            newfilepath = recursively_join(newfilepath, "|")

        self.database.outputsDB.update_paths(
            tag=out.tag, original_path=originalfile, new_path=newfilepath
        )

    self.database.progressDB.set(ProgressKeys.copiedOutputs)
    Logger.info(f"View the task outputs: file://{self.get_task_path()}")

def inputs_modifier(self, tool: Tool, inputs: Dict, hints: Dict[str, str]) -> Dict:
    if not isinstance(tool, WorkflowBase):
        return inputs

    wf: WorkflowBase = tool
    new_inputs = {}

    for inpnode in wf.input_nodes.values():
        if inpnode.id() in inputs and inputs[inpnode.id()] is not None:
            continue
        elif inpnode.default is not None or inpnode.datatype.optional:
            continue
        elif not (
            isinstance(inpnode.datatype, File)
            or (
                isinstance(inpnode.datatype, Array)
                and isinstance(inpnode.datatype.fundamental_type(), File)
            )
        ):
            # If it's not a File or Array[File], skip it (we can't localise it anyway)
            continue
        elif inpnode.doc is None or inpnode.doc.source is None:
            continue

        Logger.info(
            f"An input to '{wf.id()}.{inpnode.id()}' was not found, and the workflow specifies a source to potentially locate it from."
        )
        doc: InputDocumentation = inpnode.doc

        # use the source hints to find an appropriate source
        source = self.determine_appropriate_source_from_hints(
            tool, inpnode.id(), doc.source
        )
        if source:
            basedir = os.path.join(self.cache_dir, inpnode.id())
            os.makedirs(basedir, exist_ok=True)
            new_inputs[inpnode.id()] = self.localise_inputs(
                inpnode.id(),
                inpnode.datatype,
                basedir,
                source,
                localise_secondary_files=not doc.skip_sourcing_secondary_files,
            )

    return {**inputs, **new_inputs}

def submit_workflow_if_required(self):
    if self.database.progressDB.has(ProgressKeys.submitWorkflow):
        return Logger.log(f"Workflow '{self.wid}' has submitted, skipping")

    fn_wf = self.database.workflowmetadata.submission_workflow
    fn_inp = self.database.workflowmetadata.submission_inputs
    fn_deps = self.database.workflowmetadata.submission_resources

    engine = self.get_engine()

    Logger.debug(f"Submitting task '{self.wid}' to '{engine.id()}'")
    self._engine_wid = engine.start_from_paths(self.wid, fn_wf, fn_inp, fn_deps)
    self.database.workflowmetadata.engine_wid = self._engine_wid

    Logger.info(
        f"Submitted workflow ({self.wid}), got engine id = '{self.get_engine_wid()}'"
    )
    self.database.progressDB.set(ProgressKeys.submitWorkflow)

def mark_aborted(outputdir, wid: Optional[str]) -> bool:
    try:
        if not wid:
            db = WorkflowManager.from_path_get_latest(
                outputdir, readonly=False
            ).database.workflowmetadata
        else:
            db = WorkflowDbManager.get_workflow_metadatadb(
                outputdir, wid, readonly=False
            )
        db.please_abort = True
        db.kvdb.commit()
        db.close()
        Logger.info(
            "Marked workflow as aborted, this may take some time to fully exit"
        )
        return True
    except Exception as e:
        Logger.critical("Couldn't mark aborted: " + str(e))
        return False

def do_runtest(args):
    config = None
    if args.config:
        config = JanisConfiguration.initial_configuration(path=args.config)

    runner_path = test_runner.__file__
    cli_args = sys.argv[2:]
    run_test_commands = ["python", runner_path] + cli_args

    if config:
        commands = config.template.template.prepare_run_test_command(
            run_test_commands
        )
    else:
        commands = run_test_commands

    joined_command = "' '".join(commands)
    Logger.info(f"Deploying test with command: '{joined_command}'")
    subprocess.run(commands)

def stop_container(self):
    if not self.dockerid:
        return Logger.info(
            f"Skipping stopping docker container '{self.container}' as no dockerID was found"
        )

    cmd = f"docker stop {self.dockerid}; docker rm -f {self.dockerid}"
    try:
        Logger.info("Stopping docker with command: " + str(cmd))
        subprocess.check_output(cmd, shell=True)
        self.running = False
    except subprocess.CalledProcessError as e:
        Logger.critical(
            f"An error occurred when trying to stop the container '{self.container}'. "
            f"You may need to stop this manually with: '{cmd}'. Error: " + str(e)
        )
        raise e

def start_engine_if_required(self):
    # engine should be loaded from the DB
    engine = self.get_engine()
    self.environment.engine = engine

    is_allegedly_started = engine.test_connection()

    if is_allegedly_started:
        return

    if not isinstance(engine, Cromwell):
        engine.start_engine()
        return

    additional_cromwell_params = []
    if not engine.config:
        Logger.info("Skipping starting the database as Janis is not managing the config")
    else:
        dbconfig: JanisDatabaseConfigurationHelper = self.database.workflowmetadata.dbconfig
        dbtype = dbconfig.which_db_to_use()
        if dbtype == dbconfig.DatabaseTypeToUse.existing:
            engine.config.database = dbconfig.get_config_for_existing_config()
        elif dbtype == dbconfig.DatabaseTypeToUse.filebased:
            engine.config.database = dbconfig.get_config_for_filebased_db(
                path=self.get_path_for_component(self.WorkflowManagerPath.database)
                + "/cromwelldb"
            )
        elif dbtype == dbconfig.DatabaseTypeToUse.managed:
            cromwelldb_config = self.start_mysql_and_prepare_cromwell_config()
            additional_cromwell_params.append(
                "-Ddatabase.db.url=" + cromwelldb_config.db.url
            )
            engine.config.database = cromwelldb_config
        else:
            Logger.warn(
                "Skipping database config as '--no-database' option was provided."
            )

    engine.start_engine(additional_cromwell_options=additional_cromwell_params)

    # Write the new engine details back into the database (for things like PID, host and is_started)
    self.database.workflowmetadata.engine = engine

def try_get_outputs_for(self, inpid, wf, inputs, output_dir, description):
    from janis_assistant.main import WorkflowManager, run_with_outputs

    if os.path.exists(output_dir):
        try:
            wm = WorkflowManager.from_path_get_latest_manager(
                output_dir, readonly=True
            )
            outs_raw = wm.database.outputsDB.get()
            outs = {
                o.id_: o.value or o.new_path
                for o in outs_raw
                if o.value or o.new_path
            }
            if len(outs) > 0:
                out_val = first_value(outs)
                Logger.info(
                    f"Using cached value of transformation ({description}) for {inpid}: {out_val}"
                )
                return out_val
            Logger.log(
                f"Didn't get any outputs from previous workflow manager when deriving input {inpid} ({description})"
            )
        except Exception as e:
            Logger.debug(
                f"Couldn't get outputs from existing output_path for {inpid}, '{output_dir}' ({description}): {e}"
            )

    outs = run_with_outputs(wf, inputs=inputs, output_dir=output_dir)
    if not outs or len(outs) < 1:
        Logger.critical(
            f"Couldn't get outputs from transformation ({description}) for '{inpid}'"
        )
        return None

    return first_value(outs)

def poll_stored_metadata_with_blessed(self, blessed, seconds=1):
    term = blessed.Terminal()
    with term.fullscreen(), term.cbreak():
        try:
            print("loading...")

            is_finished = False
            did_exit = False
            # We won't clear the screen if we haven't printed (first loop) and it's finished
            has_printed = False
            while not is_finished:
                meta, is_finished = self.get_meta_call()
                if meta:
                    if has_printed or not is_finished:
                        print(term.clear)
                    print(meta.format())
                    has_printed = True

                val = term.inkey(timeout=0)
                if val and val.lower() == "q":
                    did_exit = True
                    break

                elif not is_finished:
                    time.sleep(seconds)

            # Finished
            if not did_exit:
                with term.location(0, term.height - 1):
                    print("Finished, press any key to quit")
                term.inkey(timeout=None)

        except KeyboardInterrupt:
            Logger.info("Exiting")

def update_status(result: Dict, option: UpdateStatusOption):
    Logger.info(f"Updating test status via {option.method} {option.url}")

    status = "test-failed"
    if not len(result["failed"]) and not result["execution_error"]:
        status = "test-succeeded"

    data = {"status": status, **result}
    headers = {"Authorization": f"Bearer {option.token}"}

    resp = requests.request(
        method=option.method, url=option.url, json=data, headers=headers
    )

    Logger.info("status updated")
    Logger.info(f"Response code {resp.status_code}")
    Logger.info(f"Response:\n{resp.text}")

    return resp.status_code, resp.text