def standard(self) -> RunModel:
    jobs = []
    s = DateUtil.parse_iso(self.meta.get("start"))
    f = DateUtil.parse_iso(self.meta.get("end"))
    st = 0
    if s:
        ff = f if f else DateUtil.now()
        st = int(DateUtil.secs_difference(s, ff))

    jid = self.meta.get("id")
    for stepname, call in self.meta.get("calls").items():
        stepname = ".".join(stepname.split(".")[1:])
        jobs.extend(self.parse_standard_calls(None, stepname, call))

    model = RunModel(
        submission_id=None,
        id_=None,
        engine_id=jid,
        name=self.meta.get("workflowName"),
        # start=s,
        # finish=f,
        execution_dir=self.meta.get("workflowRoot"),
        status=cromwell_status_to_status(self.meta.get("status")),
        error=self.get_caused_by_text(),
    )
    model.jobs = jobs
    return model
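# For orientation, an abridged sketch of the Cromwell metadata document that
# standard() consumes. The field names are Cromwell's metadata-endpoint keys;
# the values here are made up:
meta = {
    "id": "d9fb0e70-6ef1-4c2e-8a9e-000000000000",
    "workflowName": "wgs",
    "status": "Succeeded",
    "start": "2020-01-01T00:00:00.000Z",
    "end": "2020-01-01T01:02:03.000Z",
    "workflowRoot": "/cromwell-executions/wgs/d9fb0e70",
    "calls": {"wgs.align": []},  # "<workflow>.<step>" -> list of call attempts
}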
def parse_output(
    key, value
) -> Tuple[str, Union[WorkflowOutputModel, List[WorkflowOutputModel]]]:
    newkey = "".join(key.split(".")[1:])
    fileloc = value
    if isinstance(value, dict):
        fileloc = value["location"]

    if isinstance(fileloc, list):
        return newkey, [Cromwell.parse_output(key, f)[1] for f in fileloc]

    # It's hard to know whether the value we get is a File or just a value,
    # so we'll write it in both values and let Janis figure it out later
    return (
        newkey,
        WorkflowOutputModel(
            id_=newkey,
            original_path=None,
            timestamp=DateUtil.now(),
            new_path=None,
            output_folder=None,
            output_name=None,
            secondaries=None,
            extension=None,
            value=fileloc,
            is_copyable=True,
            submission_id=None,
            run_id=None,
        ),
    )
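# A minimal sketch of the unwrapping above (hypothetical values, no Janis
# imports): Cromwell names outputs "<workflow>.<output>" and represents File
# values as {"location": ...} dicts, possibly nested in lists for scatters.
key, value = "wgs.out_bam", {"location": "/exec/call-bqsr/out.bam"}
newkey = "".join(key.split(".")[1:])  # strips the workflow prefix -> "out_bam"
fileloc = value["location"] if isinstance(value, dict) else value
assert (newkey, fileloc) == ("out_bam", "/exec/call-bqsr/out.bam")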
def format(self, tb, **kwargs):
    if not self.jobs:
        return ""
    nl = "\n"
    return nl.join(
        j.format(tb, njobs_in_parent=len(self.jobs), **kwargs)
        for j in sorted(self.jobs, key=lambda j: j.start or DateUtil.now())
    )
def update(self, run_id: str, status: TaskStatus):
    return self.insert_or_update_many(
        [
            RunStatusUpdate(
                submission_id=self.submission_id,
                run_id=run_id,
                status=status,
                date=DateUtil.now(),
            )
        ]
    )
def set(self, key: ProgressKeys):
    if self.has(key):
        return

    with self.with_cursor() as cursor:
        cursor.execute(
            self._insert_statement,
            (self.submission_id, key.value, str(DateUtil.now())),
        )
def get_poll_interval(self):
    """
    Get poll interval in SECONDS
    """
    if self.polling_interval is not None:
        return self.polling_interval

    max_poll = 60  # seconds
    min_poll = 5  # seconds
    falloff = 30 * 60  # seconds

    # https://www.desmos.com/calculator/f5lmjtctfg
    # ~ (max_poll - min_poll)/2 * (tanh(x/falloff - pi) + 1) + min_poll
    #
    # It's just a tanh graph that's been rebiased: tanh(x - pi) + 1,
    # and then scaled
    x = (DateUtil.now() - (self._start_time or DateUtil.now())).total_seconds()
    return (max_poll - min_poll) / 2 * (
        math.tanh(math.pi * (2 * x / falloff - 1)) + 1
    ) + min_poll
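# A standalone sketch of the same curve (stdlib only), to make the constants
# concrete: polling starts near min_poll, crosses the midpoint 32.5s at
# x = falloff / 2 = 900s, and saturates near max_poll by x = falloff = 1800s.
import math

def poll_interval(x, min_poll=5, max_poll=60, falloff=30 * 60):
    return (max_poll - min_poll) / 2 * (
        math.tanh(math.pi * (2 * x / falloff - 1)) + 1
    ) + min_poll

assert round(poll_interval(0), 1) == 5.1
assert poll_interval(900) == 32.5
assert round(poll_interval(1800), 1) == 59.9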
def start_from_paths(self, wid, source_path: str, input_path: str, deps_path: str):
    from janis_assistant.data.models.preparedjob import PreparedJob

    jobfile = PreparedJob.instance()

    self.taskmeta = {
        "start": DateUtil.now(),
        "status": TaskStatus.PROCESSING,
        "jobs": {},
    }
    config: CWLToolConfiguration = self.config

    if Logger.CONSOLE_LEVEL == LogLevel.VERBOSE:
        config.debug = True
    config.disable_color = True

    # more options
    if not config.tmpdir_prefix:
        config.outdir = self.execution_dir + "/"
        config.tmpdir_prefix = self.execution_dir + "/"
        config.leave_tmpdir = True

    if jobfile.call_caching_enabled:
        config.cachedir = os.path.join(self.execution_dir, "cached/")

    cmd = config.build_command_line(source_path, input_path)

    Logger.debug("Running command: '" + " ".join(cmd) + "'")

    process = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, preexec_fn=os.setsid, stderr=subprocess.PIPE
    )
    self.taskmeta["status"] = TaskStatus.RUNNING
    Logger.info("CWLTool has started with pid=" + str(process.pid))
    self.process_id = process.pid

    self._logger = CWLToolLogger(
        wid,
        process,
        logfp=open(self.logfile, "a+"),
        metadata_callback=self.task_did_update,
        exit_function=self.task_did_exit,
    )

    return wid
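# Why preexec_fn=os.setsid in the Popen above: it starts cwltool in its own
# session/process group, so the whole process tree can later be stopped with
# one os.killpg. A minimal standalone illustration (POSIX only):
import os
import signal
import subprocess

p = subprocess.Popen(["sleep", "60"], preexec_fn=os.setsid)
os.killpg(os.getpgid(p.pid), signal.SIGTERM)  # signals the entire group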
def __init__(
    self,
    submission_id,
    output_dir: str,
    execution_dir: str,
    timestamp: Optional[Union[str, datetime]] = None,
):
    self.submission_id = submission_id
    self.output_dir = output_dir
    self.execution_dir = execution_dir

    if not timestamp:
        timestamp = DateUtil.now()
    elif isinstance(timestamp, str):
        timestamp = DateUtil.parse_iso(timestamp)
    self.timestamp = timestamp
def task_did_exit(self, logger: CWLToolLogger, status: TaskStatus):
    Logger.debug("CWLTool fired 'did exit'")
    self.taskmeta["status"] = status
    self.taskmeta["finish"] = DateUtil.now()
    self.taskmeta["outputs"] = logger.outputs

    if status != TaskStatus.COMPLETED:
        js: Dict[str, RunJobModel] = self.taskmeta.get("jobs")
        for j in js.values():
            if j.status != TaskStatus.COMPLETED:
                j.status = status

    if logger.error:
        self.taskmeta["error"] = logger.error

    for callback in self.progress_callbacks.get(logger.sid, []):
        callback(self.metadata(logger.sid))
def process_potential_out(run_id, key, out):
    if isinstance(out, list):
        outs = [CWLTool.process_potential_out(run_id, key, o) for o in out]
        ups = {}
        for o in outs:
            for k, v in o.items():
                if k not in ups:
                    ups[k] = []
                ups[k].append(v)
        return ups

    updates = {}
    if out is None:
        return {}

    if is_python_primitive(out):
        updates[key] = WorkflowOutputModel(
            submission_id=None,
            run_id=run_id,
            id_=key,
            original_path=None,
            is_copyable=False,
            timestamp=DateUtil.now(),
            value=out,
            new_path=None,
            output_folder=None,
            output_name=None,
            secondaries=None,
            extension=None,
        )
    elif "path" in out:
        updates[key] = WorkflowOutputModel(
            submission_id=None,
            run_id=run_id,
            id_=key,
            is_copyable=True,
            value=out["path"],
            original_path=None,
            timestamp=DateUtil.now(),
            new_path=None,
            output_folder=None,
            output_name=None,
            secondaries=None,
            extension=None,
        )
        for s in out.get("secondaryFiles", []):
            path = s["path"]
            ext = path.rpartition(".")[-1]
            newk = f"{key}_{ext}"
            updates[newk] = WorkflowOutputModel(
                submission_id=None,
                run_id=run_id,
                id_=newk,
                value=path,
                original_path=None,
                is_copyable=True,
                timestamp=DateUtil.now(),
                new_path=None,
                output_folder=None,
                output_name=None,
                secondaries=None,
                extension=None,
            )

    return updates
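# How a File output with secondary files fans out into multiple keys above
# (hypothetical values): each secondary is keyed by "<key>_<last extension>".
out = {
    "path": "/exec/out.bam",
    "secondaryFiles": [{"path": "/exec/out.bam.bai"}],
}
key = "out_bam"
for s in out["secondaryFiles"]:
    ext = s["path"].rpartition(".")[-1]  # -> "bai"
    assert f"{key}_{ext}" == "out_bam_bai"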
def process_metadataupdate_if_match(self, line):
    match = self.statusupdateregex.match(line)
    if not match:
        return

    name, action = match.groups()
    s = name.split(" ")
    if len(s) == 0 or len(s) > 2:
        return Logger.critical("Unsure how to handle metadata update: " + str(line))

    component = s[0]
    stepname = s[1] if len(s) > 1 else None

    status = None
    parentid = "_".join(self.workflow_scope) or None

    if component == "workflow":
        if action == "start":
            if stepname:  # stepname is empty for root workflow
                self.workflow_scope.append(stepname)
                stepname = None
            status = TaskStatus.RUNNING
        elif action == "completed success":
            if len(self.workflow_scope) > 0:
                self.workflow_scope.pop(0)
            status = TaskStatus.COMPLETED

    elif component == "step":
        if action == "start":
            status = TaskStatus.RUNNING
        elif action == "completed success":
            status = TaskStatus.COMPLETED

    if not status:
        return

    if not stepname:
        # return WorkflowModel
        return

    jid = f"{parentid}_{stepname}" if parentid else stepname

    start = DateUtil.now() if status == TaskStatus.RUNNING else None
    finish = DateUtil.now() if status == TaskStatus.COMPLETED else None

    job = RunJobModel(
        submission_id=None,
        run_id=self.sid,
        id_=jid,
        parent=parentid,
        name=stepname,
        status=status,
        attempt=None,
        shard=None,
        start=start,
        finish=finish,
        backend="local",
        batchid="",
        cached=False,
        container=None,
        stderr=self.logfp.name,
        stdout=None,
        analysis=None,
        script=None,
        cpu=None,
        memory=None,
    )

    self.metadata_callback(self, job)
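# The shapes of the cwltool log lines this parser expects. The real pattern
# lives in self.statusupdateregex (not shown here); this regex is an
# assumption that reproduces the (name, action) split used above:
import re

statusupdateregex = re.compile(r"INFO \[(.+)\] (.+)$")

for line in [
    "INFO [workflow ] start",               # root workflow: stepname is empty
    "INFO [step hello] start",              # -> RUNNING
    "INFO [step hello] completed success",  # -> COMPLETED
]:
    name, action = statusupdateregex.match(line).groups()
    print(name.split(" "), action)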
def insert_or_update_many(self, els: List[T]):
    if len(els) == 0:
        return
    queries: Dict[str, List[List[Any]]] = {}

    idkeys = set(self.get_id_keys())
    idkeys_ordered = list(idkeys)
    pkeys_ordered = self.get_primary_keys()

    existing_keys = set()  # (*pkeys_ordered)

    # get all primary keys
    dbalias_map: Dict[str, DatabaseObjectField] = {
        t.dbalias: t for t in self._base.keymap()
    }

    updates, inserts = self.filter_updates(els)

    def add_query(query, values):
        if query in queries:
            queries[query].append(values)
        else:
            queries[query] = [values]

    for job in updates:
        keys, values = job.prepare_insert()
        # el_pkeys = [getattr(job, dbalias_map[_k]) for _k in idkeys_ordered]

        keys_np, values_np = [], []
        for k, v in zip(keys, values):
            if k in idkeys:
                continue
            keys_np.append(k)
            values_np.append(v)

        # problem is we want to update matching on some fields when they are NULL,
        # our WHERE statement should be something like:
        #   WHERE id1 = ? AND id2 = ? AND id3 is null AND id4 is null
        id_keyvalues = {
            pkey: prep_object_for_db(
                getattr(job, dbalias_map[pkey].name),
                encode=dbalias_map[pkey].encode,
            )
            for pkey in idkeys_ordered
        }
        id_withvalues_keyvalue_ordered = [
            (idkey, idvalue)
            for idkey, idvalue in id_keyvalues.items()
            if idvalue is not None
        ]
        id_withvalues_updater_keys = [
            f"{idkey} = ?" for idkey, _ in id_withvalues_keyvalue_ordered
        ]
        id_withvalues_updater_values = [
            idvalue for _, idvalue in id_withvalues_keyvalue_ordered
        ]
        id_novalues_updater_keys = [
            f"{idkey} is NULL"
            for idkey, idvalue in id_keyvalues.items()
            if idvalue is None
        ]

        prepared_statement = f"""
        UPDATE {self._tablename}
            SET {', '.join(f'{k} = ?' for k in keys_np)}
        WHERE
            {" AND ".join([*id_withvalues_updater_keys, *id_novalues_updater_keys])}
        """
        vtuple = (
            *values_np,
            *id_withvalues_updater_values,
        )

        add_query(prepared_statement, vtuple)

    for job in inserts:
        keys, values = job.prepare_insert()
        # el_pkeys = [getattr(job, dbalias_map[_k]) for _k in idkeys_ordered]
        prepared_statement = f"""
        INSERT INTO {self._tablename}
            ({', '.join(keys)})
        VALUES
            ({', '.join('?' for _ in keys)});
        """
        add_query(prepared_statement, values)

    Logger.log(
        f"DB {self._tablename}: Inserting {len(inserts)} and updating {len(updates)} rows"
    )
    with self.with_cursor() as cursor:
        start = DateUtil.now()
        if len(inserts) + len(updates) > 300:
            Logger.warn(
                f"DB '{self._tablename}' is inserting {len(inserts)} and updating {len(updates)} rows, this might take a while"
            )
        for query, vvalues in queries.items():
            try:
                Logger.log(f"Running query: {query}\n\t: values: {vvalues}")
                cursor.executemany(query, vvalues)
            except OperationalError as e:
                Logger.log_ex(e)
        seconds = (DateUtil.now() - start).total_seconds()
        if seconds > 2:
            Logger.warn(
                f"DB '{self._tablename}' took {second_formatter(seconds)} to insert {len(inserts)} and update {len(updates)} rows"
            )

    return True
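# A self-contained illustration of the NULL-matching decision documented in
# the comments above: SQLite's "col = ?" never matches NULL, so NULL-valued
# id columns must be matched with "is NULL" instead (hypothetical table):
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE jobs (submission_id, run_id, status)")
conn.execute("INSERT INTO jobs VALUES ('s1', NULL, 'running')")

# "run_id = ?" with None would find nothing; "run_id is NULL" finds the row:
conn.executemany(
    "UPDATE jobs SET status = ? WHERE submission_id = ? AND run_id is NULL",
    [("completed", "s1")],
)
assert conn.execute("SELECT status FROM jobs").fetchone()[0] == "completed"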
def format(
    self,
    pre,
    monochrome=False,
    brief=False,
    layer=0,
    njobs_in_parent=None,
    **kwargs,
):
    tb = " "
    fin = self.finish if self.finish else DateUtil.now()
    time = round(DateUtil.secs_difference(self.start, fin)) if self.start else None
    # percentage = (
    #     (round(1000 * time / self.supertime) / 10)
    #     if (self.start and self.supertime)
    #     else None
    # )
    status = self.status or (
        sorted(self.events, key=lambda e: e.timestamp)[-1].status
        if self.events
        else TaskStatus.PROCESSING
    )

    name = self.name
    opts = []
    if self.shard is not None and self.shard >= 0:
        opts.append(f"shard-{self.shard}")
    if self.attempt and self.attempt > 1:
        opts.append(f"attempt-{self.attempt}")
    if len(opts) > 0:
        name += f" ({', '.join(opts)})"

    standard = pre + f"[{status.symbol()}] {name} ({second_formatter(time)})"

    col = ""
    uncol = ""

    if not monochrome:
        if status == TaskStatus.FAILED:
            col = _bcolors.FAIL
        elif status == TaskStatus.COMPLETED:
            col = _bcolors.OKGREEN
        # else:
        #     col = _bcolors.UNDERLINE
        uncol = _bcolors.ENDC

    if (
        status != TaskStatus.COMPLETED
        or brief is False
        or (layer == 0 and njobs_in_parent == 1)
    ):
        if self.jobs:
            ppre = pre + tb
            subs: List[RunJobModel] = sorted(
                self.jobs,
                key=lambda j: (j.shard or 0, j.start if j.start else DateUtil.now()),
                reverse=False,
            )

            return (
                col
                + standard
                + "".join(
                    [
                        "\n"
                        + j.format(ppre, monochrome, brief, layer=layer + 1, **kwargs)
                        for j in subs
                    ]
                )
                + uncol
            )

    fields: List[Tuple[str, str]] = []

    if status == TaskStatus.COMPLETED:
        if not self.finish:
            raise Exception(f"Finish was null for completed task: {self.name}")
        if self.cached:
            fields.append(("from cache", str(self.cached)))

    elif status == TaskStatus.RUNNING:
        fields.extend([("batchid", self.batchid), ("backend", self.backend)])

    elif status == TaskStatus.FAILED:
        if str(self.returncode) != "0":
            fields.append(("rc", str(self.returncode)))
        if self.stderr is not None:
            fields.append(("stderr", self.stderr))
        if self.error:
            fields.append(("error", self.error))
    elif status == TaskStatus.PROCESSING:
        pass
    elif status == TaskStatus.QUEUED:
        pass

    else:
        return (
            standard
            + f" :: Unimplemented status: '{status}' for task: '{self.name}'"
        )

    ppre = "\n" + " " * len(pre) + 2 * tb

    max_row_header_length = 0
    if len(fields) > 0:
        max_row_header_length = max(len(t[0]) for t in fields)

    retval = standard + "".join(
        f"{ppre}{f[0]}:{' ' * (max_row_header_length - len(f[0]))} {f[1]}"
        for f in fields
        if f[1]
    )

    return col + retval + uncol
def __setstate__(self, state):
    super().__setstate__(state)
    self._logger = None
    self._process = None
    self._timer_thread = None
    self._start_time = DateUtil.now()
def start_engine(self, additional_cromwell_options: List[str] = None):
    from ...data.models.preparedjob import PreparedJob

    job = PreparedJob.instance()

    self._start_time = DateUtil.now()
    self.timeout = job.cromwell.timeout or 10

    if self.test_connection():
        Logger.info("Engine has already been started")
        return self

    if self.connect_to_instance:
        self.is_started = True
        Logger.info("Cromwell environment discovered, skipping local instance")
        return self

    if self._process:
        self.is_started = True
        Logger.info(
            f"Discovered Cromwell instance (pid={self._process}), skipping start"
        )
        return self

    if self.config:
        with open(self.config_path, "w+") as f:
            f.writelines(self.config.output())

    Logger.log("Finding cromwell jar")
    cromwell_loc = self.resolve_jar(self.cromwelljar, job.cromwell, job.config_dir)

    Logger.info(f"Starting cromwell ({cromwell_loc})...")
    cmd = ["java", "-DLOG_MODE=standard"]

    if job.cromwell and job.cromwell.memory_mb:
        cmd.extend(
            [
                f"-Xmx{job.cromwell.memory_mb}M",
                f"-Xms{max(job.cromwell.memory_mb // 2, 1)}M",
            ]
        )

    # if Logger.CONSOLE_LEVEL == LogLevel.VERBOSE:
    #     cmd.append("-DLOG_LEVEL=DEBUG")

    if additional_cromwell_options:
        cmd.extend(additional_cromwell_options)

    self.port = find_free_port()
    self.host = f"127.0.0.1:{self.port}"

    cmd.append(f"-Dwebservice.port={self.port}")
    cmd.append(f"-Dwebservice.interface=127.0.0.1")

    if self.config_path and os.path.exists(self.config_path):
        Logger.debug("Using configuration file for Cromwell: " + self.config_path)
        cmd.append("-Dconfig.file=" + self.config_path)
    cmd.extend(["-jar", cromwell_loc, "server"])

    Logger.debug(f"Starting Cromwell with command: '{' '.join(cmd)}'")
    self._process = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        # preexec_fn creates a process group https://stackoverflow.com/a/4791612/
        preexec_fn=os.setsid,
    )
    Logger.info("Cromwell is starting with pid=" + str(self._process.pid))
    Logger.debug(
        "Cromwell will start the HTTP server, reading logs to determine when this occurs"
    )

    self._logfp = open(self.logfile, "a+")
    Logger.info(
        f"Will log Cromwell output to the file: {self.logfile}"
        if bool(self._logfp)
        else "Janis is NOT logging Cromwell output to a file"
    )

    # stdout is bytes in Python 3, so the EOF sentinel must be b""
    for c in iter(self._process.stdout.readline, b""):
        line = None
        if c:
            line = c.decode("utf-8").rstrip()

        if not line:
            rc = self._process.poll()
            if rc is not None:
                critical_suffix = f"Last received message '{line}'. "
                Logger.critical(
                    f"Cromwell has exited with rc={rc}. {critical_suffix}The last lines of the logfile ({self.logfile}):"
                )
                Logger.critical("".join(tail(self._logfp, 10)))
                return False
            continue

        if self._logfp and not self._logfp.closed:
            self._logfp.write(line + "\n")
            self._logfp.flush()
            os.fsync(self._logfp.fileno())

        Logger.debug("Cromwell: " + line)

        # self.stdout.append(str(c))
        if "service started on" in line:
            self.process_id = self._process.pid
            Logger.info(
                "Service successfully started with pid=" + str(self._process.pid)
            )
            break
        # elif ansi_escape.match():
        #     raise Exception(cd)

    self.is_started = True

    if self._process:
        self._logger = ProcessLogger(
            process=self._process,
            prefix="Cromwell: ",
            logfp=self._logfp,
            # exit_function=self.something_has_happened_to_cromwell,
        )
    return self
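# find_free_port isn't shown in this section; a common implementation (an
# assumption, not necessarily Janis's) binds to port 0 and lets the OS choose:
import socket

def find_free_port():
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(("127.0.0.1", 0))
        return s.getsockname()[1]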
def format(self, **kwargs):
    tb = " "
    nl = "\n"

    start, finish = None, DateUtil.now()
    last_updated = None
    if self.runs:
        start_times = [s.start for s in self.runs if s.start]
        finish_times = [s.finish for s in self.runs if s.finish]
        last_updated_times = [s.last_updated for s in self.runs if s.last_updated]

        if start_times:
            start = min(start_times)
        if finish_times and len(finish_times) == len(self.runs):
            finish = max(finish_times)
        if last_updated_times:
            last_updated = max(last_updated_times)

    fin = finish or DateUtil.now()
    duration = round((fin - start).total_seconds()) if start else 0

    updated_text = "Unknown"
    if last_updated:
        secs_ago = int((DateUtil.now() - last_updated).total_seconds())
        if secs_ago > 2:
            updated_text = second_formatter(secs_ago) + " ago"
        else:
            updated_text = "Just now"
        updated_text += f" ({last_updated.replace(microsecond=0).isoformat()})"

    engine_ids, rstatuses, errors = "", "", []
    if self.error:
        errors.append(f"GLOBAL: {self.error}")

    if self.runs:
        engine_ids = ", ".join(r.engine_id for r in self.runs if r.engine_id)
        errors.extend(r.error for r in self.runs if r.error)
        rstatuses = ", ".join(str(r.status.to_string()) for r in self.runs if r.status)

    statuses = self.status.to_string() if self.status else rstatuses
    ers = "\n".join(errors)

    return f"""\
SID:        {self.id_}
EngId:      {engine_ids}
Engine:     {self.engine_type}

Task Dir:   {self.output_dir}
Exec Dir:   {self.execution_dir}

Name:       {self.get_names() or 'N/A'}
Status:     {statuses}
Duration:   {second_formatter(duration)}
Start:      {start.isoformat() if start else 'N/A'}
Finish:     {finish.isoformat() if finish else "N/A"}
Updated:    {updated_text}

Jobs:
{nl.join(j.format(tb, **kwargs) for j in sorted(self.runs, key=lambda j: j.start or DateUtil.now()))}

{("Error: " + ers) if ers else ''}
""".strip()
def insert_or_update_many(self, els: List[RunModel]):
    for el in els:
        el.last_updated = DateUtil.now()
    return super().insert_or_update_many(els)
def parse_standard_call(cls, parentid, stepname, call):
    parent = parentid
    jid = f"{parentid}_{stepname}" if parentid else stepname

    shard = call.get("shardIndex")
    attempt = call.get("attempt")

    is_shard = shard is not None and shard >= 0
    is_second_attempt = attempt is not None and attempt > 1

    # We'll rebase the parent job if there are any shards (so they all sit
    # under one heading) OR if it's a second attempt, we'll group it under
    # the first attempt.
    if is_shard:
        # We rely on the calling function to create a job with the parentId
        # we're setting.
        parent = jid
        jid += f"_shard-{shard}"

    if is_second_attempt:
        # There will already be a parent with this id_ (as this is the second attempt)
        parent = jid
        jid += f"_attempt-{attempt}"

    subjobs = []
    status = cromwell_status_to_status(
        call.get("status") or call.get("executionStatus")
    )
    start = call.get("start")
    finish = call.get("end")

    # if the call has "subWorkflowMetadata", we have a different schema:
    if "subWorkflowMetadata" in call:
        # it's actually a workflow
        submeta = call.get("subWorkflowMetadata")
        for sn, subcalls in submeta.get("calls", {}).items():
            sn = ".".join(sn.split(".")[1:])
            subjobs.extend(cls.parse_standard_calls(jid, sn, subcalls))

        start = start or min(s.start for s in subjobs)

    callroot = call.get("callRoot")

    return RunJobModel(
        id_=jid,
        submission_id=None,
        run_id=None,
        parent=parent,
        container=call.get(
            "dockerImageUsed", call.get("runtimeAttributes", {}).get("docker")
        ),
        name=stepname,
        status=status,
        batchid=call.get("jobId"),
        backend=None,
        stdout=call.get("stdout"),
        stderr=call.get("stderr"),
        start=DateUtil.parse_iso(start) if start is not None else DateUtil.now(),
        finish=DateUtil.parse_iso(finish) if finish is not None else None,
        jobs=subjobs or None,
        cached=call.get("callCaching").get("hit") if "callCaching" in call else False,
        shard=shard,
        attempt=attempt,
        analysis=None,
        memory=None,
        cpu=None,
        error=CromwellMetadata.unwrap_caused_by(call.get("failures")),
        returncode=call.get("returnCode"),
        workdir=call.get("callRoot"),
        script=("file:/" + os.path.join(callroot, "execution/script"))
        if callroot
        else None,
    )
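# A worked example of the id rebasing above (hypothetical names): a scattered,
# retried call "align" under parent "wf" nests like
#   wf_align  ->  wf_align_shard-0  ->  wf_align_shard-0_attempt-2
def rebase(parentid, stepname, shard=None, attempt=None):
    parent = parentid
    jid = f"{parentid}_{stepname}" if parentid else stepname
    if shard is not None and shard >= 0:
        parent = jid
        jid += f"_shard-{shard}"
    if attempt is not None and attempt > 1:
        parent = jid
        jid += f"_attempt-{attempt}"
    return parent, jid

assert rebase("wf", "align", shard=0, attempt=2) == (
    "wf_align_shard-0",
    "wf_align_shard-0_attempt-2",
)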
def __init__(
    self,
    id_: str,
    submission_id: str,
    run_id: str,
    parent: Optional[str],
    name: str,
    batchid: Optional[str] = None,
    shard: Optional[int] = None,
    attempt: Optional[int] = None,
    container: Optional[str] = None,
    status: TaskStatus = None,
    start: Union[str, datetime] = None,
    finish: Optional[Union[str, datetime]] = None,
    backend: Optional[str] = None,
    cached: bool = None,
    stdout: Optional[str] = None,
    stderr: Optional[str] = None,
    script: Optional[str] = None,
    error: Optional[str] = None,
    returncode: Optional[str] = None,
    memory: Optional[str] = None,
    cpu: Optional[str] = None,
    analysis: Optional[str] = None,
    # Optional
    jobs: Optional[list] = None,
    lastupdated: Union[str, datetime] = None,
    workdir: Optional[str] = None,
):
    self.id_ = id_
    self.submission_id = submission_id
    self.run_id = run_id
    self.parent = parent
    self.status = status if isinstance(status, TaskStatus) else TaskStatus(status)
    self.name = name
    self.batchid = batchid

    # shard -1 means "not scattered", attempt 1 is the default: store None
    self.shard = None
    if shard is not None:
        if isinstance(shard, str) and shard.isdigit():
            shard = int(shard)
        if shard >= 0:
            self.shard = shard

    self.attempt = None
    if attempt is not None:
        if isinstance(attempt, str) and attempt.isdigit():
            attempt = int(attempt)
        if attempt > 1:
            self.attempt = attempt

    self.container = container
    self.backend = backend
    self.cached = cached

    self.stderr = stderr
    self.stdout = stdout
    self.error = error
    self.returncode = returncode
    self.workdir = workdir

    self.lastupdated = lastupdated or DateUtil.now()
    if isinstance(lastupdated, str):
        self.lastupdated = DateUtil.parse_iso(lastupdated)

    self.start = start
    self.finish = finish
    if start and isinstance(start, str):
        self.start = DateUtil.parse_iso(start)
    if finish and isinstance(finish, str):
        self.finish = DateUtil.parse_iso(finish)

    self.script = script
    self.memory = memory
    self.cpu = cpu
    self.analysis = analysis

    self.jobs: Optional[List[RunJobModel]] = jobs or None
    self.events = None