def pipeline(task, wkname, conf=None):
    logindent(2)
    if not task: # in this case, conf is expected
        source_seqtype = "aa" if "aa" in GLOBALS["seqtypes"] else "nt"
        all_seqs = GLOBALS["target_sequences"]
        initial_task = Msf(set(all_seqs), set(),
                           seqtype=source_seqtype)

        initial_task.main_tree = None
        initial_task.threadid = generate_runid()
        initial_task.configid = initial_task.threadid
        initial_task.target_wkname = wkname
        # Register node
        db.add_node(initial_task.threadid, initial_task.nodeid,
                    initial_task.cladeid, initial_task.target_seqs,
                    initial_task.out_seqs)

        new_tasks = [initial_task]
    else:
        conf = GLOBALS[task.configid]
        npr_conf = IterConfig(conf, wkname, task.size, task.seqtype)
        new_tasks = process_task(task, wkname, npr_conf, conf["_nodeinfo"])

    process_new_tasks(task, new_tasks, conf)
    logindent(-2)

    return new_tasks
def pipeline(task, wkname, conf=None):
    logindent(2)
    # Points to npr parameters according to task properties
    if not task:
        source_seqtype = "aa" if "aa" in GLOBALS["seqtypes"] else "nt"
        npr_conf = IterConfig(conf, wkname, len(GLOBALS["target_species"]),
                              source_seqtype)
        cogconf, cogclass = npr_conf.cog_selector
        initial_task = cogclass(GLOBALS["target_species"], set(),
                                source_seqtype, conf, cogconf)

        initial_task.main_tree = main_tree = None
        initial_task.threadid = generate_runid()
        initial_task.configid = initial_task.threadid
        initial_task.target_wkname = wkname
        # Register node
        db.add_node(initial_task.threadid, initial_task.nodeid,
                    initial_task.cladeid, initial_task.targets,
                    initial_task.outgroups)

        new_tasks = [initial_task]
    else:
        conf = GLOBALS[task.configid]
        npr_conf = IterConfig(conf, wkname, task.size, task.seqtype)
        new_tasks = process_task(task, wkname, npr_conf, conf['_nodeinfo'])

    process_new_tasks(task, new_tasks, conf)
    logindent(-2)

    return new_tasks
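# The two pipeline() variants above share the same entry contract: called with
# task=None they build the seed task of a workflow thread (an Msf task over
# target sequences in the first case, a COG-selector task over target species
# in the second), and on later calls they expand a finished task into its
# follow-up tasks. A minimal sketch of how they are expected to be driven,
# assuming a local (non-SGE) run; the literal values below are illustrative,
# not taken from this module:
#
#   initial_tasks = pipeline(None, wkname, conf)   # seed the workflow thread
#   schedule(pipeline, initial_tasks,
#            schedule_time=2,                      # illustrative polling interval
#            execution=("local", False),           # (mode, run_detached); only "sge" is special-cased
#            debug=None, norender=True)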
def show_task_info(task):
    log.log(26, "")
    set_logindent(1)
    log.log(28, "(%s) %s" % (color_status(task.status), task))
    logindent(2)
    st_info = ', '.join(["%d(%s)" % (v, k) for k, v in
                         task.job_status.iteritems()])
    log.log(26, "%d jobs: %s" % (len(task.jobs), st_info))
    tdir = task.taskid
    tdir = tdir.lstrip("/")
    log.log(20, "TaskDir: %s" % tdir)
    if task.status == "L":
        logindent(-2)
        log.warning("Some jobs within the task [%s] are marked as (L)ost,"
                    " meaning that although they appear to be running,"
                    " their execution could not be tracked. NPR will"
                    " continue execution with other pending tasks." % task)
        logindent(2)
    logindent(2)
    # Shows details about jobs
    for j in task.jobs:
        if j.status == "D":
            log.log(20, "(%s): %s", j.status, j)
        else:
            log.log(24, "(%s): %s", j.status, j)
    logindent(-2)
def get_jobs_status(self, sge_jobs=None):
    """ Check the status of all children jobs. """
    self.cores_used = 0
    all_states = defaultdict(int)
    jobs_to_check = set(reversed(self.jobs))
    while jobs_to_check:
        j = jobs_to_check.pop()
        logindent(1)
        jobid = j.taskid if istask(j) else j.jobid
        if jobid in GLOBALS["cached_status"]:
            log.log(22, "@@8:Recycling status@@1: %s" % j)
            st = GLOBALS["cached_status"][jobid]
            all_states[st] += 1
        elif j not in self._donejobs:
            st = j.get_status(sge_jobs)
            GLOBALS["cached_status"][jobid] = st
            all_states[st] += 1
            if st == "D":
                self._donejobs.add(j)

                # If task has an internal workflow processor,
                # launch it and populate with new jobs
                if istask(j) and j.task_processor:
                    pipeline = j.task_processor
                    target_workflow = j.target_wkname
                    for new_job in pipeline(j, target_workflow):
                        jobs_to_check.add(new_job)
                        self.jobs.append(new_job)
            elif st in set("QRL"):
                if isjob(j) and not j.host.startswith("@sge"):
                    self.cores_used += j.cores
                elif istask(j):
                    self.cores_used += j.cores_used
            elif st == "E":
                errorpath = j.jobdir if isjob(j) else j.taskid
                raise TaskError(j, "Job execution error %s" % errorpath)
        else:
            all_states["D"] += 1
        logindent(-1)

    if not all_states:
        all_states["D"] += 1

    return all_states
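# Note on the single-letter states aggregated above (as used throughout this
# module; inferred from the surrounding code rather than from an explicit
# definition): "D" done, "Q" queued, "R" running, "L" lost (looks running but
# its execution cannot be tracked), "E" error, and "W" waiting / needs
# relaunching. Only queued, running and lost children ("QRL") are counted
# against cores_used.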
def get_status(self, sge_jobs=None):
    # If another task with the same id (same work to be done) has
    # been checked in the same cycle, reuse its information
    if self.taskid in GLOBALS["cached_status"]:
        return GLOBALS["cached_status"][self.taskid]

    # Otherwise check the status of all its children jobs and
    # tasks
    logindent(2)

    # last_status = db.get_last_task_status(self.taskid)
    task_saved = db.task_is_saved(self.taskid)

    # If task is processed and saved, just return its state
    # without checking children
    if task_saved and self.status == "D":
        log.log(24, "@@8:Task is done and processed@@1:")
        self.status = "D"
    # If I have just noticed the task is done and saved, load its
    # stored data.
    elif task_saved and self.status != "D":
        log.log(26, "@@8:Loading pre-computed data@@1:")
        self.status = "D"
        self.load_stored_data()
    else:
        # Otherwise, we need to check all children
        self.job_status = self.get_jobs_status(sge_jobs)
        job_statuses = set(self.job_status.keys())

        # If all children jobs have just finished, we process the
        # task and save it into the database
        if job_statuses == set("D"):
            logindent(-2)
            log.log(22, "Processing done task: %s", self)
            logindent(2)
            try:
                self.finish()
            except Exception, e:
                traceback.print_exc()
                raise TaskError(self, e)
            else:
                # store in database .......
                if self.check():
                    self.status = "D"
                elif self.status == "!":
                    # this means the finish procedure has generated
                    # new jobs associated to the task, so it
                    # requires relaunching
                    self.status = "W"
                else:
                    # Otherwise, everything points to errors when
                    # processing
                    raise TaskError(self, "Task check not passed")
        # Otherwise, update the ongoing task status, but do not
        # store the result yet.
        else:
            # Order matters
            if "E" in job_statuses:
                self.status = "E"
            elif "L" in job_statuses:
                self.status = "L"
            elif "R" in job_statuses:
                self.status = "R"
            elif "Q" in job_statuses:
                self.status = "Q"
            elif "W" in job_statuses:
                self.status = "W"
            else:
                log.error("unknown task state %s" % (job_statuses))

    logindent(-2)
    GLOBALS["cached_status"][self.taskid] = self.status
    return self.status

def init(self):
    # List of associated jobs necessary to complete the task. Job
    # and Task classes are accepted as elements in the list.
    self.jobs = []
    self._donejobs = set()
    self._running_jobs = set()

    # Prepare required jobs
    self.load_jobs()
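# GLOBALS["cached_status"] is the per-cycle memoization used by both
# get_status() and get_jobs_status() above; the scheduler below resets it at
# the start of every checking pass, so a cached state is only reused within a
# single pass over the pending tasks.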
def schedule(workflow_task_processor, pending_tasks, schedule_time,
             execution, debug, norender):
    # Adjust debug mode
    if debug == "all":
        log.setLevel(10)
    pending_tasks = set(pending_tasks)

    ## ===================================
    ## INITIALIZE BASIC VARS
    execution, run_detached = execution
    thread2tasks = defaultdict(list)
    for task in pending_tasks:
        thread2tasks[task.configid].append(task)
    expected_threads = set(thread2tasks.keys())
    past_threads = {}
    thread_errors = defaultdict(list)
    ## END OF VARS AND SHORTCUTS
    ## ===================================

    cores_total = GLOBALS["_max_cores"]
    if cores_total > 0:
        job_queue = Queue()
        back_launcher = Process(target=background_job_launcher,
                                args=(job_queue, run_detached,
                                      GLOBALS["launch_time"], cores_total))
        back_launcher.start()
    else:
        job_queue = None
        back_launcher = None

    GLOBALS["_background_scheduler"] = back_launcher
    GLOBALS["_job_queue"] = job_queue

    # Captures Ctrl-C for debugging
    #signal.signal(signal.SIGINT, control_c)

    last_report_time = None
    BUG = set()
    try:
        # Enters into task scheduling
        while pending_tasks:
            wtime = schedule_time

            # ask SGE for running jobs
            if execution == "sge":
                sgeid2jobs = db.get_sge_tasks()
                qstat_jobs = sge.qstat()
            else:
                qstat_jobs = None

            # Show summary of pending tasks per thread
            thread2tasks = defaultdict(list)
            for task in pending_tasks:
                thread2tasks[task.configid].append(task)
            set_logindent(0)
            log.log(28, "@@13: Updating tasks status:@@1: (%s)" % (ctime()))
            info_lines = []
            for tid, tlist in thread2tasks.iteritems():
                threadname = GLOBALS[tid]["_name"]
                sizelist = ["%s" % getattr(_ts, "size", "?") for _ts in tlist]
                info = "Thread @@13:%s@@1:: pending tasks: @@8:%s@@1: of sizes: %s" % (
                    threadname, len(tlist), ', '.join(sizelist))
                info_lines.append(info)

            for line in info_lines:
                log.log(28, line)

            if GLOBALS["email"] and last_report_time is None:
                last_report_time = time()
                send_mail(GLOBALS["email"], "Your NPR process has started",
                          '\n'.join(info_lines))

            ## ================================
            ## CHECK AND UPDATE CURRENT TASKS
            checked_tasks = set()
            check_start_time = time()
            to_add_tasks = set()
            GLOBALS["cached_status"] = {}
            for task in sorted(pending_tasks, sort_tasks):
                # Avoids endless periods without new job submissions
                elapsed_time = time() - check_start_time
                #if not back_launcher and pending_tasks and \
                #   elapsed_time > schedule_time * 2:
                #    log.log(26, "@@8:Interrupting task checks to schedule new jobs@@1:")
                #    db.commit()
                #    wtime = launch_jobs(sorted(pending_tasks, sort_tasks),
                #                        execution, run_detached)
                #    check_start_time = time()

                # Enter debugging mode if necessary
                if debug and log.level > 10 and task.taskid.startswith(debug):
                    log.setLevel(10)
                    log.debug("ENTERING IN DEBUGGING MODE")
                thread2tasks[task.configid].append(task)

                # Update tasks and job statuses
                if task.taskid not in checked_tasks:
                    try:
                        show_task_info(task)
                        task.status = task.get_status(qstat_jobs)
                        db.dataconn.commit()
                        if back_launcher and task.status not in set("DE"):
                            for j, cmd in task.iter_waiting_jobs():
                                j.status = "Q"
                                GLOBALS["cached_status"][j.jobid] = "Q"
                                if j.jobid not in BUG:
                                    if not os.path.exists(j.jobdir):
                                        os.makedirs(j.jobdir)
                                    for ifile, outpath in j.input_files.iteritems():
                                        try:
                                            _tid, _did = ifile.split(".")
                                            _did = int(_did)
                                        except (IndexError, ValueError):
                                            dataid = ifile
                                        else:
                                            dataid = db.get_dataid(_tid, _did)

                                        if not outpath:
                                            outfile = pjoin(GLOBALS["input_dir"], ifile)
                                        else:
                                            outfile = pjoin(outpath, ifile)

                                        if not os.path.exists(outfile):
                                            open(outfile, "w").write(db.get_data(dataid))

                                    log.log(24, " @@8:Queueing @@1: %s from %s" % (j, task))
                                    job_queue.put([j.jobid, j.cores, cmd, j.status_file])
                                BUG.add(j.jobid)

                        update_task_states_recursively(task)
                        db.commit()
                        checked_tasks.add(task.taskid)
                    except TaskError, e:
                        log.error("Errors found in %s" % task)
                        import traceback
                        traceback.print_exc()
                        if GLOBALS["email"]:
                            threadname = GLOBALS[task.configid]["_name"]
                            send_mail(GLOBALS["email"], "Errors found in %s!" % threadname,
                                      '\n'.join(map(str, [task, e.value, e.msg])))
                        pending_tasks.discard(task)
                        thread_errors[task.configid].append([task, e.value, e.msg])
                        continue
                else:
                    # Set temporary Queued state to avoid launching
                    # jobs from clones
                    task.status = "Q"
                    if log.level < 24:
                        show_task_info(task)

                if task.status == "D":
                    #db.commit()
                    show_task_info(task)
                    logindent(3)

                    # Log commands of every task
                    if 'cmd_log_file' not in GLOBALS[task.configid]:
                        GLOBALS[task.configid]['cmd_log_file'] = pjoin(GLOBALS[task.configid]["_outpath"],
                                                                       "cmd.log")
                        O = open(GLOBALS[task.configid]['cmd_log_file'], "w")
                        O.close()
                    cmd_lines = get_cmd_log(task)
                    CMD_LOG = open(GLOBALS[task.configid]['cmd_log_file'], "a")
                    print >>CMD_LOG, task
                    for c in cmd_lines:
                        print >>CMD_LOG, ' '+'\t'.join(map(str, c))
                    CMD_LOG.close()

                    try:
                        #wkname = GLOBALS[task.configid]['_name']
                        create_tasks = workflow_task_processor(task, task.target_wkname)
                    except TaskError, e:
                        log.error("Errors found in %s" % task)
                        pending_tasks.discard(task)
                        thread_errors[task.configid].append([task, e.value, e.msg])
                        continue
                    else:
                        logindent(-3)
                        to_add_tasks.update(create_tasks)
                        pending_tasks.discard(task)

                elif task.status == "E":
                    log.error("task contains errors: %s " % task)
                    log.error("Errors found in %s" % task)
                    pending_tasks.discard(task)
                    thread_errors[task.configid].append([task, None,
                                                         "Found (E) task status"])