def to_string(self, verbose=0):
    """
    Build a multi-line, human-readable description of the input file.

    The text starts with the raw input (``self.string``) framed by two
    rulers. If ``self.structure`` is set, its ``spget_summary()`` follows.
    Otherwise one summary per dataset structure is emitted, followed by
    the lattice and coordinate tables built by
    ``dataframes_from_structures``.

    Args:
        verbose: Accepted for interface compatibility; not used here.

    Return:
        The description as a single newline-joined string.
    """
    banner = 10 * "=" + " Input File " + 10 * "="
    out = [banner, self.string, len(banner) * "=" + "\n"]

    # Single-structure input: only one summary is needed.
    if self.structure is not None:
        out.append(self.structure.spget_summary())
        return "\n".join(out)

    # Multi-dataset input: one boxed section per dataset, numbered from 1.
    structures = [dt.structure for dt in self.datasets]
    out.append("Input file contains %d structures:" % len(structures))
    for number, structure in enumerate(structures, start=1):
        out.append(boxed("Dataset: %d" % number))
        out.append(structure.spget_summary())
        out.append("")

    dataset_numbers = list(range(1, self.ndtset + 1))
    dfs = dataframes_from_structures(structures, index=dataset_numbers)
    out.extend([
        boxed("Tabular view (each row corresponds to a dataset structure)"),
        "",
        "Lattice parameters:",
        str(dfs.lattice),
        "",
        "Atomic positions:",
        str(dfs.coords),
    ])

    return "\n".join(out)
def to_string(self, verbose=0):
    """
    Return the string representation of the input file.

    Layout: a header ruler, the raw input text (``self.string``), a closing
    ruler, then information on the structure(s). With a single
    ``self.structure`` only its ``spget_summary()`` is shown. Otherwise
    every dataset structure gets a boxed section, and two tables (lattice
    parameters and atomic positions) are appended.

    Args:
        verbose: Not used by this method; kept in the signature.

    Return:
        String with the sections joined by newlines.
    """
    title = "=" * 10 + " Input File " + "=" * 10
    pieces = [title]
    pieces += [self.string, "=" * len(title) + "\n"]

    if self.structure is None:
        structures = [dataset.structure for dataset in self.datasets]
        pieces.append("Input file contains %d structures:" % len(structures))

        for idx, structure in enumerate(structures):
            pieces += [
                boxed("Dataset: %d" % (idx + 1)),
                structure.spget_summary(),
                "",
            ]

        # Rows of the tables are labelled with the 1-based dataset number.
        row_labels = [n + 1 for n in range(self.ndtset)]
        dfs = dataframes_from_structures(structures, index=row_labels)

        pieces.append(boxed("Tabular view (each row corresponds to a dataset structure)"))
        pieces.append("")
        pieces += ["Lattice parameters:", str(dfs.lattice), ""]
        pieces += ["Atomic positions:", str(dfs.coords)]
    else:
        pieces.append(self.structure.spget_summary())

    return "\n".join(pieces)
def exit_now():
    """
    Tell the caller whether the surrounding loop must be left.

    Return:
        -1 if ``flow.all_ok`` is true (a green message is printed).
         1 if any status in ``before_task2stat`` is critical (a red boxed
           message is printed).
         0 otherwise, i.e. the loop can continue.

    Any non-zero value means "exit"; a positive value signals an error.
    """
    if not flow.all_ok:
        for status in before_task2stat.values():
            if status.is_critical:
                cprint(boxed("Found tasks with critical status"), "red")
                return 1
        return 0

    cprint("Flow reached all_ok", "green")
    return -1
def _callback(self): """The actual callback.""" if self.debug: # Show the number of open file descriptors print(">>>>> _callback: Number of open file descriptors: %s" % get_open_fds()) self._runem_all() # Mission accomplished. Shutdown the scheduler. all_ok = self.flow.all_ok if all_ok: return self.shutdown(msg="All tasks have reached S_OK. Will shutdown the scheduler and exit") # Handle failures. err_lines = [] # Shall we send a reminder to the user? delta_etime = self.get_delta_etime() if delta_etime.total_seconds() > self.num_reminders * self.remindme_s: self.num_reminders += 1 msg = ("Just to remind you that the scheduler with pid %s, flow %s\n has been running for %s " % (self.pid, self.flow, delta_etime)) retcode = self.send_email(msg, tag="[REMINDER]") if retcode: # Cannot send mail, shutdown now! msg += ("\nThe scheduler tried to send an e-mail to remind the user\n" + " but send_email returned %d. Aborting now" % retcode) err_lines.append(msg) #if delta_etime.total_seconds() > self.max_etime_s: # err_lines.append("\nExceeded max_etime_s %s. Will shutdown the scheduler and exit" % self.max_etime_s) # Too many exceptions. Shutdown the scheduler. if self.num_excs > self.max_num_pyexcs: msg = "Number of exceptions %s > %s. Will shutdown the scheduler and exit" % ( self.num_excs, self.max_num_pyexcs) err_lines.append(boxed(msg)) # Paranoid check: disable the scheduler if we have submitted # too many jobs (it might be due to some bug or other external reasons # such as race conditions between difference callbacks!) if self.nlaunch > self.safety_ratio * self.flow.num_tasks: msg = "Too many jobs launched %d. Total number of tasks = %s, Will shutdown the scheduler and exit" % ( self.nlaunch, self.flow.num_tasks) err_lines.append(boxed(msg)) # Count the number of tasks with status == S_ERROR. if self.flow.num_errored_tasks > self.max_num_abierrs: msg = "Number of tasks with ERROR status %s > %s. 
Will shutdown the scheduler and exit" % ( self.flow.num_errored_tasks, self.max_num_abierrs) err_lines.append(boxed(msg)) # Test on the presence of deadlocks. g = self.flow.find_deadlocks() if g.deadlocked: # Check the flow again so that status are updated. self.flow.check_status() g = self.flow.find_deadlocks() print("deadlocked:\n", g.deadlocked, "\nrunnables:\n", g.runnables, "\nrunning\n", g.running) if g.deadlocked and not g.runnables and not g.running: err_lines.append("No runnable job with deadlocked tasks:\n%s." % str(g.deadlocked)) if not g.runnables and not g.running: # Check the flow again so that status are updated. self.flow.check_status() g = self.flow.find_deadlocks() if not g.runnables and not g.running: err_lines.append("No task is running and cannot find other tasks to submit.") # Something wrong. Quit if err_lines: # Cancel all jobs. if self.killjobs_if_errors: cprint("killjobs_if_errors set to 'yes' in scheduler file. Will kill jobs before exiting.", "yellow") try: num_cancelled = 0 for task in self.flow.iflat_tasks(): num_cancelled += task.cancel() cprint("Killed %d tasks" % num_cancelled, "yellow") except Exception as exc: cprint("Exception while trying to kill jobs:\n%s" % str(exc), "red") self.shutdown("\n".join(err_lines)) return len(self.exceptions)
def _callback(self): """The actual callback.""" if self.debug: # Show the number of open file descriptors print(">>>>> _callback: Number of open file descriptors: %s" % get_open_fds()) self._runem_all() # Mission accomplished. Shutdown the scheduler. all_ok = self.flow.all_ok if self.verbose: print("all_ok", all_ok) if all_ok: self.shutdown(msg="All tasks have reached S_OK. Will shutdown the scheduler and exit") # Handle failures. err_msg = "" # Shall we send a reminder to the user? delta_etime = self.get_delta_etime() if delta_etime.total_seconds() > self.num_reminders * self.remindme_s: self.num_reminders += 1 msg = ("Just to remind you that the scheduler with pid %s, flow %s\n has been running for %s " % (self.pid, self.flow, delta_etime)) retcode = self.send_email(msg, tag="[REMINDER]") if retcode: # Cannot send mail, shutdown now! msg += ("\nThe scheduler tried to send an e-mail to remind the user\n" + " but send_email returned %d. Aborting now" % retcode) err_msg += msg #if delta_etime.total_seconds() > self.max_etime_s: # err_msg += "\nExceeded max_etime_s %s. Will shutdown the scheduler and exit" % self.max_etime_s # Too many exceptions. Shutdown the scheduler. if self.num_excs > self.max_num_pyexcs: msg = "Number of exceptions %s > %s. Will shutdown the scheduler and exit" % ( self.num_excs, self.max_num_pyexcs) err_msg += boxed(msg) # Paranoid check: disable the scheduler if we have submitted # too many jobs (it might be due to some bug or other external reasons # such as race conditions between difference callbacks!) if self.nlaunch > self.safety_ratio * self.flow.num_tasks: msg = "Too many jobs launched %d. Total number of tasks = %s, Will shutdown the scheduler and exit" % ( self.nlaunch, self.flow.num_tasks) err_msg += boxed(msg) # Count the number of tasks with status == S_ERROR. if self.flow.num_errored_tasks > self.max_num_abierrs: msg = "Number of tasks with ERROR status %s > %s. 
Will shutdown the scheduler and exit" % ( self.flow.num_errored_tasks, self.max_num_abierrs) err_msg += boxed(msg) deadlocked, runnables, running = self.flow.deadlocked_runnables_running() #print("\ndeadlocked:\n", deadlocked, "\nrunnables:\n", runnables, "\nrunning\n", running) if deadlocked and not runnables and not running: msg = "No runnable job with deadlocked tasks:\n %s\nWill shutdown the scheduler and exit" % str(deadlocked) err_msg += boxed(msg) if err_msg: # Something wrong. Quit self.shutdown(err_msg) return len(self.exceptions)
def _callback(self): """The actual callback.""" if self.debug: # Show the number of open file descriptors print(">>>>> _callback: Number of open file descriptors: %s" % get_open_fds()) self._runem_all() # Mission accomplished. Shutdown the scheduler. all_ok = self.flow.all_ok if all_ok: return self.shutdown( msg= "All tasks have reached S_OK. Will shutdown the scheduler and exit" ) # Handle failures. err_lines = [] # Shall we send a reminder to the user? delta_etime = self.get_delta_etime() if delta_etime.total_seconds() > self.num_reminders * self.remindme_s: self.num_reminders += 1 msg = ( "Just to remind you that the scheduler with pid %s, flow %s\n has been running for %s " % (self.pid, self.flow, delta_etime)) retcode = self.send_email(msg, tag="[REMINDER]") if retcode: # Cannot send mail, shutdown now! msg += ( "\nThe scheduler tried to send an e-mail to remind the user\n" + " but send_email returned %d. Aborting now" % retcode) err_lines.append(msg) #if delta_etime.total_seconds() > self.max_etime_s: # err_lines.append("\nExceeded max_etime_s %s. Will shutdown the scheduler and exit" % self.max_etime_s) # Too many exceptions. Shutdown the scheduler. if self.num_excs > self.max_num_pyexcs: msg = "Number of exceptions %s > %s. Will shutdown the scheduler and exit" % ( self.num_excs, self.max_num_pyexcs) err_lines.append(boxed(msg)) # Paranoid check: disable the scheduler if we have submitted # too many jobs (it might be due to some bug or other external reasons # such as race conditions between difference callbacks!) if self.nlaunch > self.safety_ratio * self.flow.num_tasks: msg = "Too many jobs launched %d. Total number of tasks = %s, Will shutdown the scheduler and exit" % ( self.nlaunch, self.flow.num_tasks) err_lines.append(boxed(msg)) # Count the number of tasks with status == S_ERROR. if self.flow.num_errored_tasks > self.max_num_abierrs: msg = "Number of tasks with ERROR status %s > %s. 
Will shutdown the scheduler and exit" % ( self.flow.num_errored_tasks, self.max_num_abierrs) err_lines.append(boxed(msg)) # Test on the presence of deadlocks. g = self.flow.find_deadlocks() if g.deadlocked: # Check the flow again so that status are updated. self.flow.check_status() g = self.flow.find_deadlocks() print("deadlocked:\n", g.deadlocked, "\nrunnables:\n", g.runnables, "\nrunning\n", g.running) if g.deadlocked and not g.runnables and not g.running: err_lines.append( "No runnable job with deadlocked tasks:\n%s." % str(g.deadlocked)) if not g.runnables and not g.running: # Check the flow again so that status are updated. self.flow.check_status() g = self.flow.find_deadlocks() if not g.runnables and not g.running: err_lines.append( "No task is running and cannot find other tasks to submit." ) # Something wrong. Quit if err_lines: # Cancel all jobs. if self.killjobs_if_errors: cprint( "killjobs_if_errors set to 'yes' in scheduler file. Will kill jobs before exiting.", "yellow") try: num_cancelled = 0 for task in self.flow.iflat_tasks(): num_cancelled += task.cancel() cprint("Killed %d tasks" % num_cancelled, "yellow") except Exception as exc: cprint( "Exception while trying to kill jobs:\n%s" % str(exc), "red") self.shutdown("\n".join(err_lines)) return len(self.exceptions)
def _callback(self): """The actual callback.""" if self.DEBUG: # Show the number of open file descriptors print(">>>>> _callback: Number of open file descriptors: %s" % get_open_fds()) #print('before _runem_all in _callback') self._runem_all() # Mission accomplished. Shutdown the scheduler. all_ok = self.flow.all_ok if self.verbose: print("all_ok", all_ok) if all_ok: self.shutdown(msg="All tasks have reached S_OK. Will shutdown the scheduler and exit") # Handle failures. err_msg = "" # Shall we send a reminder to the user? delta_etime = self.get_delta_etime() if delta_etime.total_seconds() > self.num_reminders * self.REMINDME_S: self.num_reminders += 1 msg = ("Just to remind you that the scheduler with pid %s, flow %s\n has been running for %s " % (self.pid, self.flow, delta_etime)) retcode = self.send_email(msg, tag="[REMINDER]") if retcode: # Cannot send mail, shutdown now! msg += ("\nThe scheduler tried to send an e-mail to remind the user\n" + " but send_email returned %d. Aborting now" % retcode) err_msg += msg #if delta_etime.total_seconds() > self.MAX_ETIME_S: # err_msg += "\nExceeded MAX_ETIME_S %s. Will shutdown the scheduler and exit" % self.MAX_ETIME_S # Too many exceptions. Shutdown the scheduler. if self.num_excs > self.MAX_NUM_PYEXCS: msg = "Number of exceptions %s > %s. Will shutdown the scheduler and exit" % ( self.num_excs, self.MAX_NUM_PYEXCS) err_msg += boxed(msg) # Paranoid check: disable the scheduler if we have submitted # too many jobs (it might be due to some bug or other external reasons # such as race conditions between difference callbacks!) if self.nlaunch > self.SAFETY_RATIO * self.flow.num_tasks: msg = "Too many jobs launched %d. Total number of tasks = %s, Will shutdown the scheduler and exit" % ( self.nlaunch, self.flow.num_tasks) err_msg += boxed(msg) # Count the number of tasks with status == S_ERROR. if self.flow.num_errored_tasks > self.MAX_NUM_ABIERRS: msg = "Number of tasks with ERROR status %s > %s. 
Will shutdown the scheduler and exit" % ( self.flow.num_errored_tasks, self.MAX_NUM_ABIERRS) err_msg += boxed(msg) # Count the number of tasks with status == S_UNCONVERGED. #if self.flow.num_unconverged_tasks: # # TODO: this is needed to avoid deadlocks, automatic restarting is not available yet # msg = ("Found %d unconverged tasks." # "Automatic restarting is not available yet. Will shutdown the scheduler and exit" # % self.flow.num_unconverged_tasks) # err_msg += boxed(msg) #deadlocks = self.detect_deadlocks() #if deadlocks: # msg = ("Detected deadlocks in flow. Will shutdown the scheduler and exit" # % self.flow.num_unconverged_tasks) # err_msg += boxed(msg) if err_msg: # Something wrong. Quit self.shutdown(err_msg) return len(self.exceptions)
def _callback(self): """The actual callback.""" if self.DEBUG: # Show the number of open file descriptors print(">>>>> _callback: Number of open file descriptors: %s" % get_open_fds()) #print('before _runem_all in _callback') self._runem_all() # Mission accomplished. Shutdown the scheduler. all_ok = self.flow.all_ok if self.verbose: print("all_ok", all_ok) if all_ok: self.shutdown( msg= "All tasks have reached S_OK. Will shutdown the scheduler and exit" ) # Handle failures. err_msg = "" # Shall we send a reminder to the user? delta_etime = self.get_delta_etime() if delta_etime.total_seconds() > self.num_reminders * self.REMINDME_S: self.num_reminders += 1 msg = ( "Just to remind you that the scheduler with pid %s, flow %s\n has been running for %s " % (self.pid, self.flow, delta_etime)) retcode = self.send_email(msg, tag="[REMINDER]") if retcode: # Cannot send mail, shutdown now! msg += ( "\nThe scheduler tried to send an e-mail to remind the user\n" + " but send_email returned %d. Aborting now" % retcode) err_msg += msg #if delta_etime.total_seconds() > self.MAX_ETIME_S: # err_msg += "\nExceeded MAX_ETIME_S %s. Will shutdown the scheduler and exit" % self.MAX_ETIME_S # Too many exceptions. Shutdown the scheduler. if self.num_excs > self.MAX_NUM_PYEXCS: msg = "Number of exceptions %s > %s. Will shutdown the scheduler and exit" % ( self.num_excs, self.MAX_NUM_PYEXCS) err_msg += boxed(msg) # Paranoid check: disable the scheduler if we have submitted # too many jobs (it might be due to some bug or other external reasons # such as race conditions between difference callbacks!) if self.nlaunch > self.SAFETY_RATIO * self.flow.num_tasks: msg = "Too many jobs launched %d. Total number of tasks = %s, Will shutdown the scheduler and exit" % ( self.nlaunch, self.flow.num_tasks) err_msg += boxed(msg) # Count the number of tasks with status == S_ERROR. if self.flow.num_errored_tasks > self.MAX_NUM_ABIERRS: msg = "Number of tasks with ERROR status %s > %s. 
Will shutdown the scheduler and exit" % ( self.flow.num_errored_tasks, self.MAX_NUM_ABIERRS) err_msg += boxed(msg) # Count the number of tasks with status == S_UNCONVERGED. #if self.flow.num_unconverged_tasks: # # TODO: this is needed to avoid deadlocks, automatic restarting is not available yet # msg = ("Found %d unconverged tasks." # "Automatic restarting is not available yet. Will shutdown the scheduler and exit" # % self.flow.num_unconverged_tasks) # err_msg += boxed(msg) #deadlocks = self.detect_deadlocks() #if deadlocks: # msg = ("Detected deadlocks in flow. Will shutdown the scheduler and exit" # % self.flow.num_unconverged_tasks) # err_msg += boxed(msg) if err_msg: # Something wrong. Quit self.shutdown(err_msg) return len(self.exceptions)