def _save_noraise(self, process):
    """
    Save the given process, logging any failure instead of propagating it.

    Every exception raised by ``self.save`` (``BaseException`` included) is
    caught and reported through ``LOGGER.error`` together with the traceback.

    :param process: the process to save; its ``pid`` appears in the log entry
    """
    try:
        self.save(process)
    except BaseException:
        message = "Exception raised trying to pickle process (pid={})\n{}".format(
            process.pid, traceback.format_exc())
        LOGGER.error(message)
def load_all_processes(self):
    """
    Will detect all pickles in the running directory and will try to load
    them up into Processes.  As soon as a pickle is considered for loading,
    a lock is placed on it, which is not released until the process is
    destroyed.  This is necessary to prevent another thread from loading
    up the same process.

    Pickles that raise a lock or IO error are skipped; pickles that fail to
    load for any other reason are logged and deleted from disk.

    :return: a list of Process instances
    """
    loaded = []
    pattern = path.join(self._running_directory, "*.pickle")
    for pickle_file in glob.glob(pattern):
        try:
            process = self.create_from_file_and_persist(pickle_file)
        except (portalocker.LockException, IOError):
            # Locked or unreadable: leave the file alone and move on
            continue
        except BaseException:
            LOGGER.warning(
                "Failed to load checkpoint '{}' (deleting)\n{}".format(
                    pickle_file, traceback.format_exc()))
            # Best-effort removal of the pickle that could not be loaded
            try:
                os.remove(pickle_file)
            except OSError:
                pass
            continue
        loaded.append(process)
    return loaded
def on_process_playing(self, process):
    """
    Acquire the file lock registered for the process.

    If the lock raises ``portalocker.LockException`` a warning is logged and
    the lock entry for this pid is removed from ``self._filelocks``.

    :param process: the process whose ``pid`` keys the lock in
        ``self._filelocks``
    """
    pid = process.pid
    try:
        self._filelocks[pid].acquire()
    except portalocker.LockException:
        running_path = self.get_running_path(pid)
        LOGGER.warning(
            "Couldn't acquire file lock for '{}', not persisting.".format(
                running_path))
        del self._filelocks[pid]
def on_process_finish(self, process):
    """
    Save the finished process and release it into the finished directory.

    A ``pickle.PicklingError`` is logged as an error; a ``ValueError`` is
    silently ignored.  In both cases nothing is re-raised.

    :param process: the process that has finished
    """
    try:
        self.save(process)
        self._release_process(process.pid, self.finished_directory)
    except ValueError:
        pass
    except pickle.PicklingError:
        template = ("exception raised trying to pickle process (pid={}) "
                    "during on_finish message.")
        LOGGER.error(template.format(process.pid))
def load_all_checkpoints(self):
    """
    Load every checkpoint pickle found in the running directory.

    Files that fail to load are reported with a warning and skipped, so one
    bad pickle does not prevent the remaining ones from being loaded.

    :return: a list with the checkpoints that were loaded successfully
    """
    checkpoints = []
    for f in glob.glob(path.join(self._running_directory, "*.pickle")):
        try:
            checkpoints.append(self.load_checkpoint_from_file(f))
        except BaseException as e:
            # Format the exception object itself rather than `e.message`:
            # that attribute does not exist on Python 3 exceptions, so
            # reading it would raise AttributeError from inside this
            # handler and abort the whole scan.
            LOGGER.warning(
                "Failed to load checkpoint {} because of exception\n"
                "{}".format(f, e))
    return checkpoints
def persist_process(self, process):
    """
    Make sure the process has a persisted state and listen to its events.

    If no file exists at the running path of the process it is saved now; a
    ``pickle.PicklingError`` raised while saving is logged and not re-raised.
    Afterwards this object is registered as a listener of the process.

    :param process: the process to persist
    """
    # If the process doesn't have a persisted state then persist it now
    if not path.isfile(self.get_running_path(process.pid)):
        try:
            self.save(process)
        except pickle.PicklingError as e:
            # Format the exception object itself rather than `e.message`:
            # that attribute does not exist on Python 3 exceptions, so
            # reading it would raise AttributeError here and the listener
            # below would never be registered.
            LOGGER.error(
                "exception raised trying to pickle process (pid={}).\n"
                "{}".format(process.pid, e))

    try:
        process.add_process_listener(self)
    except AssertionError:
        # Happens if we're already listening
        pass
def save(self, process):
    """
    Pickle a bundle of the process into its file in the running directory.

    The running directory is ensured first.  The file is accessed through
    the lock registered for the pid in ``self._filelocks``, or through a new
    ``RLock`` on the running path when none is registered.  If dumping the
    pickle fails, the failure is logged at debug level, the file is removed
    if it exists and the original exception is re-raised.

    :param process: the process to save
    """
    self._ensure_directory(self._running_directory)
    pickle_path = self.get_running_path(process.pid)

    # The fallback lock is always constructed, even when one is registered
    fallback_lock = RLock(pickle_path, 'w+b', timeout=0)
    file_lock = self._filelocks.get(process.pid, fallback_lock)

    with file_lock as handle:
        bundle = self.create_bundle(process)
        self._clear(handle)
        try:
            pickle.dump(bundle, handle)
        except BaseException as error:
            LOGGER.debug(
                "Failed to save the pickle\n{}: {}\n"
                "Pickle contents: {}".format(type(error), error, bundle))
            # Don't leave a half-baked pickle around
            if path.isfile(pickle_path):
                os.remove(pickle_path)
            raise
        handle.flush()
def load_all_checkpoints(self):
    """
    Load every checkpoint pickle found in the running directory.

    Pickles that raise a lock or IO error are skipped; pickles that fail to
    load for any other reason are logged and deleted from disk.

    :return: a list with the checkpoints that were loaded successfully
    """
    loaded = []
    pattern = path.join(self._running_directory, "*.pickle")
    for pickle_file in glob.glob(pattern):
        try:
            checkpoint = self.load_checkpoint_from_file(pickle_file)
        except (portalocker.LockException, IOError):
            # Don't load locked checkpoints or those with IOErrors
            # these often come if the pickle was deleted since the glob
            continue
        except BaseException:
            LOGGER.warning(
                "Failed to load checkpoint '{}' (deleting)\n"
                "{}".format(pickle_file, traceback.format_exc()))
            # Best-effort removal of the pickle that could not be loaded
            try:
                os.remove(pickle_file)
            except OSError:
                pass
            continue
        loaded.append(checkpoint)
    return loaded
def load_all_processes(self):
    """
    Will detect all pickles in the running directory and will try to load
    them up into Processes.  As soon as a pickle is considered for loading,
    a lock is placed on it, which is not released until the process is
    destroyed.  This is necessary to prevent another thread from loading
    up the same process.

    Pickles that raise a lock or IO error are skipped.  For any other load
    failure the node whose pk is the pickle's file name is marked as failed
    and sealed, and the pickle is moved to the failed directory.

    :return: a list of Process instances
    """
    loaded = []
    pattern = path.join(self._running_directory, "*.pickle")
    for pickle_file in glob.glob(pattern):
        try:
            process = self.create_from_file_and_persist(pickle_file)
        except (portalocker.LockException, IOError):
            continue
        except BaseException:
            LOGGER.warning("Failed to load checkpoint '{}'\n{}".format(
                pickle_file, traceback.format_exc()))

            # Try to load the node corresponding to the corrupt pickle, set it to FAILED and seal it
            # At the end we will also move the pickle to the failed directory so it can be inspected for debugging
            try:
                from aiida.orm import load_node

                stem = os.path.splitext(os.path.basename(pickle_file))[0]
                node = load_node(int(stem))
                node._set_attr(node.FAILED_KEY, True)
                node.seal()
            except BaseException:
                LOGGER.warning(
                    'failed to clean up the node of the corrupt pickle {}'.format(
                        traceback.format_exc()))
            finally:
                LOGGER.warning(
                    "moving '{}' to failed directory".format(pickle_file))
                try:
                    target = os.path.join(
                        self.failed_directory, os.path.basename(pickle_file))
                    os.rename(pickle_file, target)
                except OSError:
                    pass
            continue
        loaded.append(process)
    return loaded
def on_process_wait(self, process):
    """
    Save the process when it enters a waiting state.

    A ``pickle.PicklingError`` raised by the save is logged as an error and
    not re-raised.

    :param process: the process that is waiting
    """
    try:
        self.save(process)
    except pickle.PicklingError:
        template = ("exception raised trying to pickle process (pid={}) "
                    "during on_wait message.")
        LOGGER.error(template.format(process.pid))