Example #1
0
 def _save_noraise(self, process):
     """
     Save ``process`` via ``self.save`` and log, rather than propagate, any
     exception the save raises.

     :param process: the process to save; its ``pid`` is used in the log message
     """
     try:
         self.save(process)
     except BaseException:
         # Catch-all on purpose: the failure is reported with its traceback
         # through the logger instead of reaching the caller.
         message = "Exception raised trying to pickle process (pid={})\n{}".format(
             process.pid, traceback.format_exc())
         LOGGER.error(message)
Example #2
0
    def load_all_processes(self):
        """
        Will detect all pickles in the running directory and will try to load
        them up into Processes. As soon as a pickle is considered for loading,
        a lock is placed on it, which is not released until the process is
        destroyed. This is necessary to prevent another thread from loading up
        the same process.

        Pickles that raise a lock exception or an IOError are skipped and left
        in place. Pickles that fail to load with any other ordinary exception
        are logged and deleted. KeyboardInterrupt and SystemExit propagate
        without touching the pickle.

        :return: a list of Process instances
        """
        processes = []
        for f in glob.glob(path.join(self._running_directory, "*.pickle")):
            try:
                process = self.create_from_file_and_persist(f)
            except (portalocker.LockException, IOError):
                continue
            except Exception:
                # Only ordinary errors count as a corrupt checkpoint. Catching
                # BaseException here would delete a possibly valid pickle when
                # the user presses Ctrl-C or the interpreter is exiting.
                LOGGER.warning("Failed to load checkpoint '{}' (deleting)\n{}"
                    .format(f, traceback.format_exc()))

                try:
                    os.remove(f)
                except OSError:
                    # Best effort: the file may already be gone.
                    pass

            else:
                processes.append(process)

        return processes
Example #3
0
 def on_process_playing(self, process):
     """
     Acquire the file lock stored in ``self._filelocks`` for the process pid.

     If acquiring raises ``portalocker.LockException`` a warning is logged and
     the lock entry for that pid is removed from ``self._filelocks``.

     :param process: the process whose ``pid`` selects the lock
     """
     pid = process.pid
     try:
         self._filelocks[pid].acquire()
     except portalocker.LockException:
         running_path = self.get_running_path(pid)
         LOGGER.warning(
             "Couldn't acquire file lock for '{}', not persisting.".format(running_path))
         del self._filelocks[pid]
Example #4
0
 def on_process_finish(self, process):
     """
     Save ``process`` and then pass its pid to ``self._release_process``
     together with ``self.finished_directory``.

     A ``pickle.PicklingError`` from either step is logged as an error; a
     ``ValueError`` is ignored.

     :param process: the process that finished
     """
     try:
         self.save(process)
         pid = process.pid
         self._release_process(pid, self.finished_directory)
     except pickle.PicklingError:
         message = ("exception raised trying to pickle process (pid={}) "
                    "during on_finish message.").format(process.pid)
         LOGGER.error(message)
     except ValueError:
         # NOTE(review): swallowed without logging; the reason is not visible
         # from this method - confirm against save/_release_process.
         pass
Example #5
0
    def load_all_checkpoints(self):
        """
        Load every ``*.pickle`` file found in the running directory.

        Each file is passed to ``self.load_checkpoint_from_file``. A file that
        raises an ordinary exception is logged as a warning and skipped.
        KeyboardInterrupt and SystemExit are not intercepted.

        :return: a list with the loaded checkpoint of every file that loaded
        """
        checkpoints = []
        for f in glob.glob(path.join(self._running_directory, "*.pickle")):
            try:
                checkpoint = self.load_checkpoint_from_file(f)
            except Exception as e:
                # Format the exception object itself: ``e.message`` does not
                # exist on Python 3 (and is frequently empty on Python 2), so
                # reading it could raise from inside this handler.
                LOGGER.warning(
                    "Failed to load checkpoint {} because of exception\n"
                    "{}".format(f, e))
            else:
                checkpoints.append(checkpoint)

        return checkpoints
Example #6
0
    def persist_process(self, process):
        """
        Save ``process`` if it has no file at its running path yet, then
        register this object as a listener on it.

        A ``pickle.PicklingError`` raised while saving is logged as an error
        and does not prevent the listener registration.

        :param process: the process to persist; its ``pid`` selects the
            running path
        """
        # If the process doesn't have a persisted state then persist it now
        if not path.isfile(self.get_running_path(process.pid)):
            try:
                self.save(process)
            except pickle.PicklingError as e:
                # Format the exception object itself: ``e.message`` does not
                # exist on Python 3, so reading it would raise AttributeError
                # from inside this handler and hide the pickling error.
                LOGGER.error(
                    "exception raised trying to pickle process (pid={}).\n"
                    "{}".format(process.pid, e))

        try:
            process.add_process_listener(self)
        except AssertionError:
            # Happens if we're already listening
            pass
Example #7
0
    def save(self, process):
        """
        Pickle the bundle created for ``process`` into its running-path file.

        The file handle comes from the lock stored in ``self._filelocks`` for
        the process pid, or from a fresh ``RLock`` on the file when none is
        stored. If ``pickle.dump`` fails, the failure is logged at debug level,
        the file is removed if it exists, and the exception is re-raised.

        :param process: the process to save
        """
        self._ensure_directory(self._running_directory)
        filename = self.get_running_path(process.pid)
        fallback_lock = RLock(filename, 'w+b', timeout=0)
        lock = self._filelocks.get(process.pid, fallback_lock)

        with lock as handle:
            bundle = self.create_bundle(process)
            self._clear(handle)
            try:
                pickle.dump(bundle, handle)
            except BaseException as error:
                LOGGER.debug("Failed to save the pickle\n{}: {}\n"
                             "Pickle contents: {}".format(type(error), error, bundle))
                # Don't leave a half-baked pickle around
                if path.isfile(filename):
                    os.remove(filename)
                raise
            else:
                handle.flush()
Example #8
0
    def load_all_checkpoints(self):
        """
        Load every ``*.pickle`` file found in the running directory.

        Each file is passed to ``self.load_checkpoint_from_file``. Files that
        raise a lock exception or an IOError are skipped and left in place.
        Files that fail with any other ordinary exception are logged and
        deleted. KeyboardInterrupt and SystemExit propagate without touching
        the file.

        :return: a list with the loaded checkpoint of every file that loaded
        """
        checkpoints = []
        for f in glob.glob(path.join(self._running_directory, "*.pickle")):
            try:
                checkpoint = self.load_checkpoint_from_file(f)
            except (portalocker.LockException, IOError):
                # Don't load locked checkpoints or those with IOErrors
                # these often come if the pickle was deleted since the glob
                pass
            except Exception:
                # Only ordinary errors count as a corrupt checkpoint. Catching
                # BaseException here would delete a possibly valid pickle when
                # the user presses Ctrl-C or the interpreter is exiting.
                LOGGER.warning(
                    "Failed to load checkpoint '{}' (deleting)\n"
                    "{}".format(f, traceback.format_exc()))

                # Deleting
                try:
                    os.remove(f)
                except OSError:
                    # Best effort: the file may already be gone.
                    pass
            else:
                checkpoints.append(checkpoint)

        return checkpoints
Example #9
0
    def load_all_processes(self):
        """
        Will detect all pickles in the running directory and will try to load
        them up into Processes. As soon as a pickle is considered for loading,
        a lock is placed on it, which is not released until the process is
        destroyed. This is necessary to prevent another thread from loading up
        the same process.

        Pickles that raise a lock exception or an IOError are skipped and left
        in place. For pickles that fail with any other ordinary exception, the
        node whose pk is the pickle's base name is marked as failed and sealed
        (best effort), and the pickle is moved to the failed directory.
        KeyboardInterrupt and SystemExit raised while loading propagate
        without marking the node or moving the pickle.

        :return: a list of Process instances
        """
        processes = []
        for f in glob.glob(path.join(self._running_directory, "*.pickle")):
            try:
                process = self.create_from_file_and_persist(f)
            except (portalocker.LockException, IOError):
                continue
            except Exception:
                # Only ordinary errors count as a corrupt pickle. Catching
                # BaseException here would mark a healthy node as FAILED when
                # the user presses Ctrl-C or the interpreter is exiting.
                LOGGER.warning("Failed to load checkpoint '{}'\n{}".format(
                    f, traceback.format_exc()))

                # Try to load the node corresponding to the corrupt pickle, set it to FAILED and seal it
                # At the end we will also move the pickle to the failed directory so it can be inspected for debugging
                try:
                    from aiida.orm import load_node
                    # The pickle's file name without extension is used as the node pk
                    pk = os.path.splitext(os.path.basename(f))[0]
                    node = load_node(int(pk))
                    node._set_attr(node.FAILED_KEY, True)
                    node.seal()
                except Exception:
                    LOGGER.warning(
                        'failed to clean up the node of the corrupt pickle {}'.
                        format(traceback.format_exc()))
                finally:
                    # Runs whether or not the node cleanup succeeded
                    LOGGER.warning("moving '{}' to failed directory".format(f))
                    try:
                        filename = os.path.basename(f)
                        os.rename(
                            f, os.path.join(self.failed_directory, filename))
                    except OSError:
                        # Best effort: the pickle stays where it is if the move fails
                        pass

            else:
                processes.append(process)

        return processes
Example #10
0
 def on_process_wait(self, process):
     """
     Save ``process``; a ``pickle.PicklingError`` from the save is logged as
     an error instead of being propagated.

     :param process: the process to save; its ``pid`` is used in the log message
     """
     try:
         self.save(process)
     except pickle.PicklingError:
         message = ("exception raised trying to pickle process (pid={}) "
                    "during on_wait message.").format(process.pid)
         LOGGER.error(message)