def _load_checkpoints(self, checkpointDirs): """Load a checkpoint file into a lookup table. The data being loaded from the pickle file mostly contains input attributes of the task: func, args, kwargs, env... To simplify the check of whether the exact task has been completed in the checkpoint, we hash these input params and use it as the key for the memoized lookup table. Args: - checkpointDirs (list) : List of filepaths to checkpoints Eg. ['runinfo/001', 'runinfo/002'] Returns: - memoized_lookup_table (dict) """ memo_lookup_table = {} for checkpoint_dir in checkpointDirs: logger.info("Loading checkpoints from {}".format(checkpoint_dir)) checkpoint_file = os.path.join(checkpoint_dir, 'tasks.pkl') try: with open(checkpoint_file, 'rb') as f: while True: try: data = pickle.load(f) # Copy and hash only the input attributes memo_fu = Future() if data['exception']: memo_fu.set_exception(data['exception']) else: memo_fu.set_result(data['result']) memo_lookup_table[data['hash']] = memo_fu except EOFError: # Done with the checkpoint file break except FileNotFoundError: reason = "Checkpoint file was not found: {}".format( checkpoint_file) logger.error(reason) raise BadCheckpoint(reason) except Exception: reason = "Failed to load checkpoint: {}".format( checkpoint_file) logger.error(reason) raise BadCheckpoint(reason) logger.info( "Completed loading checkpoint: {0} with {1} tasks".format( checkpoint_file, len(memo_lookup_table.keys()))) return memo_lookup_table
def load_checkpoints(self, checkpointDirs):
    """Load checkpoints from the checkpoint files into a dictionary.

    The results are intended to pre-populate the memoizer's lookup table.
    This method resets ``self.memo_lookup_table`` to ``None`` and returns
    the loaded table; it does not store the table on the instance itself.

    Args:
        - checkpointDirs (list) : List of run folders to use as checkpoints
          Eg. ['runinfo/001', 'runinfo/002']. Any falsy value (``None``,
          an empty list, ...) means "no checkpoints" and yields an empty
          dict.

    Returns:
        - dict containing, hashed -> future mappings

    Raises:
        - BadCheckpoint : If ``checkpointDirs`` is truthy but is not a
          list (or an instance of a list subclass).
    """
    self.memo_lookup_table = None

    # Nothing to load: short-circuit before any type validation.
    if not checkpointDirs:
        return {}

    # isinstance accepts list subclasses, which an exact type comparison
    # would reject even though they behave like lists.
    if not isinstance(checkpointDirs, list):
        raise BadCheckpoint("checkpointDirs expects a list of checkpoints")

    return self._load_checkpoints(checkpointDirs)