def _LoadNewPaths(self):
    """Registers a loader for every not-yet-tracked path in the directory.

    Lists self._directory, keeps the entries accepted by self._path_filter,
    and, in sorted order, stores a new _EventPathLoader in self._paths for
    each entry that is not already a key there.
    """
    listing = io_wrapper.ListDirectoryAbsolute(self._directory)
    accepted = [entry for entry in listing if self._path_filter(entry)]
    accepted.sort()
    for candidate in accepted:
        if candidate in self._paths:
            # Already tracked; keep the existing loader untouched.
            continue
        logger.info('New path detected: %s.' % candidate)
        self._paths[candidate] = _EventPathLoader(
            candidate, self._loader_factory)
def testListDirectoryAbsolute(self):
    """Checks ListDirectoryAbsolute against the expected entry names.

    The fixture is built by self._CreateDeepDirectoryStructure; the listing
    must match, in any order, the expected names joined onto the temp dir.
    """
    root = tempfile.mkdtemp(prefix=self.get_temp_dir())
    self._CreateDeepDirectoryStructure(root)

    entry_names = [
        'foo',
        'bar',
        'quuz',
        'a.tfevents.1',
        'model.ckpt',
        'waldo',
    ]
    expected_paths = [os.path.join(root, name) for name in entry_names]
    listed_paths = io_wrapper.ListDirectoryAbsolute(root)

    self.assertItemsEqual(expected_paths, listed_paths)
def testListDirectoryAbsolute(self):
    """Verifies that ListDirectoryAbsolute returns the expected absolute paths.

    After self._CreateDeepDirectoryStructure populates a fresh temp dir, the
    listing is compared (order-insensitively) with the expected names
    prefixed by that directory.
    """
    base_dir = tempfile.mkdtemp(prefix=self.get_temp_dir())
    self._CreateDeepDirectoryStructure(base_dir)

    wanted = []
    for name in ("foo", "bar", "quuz", "a.tfevents.1", "model.ckpt", "waldo"):
        wanted.append(os.path.join(base_dir, name))

    self.assertItemsEqual(wanted, io_wrapper.ListDirectoryAbsolute(base_dir))
def Load(self):
    """Loads new values from all active files.

    Lists self._directory, keeps the paths accepted by self._path_filter,
    and yields everything self._LoadPath produces for each path in sorted
    order.

    Yields:
      All values that have not been yielded yet.

    Raises:
      DirectoryDeletedError: If a tf.errors.OpError occurs and the directory
        no longer exists (i.e. it has been permanently deleted, as opposed
        to being temporarily unavailable).
    """
    try:
        listing = io_wrapper.ListDirectoryAbsolute(self._directory)
        matching = [entry for entry in listing if self._path_filter(entry)]
        matching.sort()
        for path in matching:
            for value in self._LoadPath(path):
                yield value
    except tf.errors.OpError as e:
        if tf.io.gfile.exists(self._directory):
            # The directory is still there, so treat the failure as
            # transient: log it and end this load quietly.
            logger.info("Ignoring error during file loading: %s" % e)
        else:
            raise directory_watcher.DirectoryDeletedError(
                "Directory %s has been permanently deleted" % self._directory)
def _GetNextPath(self):
    """Gets the next path to load from.

    This function also does the checking for out-of-order writes as it
    iterates through the paths.

    Returns:
      The next path to load events from, or None if there are no more paths.
      When no path has been loaded yet (self._path is None), this is the
      smallest filtered path; otherwise it is the smallest filtered path
      strictly greater than self._path.
    """
    paths = sorted(
        path
        for path in io_wrapper.ListDirectoryAbsolute(self._directory)
        if self._path_filter(path))
    if not paths:
        return None

    if self._path is None:
        return paths[0]

    # Don't bother checking if the paths are GCS (which we can't check) or if
    # we've already detected an OOO write.
    if not io_wrapper.IsGCSPath(paths[0]) and not self._ooo_writes_detected:
        # Check the previous _OOO_WRITE_CHECK_COUNT paths for out of order
        # writes. bisect_left places the index at the current path (or where
        # it would be inserted), so the slice covers only earlier paths.
        current_path_index = bisect.bisect_left(paths, self._path)
        ooo_check_start = max(
            0, current_path_index - self._OOO_WRITE_CHECK_COUNT)
        for path in paths[ooo_check_start:current_path_index]:
            if self._HasOOOWrite(path):
                self._ooo_writes_detected = True
                break

    # `paths` is sorted and self._path is known to be non-None here, so the
    # first entry strictly greater than self._path is also the smallest one;
    # bisect_right finds it without scanning or copying the list.
    next_index = bisect.bisect_right(paths, self._path)
    if next_index < len(paths):
        return paths[next_index]
    return None
def testListDirectoryAbsolute(self):
    """Checks that ListDirectoryAbsolute lists only immediate children.

    Builds a temp directory with subdirectories (one nested chain) and files
    (one inside a subdirectory), then expects the listing to match, in any
    order, the top-level names joined onto the temp directory.
    """
    root = tempfile.mkdtemp(prefix=self.get_temp_dir())

    # Add a few subdirectories.
    for subdir in ('foo', 'bar', 'we/must/go/deeper'):
        os.makedirs(os.path.join(root, subdir))

    # Add a few files to the directory; the last one sits inside `we/` and
    # is expected to be absent from the listing.
    for relative_file in (
            'events.out.tfevents.1473720381.foo.com',
            'model.ckpt',
            'we/must_not_include_this_file_in_the_listing.txt'):
        with open(os.path.join(root, relative_file), 'w'):
            pass

    expected_names = [
        'foo',
        'bar',
        'we',
        'events.out.tfevents.1473720381.foo.com',
        'model.ckpt',
    ]
    expected_paths = [os.path.join(root, name) for name in expected_names]
    self.assertItemsEqual(
        expected_paths, io_wrapper.ListDirectoryAbsolute(root))