Example #1
def StartMultiplexerReloadingThread(multiplexer, path_to_run, load_interval):
    """Starts a thread to automatically reload the given multiplexer.

  The thread will reload the multiplexer by calling `ReloadMultiplexer` every
  `load_interval` seconds, starting immediately.

  Args:
    multiplexer: The `EventMultiplexer` to add runs to and reload.
    path_to_run: A dict mapping from paths to run names, where `None` as the run
      name is interpreted as a run name equal to the path.
    load_interval: How many seconds to wait after one load before starting the
      next load.

  Returns:
    A started `threading.Thread` that reloads the multiplexer.
  """
    # We don't call multiplexer.Reload() here because that would make
    # AddRunsFromDirectory block until the runs have all loaded.
    for path in path_to_run.keys():
        if gcs.IsGCSPath(path):
            gcs.CheckIsSupported()
            logging.info(
                'Assuming %s is intended to be a Google Cloud Storage path because '
                'it starts with %s. If it isn\'t, prefix it with \'/.\' (i.e., use '
                '/.%s instead)', path, gcs.PATH_PREFIX, path)

    def _ReloadForever():
        while True:
            ReloadMultiplexer(multiplexer, path_to_run)
            time.sleep(load_interval)

    thread = threading.Thread(target=_ReloadForever)
    thread.daemon = True
    thread.start()
    return thread
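A hedged usage sketch: the `EventMultiplexer` construction and the log-directory paths below are assumptions for illustration, not part of the snippet above.

multiplexer = event_multiplexer.EventMultiplexer()
path_to_run = {'/tmp/logs/run1': 'run1', '/tmp/logs/run2': None}  # None: run named after its path
reload_thread = StartMultiplexerReloadingThread(multiplexer, path_to_run, load_interval=60)
# The returned thread is a daemon that reloads forever, so there is no need to join() it.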
Example #2
def ListDirectoryAbsolute(directory):
    """Yields all files in the given directory. The paths are absolute."""
    if gcs.IsGCSPath(directory):
        return gcs.ListDirectory(directory)
    else:
        return (os.path.join(directory, path)
                for path in gfile.ListDirectory(directory))
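For illustration, a hedged call with a made-up directory; the generator yields absolute paths whether the directory is local or on GCS.

for file_path in ListDirectoryAbsolute('/tmp/logs'):
    print(file_path)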
Example #3
  def _SetPath(self, path):
    old_path = self._path
    if old_path and not gcs.IsGCSPath(old_path):
      # We're done with the path, so store its size.
      size = io_wrapper.Size(old_path)
      logging.debug('Setting latest size of %s to %d', old_path, size)
      self._finalized_sizes[old_path] = size

    self._path = path
    self._loader = self._loader_factory(path)
Example #4
def ParseEventFilesSpec(logdir):
    """Parses `logdir` into a map from paths to run group names.

  The events files flag format is a comma-separated list of path specifications.
  A path specification either looks like 'group_name:/path/to/directory' or
  '/path/to/directory'; in the latter case, the group is unnamed. Group names
  cannot start with a forward slash: /foo:bar/baz will be interpreted as a
  spec with no name and path '/foo:bar/baz'.

  Globs are not supported.

  Args:
    logdir: A comma-separated list of run specifications.
  Returns:
    A dict mapping directory paths to names like {'/path/to/directory': 'name'}.
    Groups without an explicit name are named after their path. If logdir is
    None, returns an empty dict, which is helpful for testing things that don't
    require any valid runs.
  """
    files = {}
    if logdir is None:
        return files
    for specification in logdir.split(','):
        # If it's a gcs path, don't split on colon
        if gcs.IsGCSPath(specification):
            run_name = None
            path = specification
        # If the spec looks like /foo:bar/baz, then we assume it's a path with a
        # colon.
        elif ':' in specification and specification[0] != '/':
            # We split at most once so run_name:/path:with/a/colon will work.
            run_name, _, path = specification.partition(':')
        else:
            run_name = None
            path = specification

        if not os.path.isabs(path) and not gcs.IsGCSPath(path):
            # Create absolute path out of relative one.
            path = os.path.join(os.path.realpath('.'), path)

        files[path] = run_name
    return files
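A short worked illustration of the spec format described in the docstring (the paths are hypothetical):

ParseEventFilesSpec('train:/tmp/logs/train,/tmp/logs/eval')
# -> {'/tmp/logs/train': 'train', '/tmp/logs/eval': None}
ParseEventFilesSpec('/foo:bar/baz')
# -> {'/foo:bar/baz': None}, because the leading slash keeps the colon in the path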
Example #5
def CreateFileLoader(path):
    """Creates a file loader for the given path.

  Args:
    path: A string representing either a normal path or a GCS path.
  Returns:
    An object with a Load() method that yields event_pb2.Event protos.
  """
    if gcs.IsGCSPath(path):
        return gcs_file_loader.GCSFileLoader(path)
    else:
        return event_file_loader.EventFileLoader(path)
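A hedged usage sketch (the event-file path is made up):

loader = CreateFileLoader('/tmp/logs/events.out.tfevents.1234567890.hostname')
for event in loader.Load():
    print(event.wall_time, event.step)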
Example #6
    def AddRunsFromDirectory(self, path, name=None):
        """Load runs from a directory; recursively walks subdirectories.

    If path doesn't exist, no-op. This ensures that it is safe to call
      `AddRunsFromDirectory` multiple times, even before the directory is made.

    If path is a directory, load event files in the directory (if any exist) and
      recursively call AddRunsFromDirectory on any subdirectories. This means you
      can call AddRunsFromDirectory at the root of a tree of event logs and
      TensorBoard will load them all.

    If the `EventMultiplexer` is already loaded this will cause
    the newly created accumulators to `Reload()`.

    Args:
      path: A string path to a directory to load runs from.
      name: Optionally, what name to apply to the runs. If name is provided
        and the directory contains run subdirectories, the name of each subrun
        is the concatenation of the parent name and the subdirectory name. If
        name is provided and the directory contains event files, then a run
        is added called "name" and with the events from the path.

    Raises:
      ValueError: If the path exists and isn't a directory.

    Returns:
      The `EventMultiplexer`.
    """
        subdirs = []
        if gcs.IsGCSPath(path):
            subdirs = [
                subdir
                for (subdir, files) in gcs.ListRecursively(path) if list(
                    filter(event_accumulator.IsTensorFlowEventsFile, files))
            ]
        else:
            if not gfile.Exists(path):
                return  # Maybe it hasn't been created yet, fail silently to retry later
            if not gfile.IsDirectory(path):
                raise ValueError(
                    'AddRunsFromDirectory: path exists and is not a '
                    'directory, %s' % path)
            subdirs = [
                subdir for (subdir, _, files) in gfile.Walk(path) if list(
                    filter(event_accumulator.IsTensorFlowEventsFile, files))
            ]

        for subdir in subdirs:
            logging.info('Adding events from directory %s', subdir)
            rpath = os.path.relpath(subdir, path)
            subname = os.path.join(name, rpath) if name else rpath
            self.AddRun(subdir, name=subname)

        return self
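A hedged usage sketch; constructing the `EventMultiplexer` with no arguments and the directory layout are assumptions:

multiplexer = event_multiplexer.EventMultiplexer()
multiplexer.AddRunsFromDirectory('/tmp/logs', name='experiment')
multiplexer.Reload()  # blocks until the newly added runs have loaded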
Example #7
def _GeneratorFromPath(path):
    """Create an event generator for file or directory at given path string."""
    if gcs.IsGCSPath(path):
        provider = directory_watcher.SequentialGCSProvider(
            path, path_filter=IsTensorFlowEventsFile)
        return directory_watcher.DirectoryWatcher(
            provider, gcs_file_loader.GCSFileLoader)
    elif gfile.IsDirectory(path):
        provider = directory_watcher.SequentialGFileProvider(
            path, path_filter=IsTensorFlowEventsFile)
        return directory_watcher.DirectoryWatcher(
            provider, event_file_loader.EventFileLoader)
    else:
        return event_file_loader.EventFileLoader(path)
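A hedged sketch of the dispatch (both paths are made up, and 'gs://' is assumed to be the prefix that gcs.IsGCSPath recognizes):

generator = _GeneratorFromPath('/tmp/logs')            # existing directory -> DirectoryWatcher
generator = _GeneratorFromPath('gs://my-bucket/run1')  # GCS prefix -> GCS-backed DirectoryWatcher
for event in generator.Load():
    print(event.step)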
Example #8
def ListRecursively(top):
    """Walks a directory tree, yielding (dir_path, file_paths) tuples.

  For each of `top` and its subdirectories, yields a tuple containing the path
  to the directory and the path to each of the contained files.  Note that
  unlike os.walk()/gfile.Walk(), this does not list subdirectories and the file
  paths are all absolute.

  If the directory does not exist, this yields nothing.

  Args:
    top: A path to a directory.
  Yields:
    (dir_path, file_paths) tuples, one for each of `top` and its subdirectories.
  """
    if gcs.IsGCSPath(top):
        for x in gcs.ListRecursively(top):
            yield x
    else:
        for dir_path, _, filenames in gfile.Walk(top):
            yield (dir_path, (os.path.join(dir_path, filename)
                              for filename in filenames))
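A hedged usage sketch (the top directory is hypothetical):

for dir_path, file_paths in ListRecursively('/tmp/logs'):
    for file_path in file_paths:
        print(dir_path, file_path)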
Example #9
    def _SetPath(self, path):
        """Sets the current path to watch for new events.

    This also records the size of the old path, if any. If the size can't be
    found, an error is logged.

    Args:
      path: The full path of the file to watch.
    """
        old_path = self._path
        if old_path and not gcs.IsGCSPath(old_path):
            try:
                # We're done with the path, so store its size.
                size = io_wrapper.Size(old_path)
                logging.debug('Setting latest size of %s to %d', old_path,
                              size)
                self._finalized_sizes[old_path] = size
            except errors.OpError as e:
                logging.error('Unable to get size of %s: %s', old_path, e)

        self._path = path
        self._loader = self._loader_factory(path)
Example #10
    def _GetNextPath(self):
        """Gets the next path to load from.

    This function also does the checking for out-of-order writes as it iterates
    through the paths.

    Returns:
      The next path to load events from, or None if there are no more paths.
    """
        paths = sorted(
            path for path in io_wrapper.ListDirectoryAbsolute(self._directory)
            if self._path_filter(path))
        if not paths:
            return None

        if self._path is None:
            return paths[0]

        # Don't bother checking if the paths are GCS (which we can't check) or if
        # we've already detected an OOO write.
        if not gcs.IsGCSPath(paths[0]) and not self._ooo_writes_detected:
            # Check the previous _OOO_WRITE_CHECK_COUNT paths for out of order writes.
            current_path_index = bisect.bisect_left(paths, self._path)
            ooo_check_start = max(
                0, current_path_index - self._OOO_WRITE_CHECK_COUNT)
            for path in paths[ooo_check_start:current_path_index]:
                if self._HasOOOWrite(path):
                    self._ooo_writes_detected = True
                    break

        next_paths = list(path for path in paths
                          if self._path is None or path > self._path)
        if next_paths:
            return min(next_paths)
        else:
            return None
Example #11
def Size(path):
    """Returns the number of bytes in the given file. Doesn't work on GCS."""
    if gcs.IsGCSPath(path):
        raise NotImplementedError("io_wrapper.Size doesn't support GCS paths")
    else:
        return gfile.Open(path).Size()
Example #12
def Exists(path):
    if gcs.IsGCSPath(path):
        return gcs.Exists(path)
    else:
        return gfile.Exists(path)
Example #13
def IsDirectory(path):
    """Returns true if path exists and is a directory."""
    if gcs.IsGCSPath(path):
        return gcs.IsDirectory(path)
    else:
        return gfile.IsDirectory(path)
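The three wrappers above dispatch on the path scheme. A hedged illustration, where the bucket name is made up and 'gs://' is assumed to be the GCS prefix:

Exists('gs://my-bucket/logs')  # delegates to gcs.Exists
Exists('/tmp/logs')            # delegates to gfile.Exists
Size('/tmp/logs/events.out.tfevents.1234567890.hostname')  # local only; raises NotImplementedError for GCS paths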
Example #14
    def __init__(self, gcs_path):
        if not gcs.IsGCSPath(gcs_path):
            raise ValueError('A GCS path is required')
        self._gcs_path = gcs_path
        self._gcs_offset = 0
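This appears to be the constructor of the GCSFileLoader class referenced in the earlier examples; a minimal construction sketch with a made-up path:

loader = GCSFileLoader('gs://my-bucket/run1/events.out.tfevents.1234567890.hostname')  # raises ValueError for non-GCS paths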