Example #1
def extract(log_dir: str, should_connect: bool = True):
    global etb_logger

    log_scanner = scan_log_dirs.LogScanner(should_connect=should_connect,
                                           logger=etb_logger)

    log_dir = os.path.normpath(log_dir)

    etb_logger.info("looping over %s (log_dir)", log_dir)

    while True:
        # The design allows for top-level log files as well as subdirectories
        # that contain sub-run logs.  These are indexed in the training data
        # service with the subdir in the MetaInfo inner struct.  Because these
        # directories can be created while we're processing, we re-check all
        # the paths on each iteration.

        if not os.path.exists(log_dir):
            time.sleep(1)
            continue

        # etb_logger.debug("log_dir DOES exist: %s", log_dir)

        # logging.debug("calling scan for the log_scanner")
        log_scanner.scan(log_dir=log_dir,
                         is_log=match_log_file.is_log_file,
                         push_function=push_log_line.push,
                         should_loop=False)

        for summary_dir in event_multiplexer.GetLogdirSubdirectories(log_dir):
            summary_dir = os.path.normpath(summary_dir)

            run_id = ""
            top_log_dir = log_dir

            if is_sub_run_dir(summary_dir, log_dir):
                run_id = subdir_below_logdir(summary_dir, log_dir)
                top_log_dir = dir_below_logdir(summary_dir, log_dir)

            if top_log_dir not in log_scanner.log_runners:
                td_client = log_scanner.td_client
                if run_id != "":
                    # In this case, we'll establish a new buffered tds client, since it
                    # has to know which directory to write to.
                    etb_logger.info("Creating buffered client for run_id: %s",
                                    run_id)
                    td_client = tdb.TrainingDataClientBuffered(
                        td_client.td_client)
                run_tracker = Run_tracker(
                    td_client=td_client,
                    log_dir=top_log_dir,
                    sub_identifier=run_id,
                    lines_counter=log_scanner.lines_counter)

                log_scanner.log_runners[top_log_dir] = run_tracker
                run_tracker.build_event_trackers()
                run_tracker.start()

        time.sleep(0.5)
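
The path helpers is_sub_run_dir, subdir_below_logdir, and dir_below_logdir are defined elsewhere in this project. A minimal sketch of plausible implementations, assuming a layout of log_dir/<run_id>/<group>/... (all three bodies are hypothetical reconstructions, not the project's actual code):

import os

def is_sub_run_dir(summary_dir, log_dir):
    # Hypothetical sketch: treat summary_dir as a sub-run directory when it
    # sits at least two path components below log_dir.
    rel = os.path.relpath(summary_dir, log_dir)
    return rel != '.' and len(rel.split(os.sep)) >= 2

def subdir_below_logdir(summary_dir, log_dir):
    # Hypothetical sketch: the first path component below log_dir,
    # used as the run id.
    rel = os.path.relpath(summary_dir, log_dir)
    return rel.split(os.sep)[0]

def dir_below_logdir(summary_dir, log_dir):
    # Hypothetical sketch: the absolute path of that first-level directory.
    return os.path.join(log_dir, subdir_below_logdir(summary_dir, log_dir))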
Example #2
def get_inspection_units(logdir='', event_file='', tag=''):
    """Returns a list of InspectionUnit objects given either logdir or event_file.

  If logdir is given, the number of InspectionUnits should equal the
  number of directories or subdirectories that contain event files.

  If event_file is given, the number of InspectionUnits should be 1.

  Args:
    logdir: A log directory that contains event files.
    event_file: Or, a particular event file path.
    tag: An optional tag name to query for.

  Returns:
    A list of InspectionUnit objects.
  """
    if logdir:
        subdirs = event_multiplexer.GetLogdirSubdirectories(logdir)
        inspection_units = []
        for subdir in subdirs:
            generator = itertools.chain(*[
                generator_from_event_file(os.path.join(subdir, f))
                for f in tf.gfile.ListDirectory(subdir)
                if event_accumulator.IsTensorFlowEventsFile(
                    os.path.join(subdir, f))
            ])
            inspection_units.append(
                InspectionUnit(name=subdir,
                               generator=generator,
                               field_to_obs=get_field_to_observations_map(
                                   generator, tag)))
        if inspection_units:
            print('Found event files in:\n{}\n'.format('\n'.join(
                [u.name for u in inspection_units])))
        elif event_accumulator.IsTensorFlowEventsFile(logdir):
            print(
                'It seems that {} may be an event file instead of a logdir. If this '
                'is the case, use --event_file instead of --logdir to pass '
                'it in.'.format(logdir))
        else:
            print('No event files found within logdir {}'.format(logdir))
        return inspection_units
    elif event_file:
        generator = generator_from_event_file(event_file)
        return [
            InspectionUnit(name=event_file,
                           generator=generator,
                           field_to_obs=get_field_to_observations_map(
                               generator, tag))
        ]
    return []
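
A hypothetical call, assuming '/tmp/runs' contains event files and 'loss' is a tag of interest (both values are illustrative, not part of the example above):

units = get_inspection_units(logdir='/tmp/runs', tag='loss')
for unit in units:
    # field_to_obs maps event fields to the observations gathered for them.
    print(unit.name, sorted(unit.field_to_obs))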
Example #3
    def build_event_trackers(self):
        # Check whether new group subdirectories have been added.
        for summary_dir in event_multiplexer.GetLogdirSubdirectories(
                self.log_dir + '/'):
            # In the case of the top-level run tracker, this screens out
            # sub-run directories.  Otherwise log_dir is the sub-run
            # directory itself, and the check should not fire.
            if not is_sub_run_dir(summary_dir, self.log_dir):
                Run_tracker.sync_event_files_for_nfs_cache(summary_dir)
                group = os.path.basename(summary_dir)
                if summary_dir not in self.summary_dirs:
                    self.summary_dirs.append(summary_dir)
                    tracker = Tracker(summary_dir, self.log_dir,
                                      self.sub_identifier, group,
                                      self.td_client)
                    self.event_trackers.append(tracker)
Example #4
def generators_from_logdir(logdir):
  """Returns a list of event generators for subdirectories with event files.

  The number of generators returned should equal the number of directories
  within logdir that contain event files. If only logdir contains event files,
  returns a list of length one.

  Args:
    logdir: A log directory that contains event files.

  Returns:
    List of event generators for each subdirectory with event files.
  """
  subdirs = event_multiplexer.GetLogdirSubdirectories(logdir)
  generators = [
      itertools.chain(*[
          generator_from_event_file(os.path.join(subdir, f))
          for f in tf.gfile.ListDirectory(subdir)
          if event_accumulator.IsTensorFlowEventsFile(os.path.join(subdir, f))
      ]) for subdir in subdirs
  ]
  return generators
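
A hypothetical usage sketch, assuming '/tmp/runs' is a logdir with event files (the path and the counting loop are illustrative):

generators = generators_from_logdir('/tmp/runs')
for i, gen in enumerate(generators):
    # Each generator yields the events found in one subdirectory.
    print('subdir %d: %d events' % (i, sum(1 for _ in gen)))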
Example #5
    def _maybe_scalars(self, fields, run):
        from tensorboard.backend.event_processing import event_multiplexer
        from tensorboard.backend.event_processing import event_accumulator
        _ensure_tf_logger_patched()
        scalars = {}
        for path in event_multiplexer.GetLogdirSubdirectories(run.path):
            events_checksum_field_name = self._events_checksum_field_name(path)
            last_checksum = fields.get(events_checksum_field_name)
            cur_checksum = self._events_checksum(path)
            log.debug("event path checksums for %s: last=%s, cur=%s", path,
                      last_checksum, cur_checksum)
            if last_checksum == cur_checksum:
                continue
            scalars[events_checksum_field_name] = cur_checksum
            log.debug("indexing events in %s", path)
            rel_path = os.path.relpath(path, run.path)
            events = event_accumulator._GeneratorFromPath(path).Load()
            scalar_vals = self._scalar_vals(events, rel_path)
            for key, vals in scalar_vals.items():
                if not vals:
                    continue
                self._store_scalar_vals(key, vals, scalars)
        return scalars
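
The _events_checksum helper is not shown above. A hypothetical sketch of one way such a checksum could be computed so that unchanged event directories are skipped (the hashing scheme here is an assumption, not the project's code):

import hashlib
import os

def _events_checksum(path):
    # Hypothetical: fold each file's name, size, and mtime into one digest;
    # any change to the directory contents changes the checksum.
    digest = hashlib.md5()
    for name in sorted(os.listdir(path)):
        st = os.stat(os.path.join(path, name))
        digest.update(('%s:%d:%d' % (name, st.st_size, st.st_mtime)).encode())
    return digest.hexdigest()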
Example #6
def _refresh_event_loaders(opdir):
    for subdir in event_multiplexer.GetLogdirSubdirectories(opdir):
        name = os.path.relpath(subdir, opdir)
        if name not in event_loaders:
            event_loaders[name] = _init_event_loader(subdir)
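
All six examples center on the same call. A minimal self-contained sketch, assuming TensorBoard is installed and '/tmp/runs' is a hypothetical logdir:

import os
from tensorboard.backend.event_processing import event_multiplexer

# GetLogdirSubdirectories yields every directory under the given logdir
# (possibly including logdir itself) that contains at least one event file.
for subdir in event_multiplexer.GetLogdirSubdirectories('/tmp/runs'):
    print(os.path.relpath(subdir, '/tmp/runs'))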