Example #1
# Imports assumed by the snippets below; `constants` and `utils` are
# project-local modules that are not shown on this page.
import csv
import logging  # the source project may use absl.logging instead
import os
import pickle
from os.path import basename
from typing import List

from tensorflow.io import gfile


def gen_csv_from_images(
    input_dir: str,
    output_file=constants.DEFAULT_CSV_FILENAME,
    add_label=False,
    out_path_prefix='',
    dataset_type=constants.DEFAULT_DATASET_TYPE):
  """Generate AutoML dataset CSV from directory of images.

  Args:
    input_dir: Directory of images.
    output_file: Output CSV filename.
    add_label: Whether to include image label based on
      last directory on the image's filepath.
    out_path_prefix: Output path prefix to prepend to each filename.
      (e.g. gs://path/to/the/imagedir)
    dataset_type: AutoML dataset type (TRAIN, VALIDATE, TEST, UNSPECIFIED)
      to use for all the parsed images.
  """

  get_label = basename if add_label else lambda _: ''

  with gfile.GFile(os.path.expanduser(output_file), 'w') as f:
    writer = csv.writer(f, delimiter=',')
    for topdir, _, files in gfile.walk(os.path.expanduser(input_dir)):
      for filename in files:  # renamed from `f` to avoid shadowing the open file handle
        if out_path_prefix:
          filepath = os.path.join(out_path_prefix, filename)
        else:
          filepath = os.path.join(topdir, filename)
        label = get_label(topdir)
        row = ([dataset_type, filepath, label] +
               ['']*constants.NUM_BOUNDING_BOX_FIELDS)
        writer.writerow(row)
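
For context, here is a hypothetical invocation. The `constants` values below are stand-ins inferred from how the module is used above, not the real project's definitions:

# Hypothetical stand-ins for the project-local `constants` module.
DEFAULT_CSV_FILENAME = 'all_data.csv'  # assumed name
DEFAULT_DATASET_TYPE = 'UNSPECIFIED'   # assumed default
NUM_BOUNDING_BOX_FIELDS = 8            # assumed count (four x,y corner pairs)

# Writes one CSV row per image, e.g. for ~/images/cat/cat1.jpg:
#   TRAIN,gs://my-bucket/images/cat1.jpg,cat,,,,,,,,
gen_csv_from_images(
    input_dir='~/images',
    add_label=True,
    out_path_prefix='gs://my-bucket/images',  # hypothetical bucket
    dataset_type='TRAIN')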
Example #2
    def ls(self, path: str, recursive=False) -> List[File]:
        """Lists files under `path`, descending into subdirectories if `recursive`."""
        def _get_file_stats(path: str):
            stat = gfile.stat(path)
            return File(path=path,
                        size=stat.length,
                        # mtime_nsec is in nanoseconds; store whole seconds.
                        mtime=int(stat.mtime_nsec / 1e9))

        if not gfile.exists(path):
            return []
        # If it is a file
        if not gfile.isdir(path):
            return [_get_file_stats(path)]

        files = []
        if recursive:
            for root, _, res in gfile.walk(path):
                for file in res:
                    if not gfile.isdir(os.path.join(root, file)):
                        files.append(_get_file_stats(os.path.join(root, file)))
        else:
            for file in gfile.listdir(path):
                if not gfile.isdir(os.path.join(path, file)):
                    files.append(_get_file_stats(os.path.join(path, file)))
        # Files only
        return files
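
The `File` record returned by `ls` is not defined in the snippet above; a minimal sketch of what the call site implies (field names come from the snippet, types are assumptions) could be:

from typing import NamedTuple

class File(NamedTuple):
    path: str   # full path to the file
    size: int   # size in bytes (gfile stat `length`)
    mtime: int  # modification time in whole seconds since the epoch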
Example #3
def gen_csv_from_images(input_dir: str,
                        output_file=constants.DEFAULT_CSV_FILENAME,
                        add_label=False,
                        dataset_type=constants.DEFAULT_DATASET_TYPE):
    """Generate AutoML dataset CSV from directory of images.

  Args:
    input_dir: Directory of images.
    output_file: Output CSV filename.
    add_label: Whether to include image label based on
      last directory on the image's filepath.
    dataset_type: AutoML dataset type (TRAIN, VALIDATE, TEST, UNSPECIFIED)
      to use for all the parsed images.
  """

    get_label = basename if add_label else lambda _: ''

    with gfile.GFile(output_file, 'w') as f:
        writer = csv.writer(f, delimiter=',')
        for topdir, _, files in gfile.walk(input_dir):
            for filename in files:  # renamed from `f` to avoid shadowing the file handle
                # Join with the directory so the CSV holds full paths, not bare names.
                filepath = os.path.join(topdir, filename)
                label = get_label(topdir)
                row = ([dataset_type, filepath, label] +
                       [''] * constants.NUM_BOUNDING_BOX_FIELDS)
                writer.writerow(row)
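
All of these snippets lean on `gfile.walk`, which mirrors `os.walk` but also understands remote filesystems such as Google Cloud Storage. A minimal check (local path assumed for illustration):

from tensorflow.io import gfile

# Yields (dirpath, subdirs, filenames) tuples, exactly like os.walk;
# the same call works unchanged with a 'gs://bucket/prefix' path.
for dirpath, subdirs, filenames in gfile.walk('/tmp/images'):
    print(dirpath, subdirs, filenames)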
Example #4
    def _load_trajectories(self, trajectory_dir):
        train_trajectories = []
        eval_trajectories = []
        # Search the entire directory subtree for trajectories.
        for (subdir, _, filenames) in gfile.walk(trajectory_dir):
            for filename in filenames:
                shard_path = os.path.join(subdir, filename)
                with gfile.GFile(shard_path, "rb") as f:
                    trajectories = pickle.load(f)
                    pivot = int(len(trajectories) * (1 - self._data_eval_frac))
                    train_trajectories.extend(trajectories[:pivot])
                    eval_trajectories.extend(trajectories[pivot:])
        assert train_trajectories, "Haven't found any training data."
        assert eval_trajectories, "Haven't found any evaluation data."
        return (train_trajectories, eval_trajectories)
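
The train/eval split above hinges on the `pivot` index. A worked check of the arithmetic (values are illustrative):

# With 10 trajectories and an eval fraction of 0.2:
#   pivot = int(10 * (1 - 0.2)) = 8
trajectories = list(range(10))
pivot = int(len(trajectories) * (1 - 0.2))
assert trajectories[:pivot] == [0, 1, 2, 3, 4, 5, 6, 7]  # training shards
assert trajectories[pivot:] == [8, 9]                    # evaluation shards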
Example #5
def load_trajectories(trajectory_dir, eval_frac):
    """Loads trajectories from a possibly nested directory of pickles."""
    pkl_module = utils.get_pickle_module()
    train_trajectories = []
    eval_trajectories = []
    # Search the entire directory subtree for trajectories.
    for (subdir, _, filenames) in gfile.walk(trajectory_dir):
        for filename in filenames:
            shard_path = os.path.join(subdir, filename)
            with gfile.GFile(shard_path, "rb") as f:
                trajectories = pkl_module.load(f)
                pivot = int(len(trajectories) * (1 - eval_frac))
                train_trajectories.extend(trajectories[:pivot])
                eval_trajectories.extend(trajectories[pivot:])
    assert train_trajectories, "Haven't found any training data."
    assert eval_trajectories, "Haven't found any evaluation data."
    return (train_trajectories, eval_trajectories)
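
`utils.get_pickle_module()` is project-local and not shown here; a trivial stand-in (an assumption only, the real helper may select a faster or compressed pickle implementation) would be:

import pickle

def get_pickle_module():
    # Hypothetical stand-in for the project's `utils.get_pickle_module`;
    # anything exposing load/dump with pickle's signature works here.
    return pickle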
Example #6
def load_trajectories(trajectory_dir, eval_frac):
  """Loads trajectories from a possibly nested directory of pickles."""
  pkl_module = utils.get_pickle_module()
  train_trajectories = []
  eval_trajectories = []
  # Search the entire directory subtree for trajectories.
  for (subdir, _, filenames) in gfile.walk(trajectory_dir):
    for filename in filenames:
      shard_path = os.path.join(subdir, filename)
      try:
        with gfile.GFile(shard_path, "rb") as f:
          trajectories = pkl_module.load(f)
        pivot = int(len(trajectories) * (1 - eval_frac))
        train_trajectories.extend(trajectories[:pivot])
        eval_trajectories.extend(trajectories[pivot:])
      except EOFError:
        logging.warning(
            "Could not load trajectories from a corrupted shard %s.",
            shard_path,
        )
  assert train_trajectories, "Can't find training data in %s" % trajectory_dir
  assert eval_trajectories, "Can't find evaluation data in %s" % trajectory_dir
  return train_trajectories, eval_trajectories
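
A hypothetical round trip through this corruption-tolerant loader (the path and shard contents are invented for illustration):

import os
import pickle

os.makedirs('/tmp/traj_shards', exist_ok=True)
with open('/tmp/traj_shards/shard_0.pkl', 'wb') as f:
  pickle.dump(['traj_%d' % i for i in range(4)], f)

# pivot = int(4 * (1 - 0.25)) = 3 -> three training and one evaluation trajectory;
# a truncated shard would be skipped with a warning instead of crashing the load.
train, evaluation = load_trajectories('/tmp/traj_shards', eval_frac=0.25)
assert len(train) == 3 and len(evaluation) == 1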