import csv
import logging
import os
import pickle
from os.path import basename
from typing import List

# `gfile` provides the filesystem calls used throughout these snippets;
# `from tensorflow.io import gfile` matches the API used here, though the
# exact import is an assumption. `constants` and `utils` are project-local
# modules referenced by the functions below.


def gen_csv_from_images(input_dir: str,
                        output_file=constants.DEFAULT_CSV_FILENAME,
                        add_label=False,
                        out_path_prefix='',
                        dataset_type=constants.DEFAULT_DATASET_TYPE):
  """Generates an AutoML dataset CSV from a directory of images.

  Args:
    input_dir: Directory of images.
    output_file: Output CSV filename.
    add_label: Whether to include an image label derived from the last
      directory on the image's filepath.
    out_path_prefix: Output path prefix to prepend to each filename
      (e.g. gs://path/to/the/imagedir).
    dataset_type: AutoML dataset type (TRAIN, VALIDATE, TEST, UNSPECIFIED)
      to use for all the parsed images.
  """
  get_label = basename if add_label else lambda _: ''
  with gfile.GFile(os.path.expanduser(output_file), 'w') as csv_file:
    writer = csv.writer(csv_file, delimiter=',')
    for topdir, _, files in gfile.walk(os.path.expanduser(input_dir)):
      # Use a name other than `f` so the open CSV handle is not shadowed.
      for filename in files:
        if out_path_prefix:
          filepath = os.path.join(out_path_prefix, filename)
        else:
          filepath = os.path.join(topdir, filename)
        label = get_label(topdir)
        # Pad with the empty bounding-box columns the AutoML CSV schema expects.
        row = ([dataset_type, filepath, label] +
               [''] * constants.NUM_BOUNDING_BOX_FIELDS)
        writer.writerow(row)
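# A minimal usage sketch, not from the source; the paths and bucket below are
# hypothetical. With add_label=True, an image under /tmp/images/cat/ gets the
# label "cat" (the basename of its parent directory), and each row points at
# the copy under the GCS prefix rather than the local path:
#
#   gen_csv_from_images(
#       input_dir='/tmp/images',
#       output_file='/tmp/all_data.csv',
#       add_label=True,
#       out_path_prefix='gs://my-bucket/images',
#       dataset_type='TRAIN')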
def ls(self, path: str, recursive=False) -> List[File]:
  """Lists files (not directories) under `path`, optionally recursively."""

  def _get_file_stats(path: str):
    stat = gfile.stat(path)
    # `mtime_nsec` is nanoseconds since epoch; convert to whole seconds.
    return File(path=path, size=stat.length, mtime=int(stat.mtime_nsec / 1e9))

  if not gfile.exists(path):
    return []
  # If `path` is itself a file, return its stats directly.
  if not gfile.isdir(path):
    return [_get_file_stats(path)]
  files = []
  if recursive:
    for root, _, res in gfile.walk(path):
      for filename in res:
        if not gfile.isdir(os.path.join(root, filename)):
          files.append(_get_file_stats(os.path.join(root, filename)))
  else:
    for filename in gfile.listdir(path):
      if not gfile.isdir(os.path.join(path, filename)):
        files.append(_get_file_stats(os.path.join(path, filename)))
  # Files only.
  return files
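# `File` is not defined in this snippet. A minimal compatible sketch, with
# field names taken from the constructor call above (the project's actual
# type is an assumption; in the real module it would be defined before `ls`):
from typing import NamedTuple


class File(NamedTuple):
  path: str
  size: int   # bytes, from gfile.stat().length
  mtime: int  # whole seconds since epoch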
def gen_csv_from_images(input_dir: str,
                        output_file=constants.DEFAULT_CSV_FILENAME,
                        add_label=False,
                        dataset_type=constants.DEFAULT_DATASET_TYPE):
  """Generates an AutoML dataset CSV from a directory of images.

  Args:
    input_dir: Directory of images.
    output_file: Output CSV filename.
    add_label: Whether to include an image label derived from the last
      directory on the image's filepath.
    dataset_type: AutoML dataset type (TRAIN, VALIDATE, TEST, UNSPECIFIED)
      to use for all the parsed images.
  """
  get_label = basename if add_label else lambda _: ''
  with gfile.GFile(output_file, 'w') as csv_file:
    writer = csv.writer(csv_file, delimiter=',')
    for topdir, _, files in gfile.walk(input_dir):
      for filename in files:
        # Join with the containing directory; the bare filename alone would
        # drop the path information needed to locate the image.
        filepath = os.path.join(topdir, filename)
        label = get_label(topdir)
        row = ([dataset_type, filepath, label] +
               [''] * constants.NUM_BOUNDING_BOX_FIELDS)
        writer.writerow(row)
def _load_trajectories(self, trajectory_dir):
  """Loads train and eval trajectories from a directory of pickled shards."""
  train_trajectories = []
  eval_trajectories = []
  # Search the entire directory subtree for trajectories.
  for (subdir, _, filenames) in gfile.walk(trajectory_dir):
    for filename in filenames:
      shard_path = os.path.join(subdir, filename)
      with gfile.GFile(shard_path, "rb") as f:
        trajectories = pickle.load(f)
      # Split each shard: the first (1 - eval_frac) of it goes to training,
      # the rest to evaluation.
      pivot = int(len(trajectories) * (1 - self._data_eval_frac))
      train_trajectories.extend(trajectories[:pivot])
      eval_trajectories.extend(trajectories[pivot:])
  assert train_trajectories, "Haven't found any training data."
  assert eval_trajectories, "Haven't found any evaluation data."
  return (train_trajectories, eval_trajectories)
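# For context, a hedged sketch (an assumption, not from the source) of the
# shard format this loader expects: each shard file is a single pickled list
# of trajectories. `save_trajectory_shard` is a hypothetical helper name.
def save_trajectory_shard(trajectories, shard_path):
  with gfile.GFile(shard_path, "wb") as f:
    pickle.dump(trajectories, f)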
def load_trajectories(trajectory_dir, eval_frac):
  """Loads trajectories from a possibly nested directory of pickles."""
  pkl_module = utils.get_pickle_module()
  train_trajectories = []
  eval_trajectories = []
  # Search the entire directory subtree for trajectories.
  for (subdir, _, filenames) in gfile.walk(trajectory_dir):
    for filename in filenames:
      shard_path = os.path.join(subdir, filename)
      with gfile.GFile(shard_path, "rb") as f:
        trajectories = pkl_module.load(f)
      pivot = int(len(trajectories) * (1 - eval_frac))
      train_trajectories.extend(trajectories[:pivot])
      eval_trajectories.extend(trajectories[pivot:])
  assert train_trajectories, "Haven't found any training data."
  assert eval_trajectories, "Haven't found any evaluation data."
  return (train_trajectories, eval_trajectories)
def load_trajectories(trajectory_dir, eval_frac):
  """Loads trajectories from a possibly nested directory of pickles."""
  pkl_module = utils.get_pickle_module()
  train_trajectories = []
  eval_trajectories = []
  # Search the entire directory subtree for trajectories.
  for (subdir, _, filenames) in gfile.walk(trajectory_dir):
    for filename in filenames:
      shard_path = os.path.join(subdir, filename)
      try:
        with gfile.GFile(shard_path, "rb") as f:
          trajectories = pkl_module.load(f)
        pivot = int(len(trajectories) * (1 - eval_frac))
        train_trajectories.extend(trajectories[:pivot])
        eval_trajectories.extend(trajectories[pivot:])
      except EOFError:
        # Skip shards that were only partially written.
        logging.warning(
            "Could not load trajectories from a corrupted shard %s.",
            shard_path,
        )
  assert train_trajectories, "Can't find training data in %s" % trajectory_dir
  assert eval_trajectories, "Can't find evaluation data in %s" % trajectory_dir
  return train_trajectories, eval_trajectories
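# A usage sketch with a hypothetical path: with eval_frac=0.1, a shard of 100
# trajectories yields pivot = int(100 * 0.9) = 90, so 90 trajectories go to
# training and 10 to evaluation.
#
#   train_trajs, eval_trajs = load_trajectories(
#       "/tmp/trajectories", eval_frac=0.1)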