def testWalkPostOrder(self):
    """Walk the test tree with in_order=False and check the visiting order."""
    root = os.path.join(self._base_dir, "test_dir")
    self._setupWalkDirectories(root)

    # Drain the walk (in_order = False) up front, then split the
    # (dir, subdirs, files) triples into three parallel lists.
    visited = list(file_io.walk(root, in_order=False))
    all_dirs = [w_dir for w_dir, _, _ in visited]
    all_subdirs = [w_subdirs for _, w_subdirs, _ in visited]
    all_files = [w_files for _, _, w_files in visited]

    expected_dirs = [
        os.path.join(root, name)
        for name in ["subdir1_1", "subdir1_2/subdir2", "subdir1_2", "subdir1_3"]
    ]
    expected_dirs.append(root)
    self.assertItemsEqual(all_dirs, expected_dirs)

    # The root directory must be the last of the five entries.
    self.assertEqual(root, all_dirs[4])

    # The nested directory has to show up before its parent.
    nested_dir = os.path.join(root, "subdir1_2/subdir2")
    parent_dir = os.path.join(root, "subdir1_2")
    self.assertLess(all_dirs.index(nested_dir), all_dirs.index(parent_dir))

    self.assertItemsEqual(all_subdirs[:4], [[], [], ["subdir2"], []])
    self.assertItemsEqual(
        all_subdirs[4], ["subdir1_1", "subdir1_2", "subdir1_3"])

    self.assertItemsEqual(
        all_files, [["file2.txt"], [], [], [], ["file1.txt"]])
    file2_position = all_files.index(["file2.txt"])
    file1_position = all_files.index(["file1.txt"])
    self.assertLess(file2_position, file1_position)
def testWalkInOrder(self):
    """Walk the test tree with in_order=True and check the visiting order.

    All expected paths and names are compared as bytes.
    """
    root = os.path.join(self._base_dir, "test_dir")
    self._setupWalkDirectories(root)

    def bytes_path(name):
        # Expected walk entry for a directory below the root, as bytes.
        return compat.as_bytes(os.path.join(root, name))

    # Drain the walk (in_order = True) up front, then split the
    # (dir, subdirs, files) triples into three parallel lists.
    visited = list(file_io.walk(root, in_order=True))
    all_dirs = [w_dir for w_dir, _, _ in visited]
    all_subdirs = [w_subdirs for _, w_subdirs, _ in visited]
    all_files = [w_files for _, w_files in
                 ((w_dir, w_files) for w_dir, _, w_files in visited)]

    root_bytes = compat.as_bytes(root)
    expected_dirs = [root_bytes]
    expected_dirs.extend(
        bytes_path(name)
        for name in ["subdir1_1", "subdir1_2", "subdir1_2/subdir2", "subdir1_3"]
    )
    self.assertItemsEqual(all_dirs, expected_dirs)

    # The root directory must be the very first entry.
    self.assertEqual(root_bytes, all_dirs[0])

    # A parent directory has to show up before the directory nested in it.
    parent_position = all_dirs.index(bytes_path("subdir1_2"))
    nested_position = all_dirs.index(bytes_path("subdir1_2/subdir2"))
    self.assertLess(parent_position, nested_position)

    self.assertItemsEqual(all_subdirs[1:5], [[], [b"subdir2"], [], []])
    self.assertItemsEqual(
        all_subdirs[0], [b"subdir1_1", b"subdir1_2", b"subdir1_3"])

    self.assertItemsEqual(
        all_files, [[b"file1.txt"], [b"file2.txt"], [], [], []])
    file1_position = all_files.index([b"file1.txt"])
    file2_position = all_files.index([b"file2.txt"])
    self.assertLess(file1_position, file2_position)
def testWalkInOrder(self, join):
    """Walk the test tree with in_order=True and check the visiting order.

    Args:
        join: callable that builds the path object handed to file_io.walk
            from the base directory and "test_dir".
    """
    root_str = file_io.join(self._base_dir, "test_dir")
    walk_root = join(self._base_dir, "test_dir")
    self._setupWalkDirectories(root_str)

    # Drain the walk (in_order = True) up front, then split the
    # (dir, subdirs, files) triples into three parallel lists.
    visited = list(file_io.walk(walk_root, in_order=True))
    all_dirs = [w_dir for w_dir, _, _ in visited]
    all_subdirs = [w_subdirs for _, w_subdirs, _ in visited]
    all_files = [w_files for _, _, w_files in visited]

    # Expectations are always expressed with the string form of the root.
    expected_dirs = [root_str]
    expected_dirs.extend(
        file_io.join(root_str, name)
        for name in ["subdir1_1", "subdir1_2", "subdir1_2/subdir2", "subdir1_3"]
    )
    self.assertItemsEqual(all_dirs, expected_dirs)

    # The root directory must be the very first entry.
    self.assertEqual(root_str, all_dirs[0])

    # A parent directory has to show up before the directory nested in it.
    parent_position = all_dirs.index(file_io.join(root_str, "subdir1_2"))
    nested_position = all_dirs.index(
        file_io.join(root_str, "subdir1_2/subdir2"))
    self.assertLess(parent_position, nested_position)

    self.assertItemsEqual(all_subdirs[1:5], [[], ["subdir2"], [], []])
    self.assertItemsEqual(
        all_subdirs[0], ["subdir1_1", "subdir1_2", "subdir1_3"])

    self.assertItemsEqual(
        all_files, [["file1.txt"], ["file2.txt"], [], [], []])
    file1_position = all_files.index(["file1.txt"])
    file2_position = all_files.index(["file2.txt"])
    self.assertLess(file1_position, file2_position)
def list_files(in_path):
    """Return the file names from the first entry of a walk over ``in_path``.

    Only the first ``(dirpath, dirnames, filenames)`` triple produced by
    ``file_io.walk`` is consulted; later entries are never requested. An
    empty list is returned when the walk yields nothing.
    """
    first_entry = next(iter(file_io.walk(in_path)), None)
    if first_entry is None:
        return []

    _dirpath, _dirnames, filenames = first_entry
    # Hand back a fresh list so the caller never aliases the walk's own data.
    return list(filenames)
def copyDir(srcDir, dstDir):
    """Recursively copy the contents of ``srcDir`` into ``dstDir``.

    Files directly inside ``srcDir`` are copied into ``dstDir`` with
    ``copyFile``. For every subdirectory a directory of the same name is
    created under ``dstDir`` and filled by a recursive call.

    Args:
        srcDir: path of the directory to copy from.
        dstDir: path of the directory to copy into.
    """
    # file_io.walk already descends into every subdirectory by itself.
    # Looping over the whole walk *and* recursing explicitly made every
    # deeper iteration copy nested files flat into dstDir and create nested
    # directories at the wrong level. Only the top-level entry is consumed
    # here; the recursive call below takes care of the deeper levels.
    top_entry = next(iter(file_io.walk(srcDir)), None)
    if top_entry is None:
        # The walk produced nothing, so there is nothing to copy.
        return
    dir_name, sub_dirs, leaf_files = top_entry

    # Copy all the files of this level over.
    for leaf_file in leaf_files:
        copyFile(os.path.join(dir_name, leaf_file), dstDir)

    # Create each subdirectory, then copy its contents into it.
    for sub_dir in sub_dirs:
        dstSubDir = os.path.join(dstDir, sub_dir)
        file_io.create_dir(dstSubDir)
        copyDir(os.path.join(srcDir, sub_dir), dstSubDir)
def testWalkFailure(self):
    """Walking a directory that was never created must yield no entries."""
    missing_dir = os.path.join(self._base_dir, "test_dir")

    # Try walking a directory that wasn't created, then split whatever
    # came back into three parallel lists.
    visited = list(file_io.walk(missing_dir, in_order=False))
    all_dirs = [w_dir for w_dir, _, _ in visited]
    all_subdirs = [w_subdirs for _, w_subdirs, _ in visited]
    all_files = [w_files for _, _, w_files in visited]

    for collected in (all_dirs, all_subdirs, all_files):
        self.assertItemsEqual(collected, [])
def _recursive_copy(src_dir, dest_dir):
    """Copy the contents of src_dir into the folder dest_dir.

    The directory structure below src_dir is mirrored below dest_dir: files
    are copied with ``_copy_all`` into the directory that corresponds to
    the one they came from, and each subdirectory is created before its
    contents are copied.

    When called, dest_dir should exist.

    Args:
        src_dir: path of the directory to copy from.
        dest_dir: path of the (already existing) directory to copy into.
    """
    # file_io.walk descends into every subdirectory. Iterating over the
    # whole walk while always targeting dest_dir flattened nested files into
    # dest_dir (same-named files overwrote each other) and created nested
    # directories directly under dest_dir. Only the top-level entry is used
    # here and each subdirectory is handled by a recursive call that
    # receives its own mirrored destination.
    top_entry = next(iter(file_io.walk(src_dir)), None)
    if top_entry is None:
        # The walk produced nothing, so there is nothing to copy.
        return
    dir_name, sub_dirs, leaf_files = top_entry

    # Copy all the files of this level over.
    for leaf_file in leaf_files:
        leaf_file_path = os.path.join(dir_name, leaf_file)
        _copy_all([leaf_file_path], dest_dir)

    # Now make each folder and fill it with the matching source contents.
    for sub_dir in sub_dirs:
        dest_sub_dir = os.path.join(dest_dir, sub_dir)
        file_io.create_dir(dest_sub_dir)
        _recursive_copy(os.path.join(dir_name, sub_dir), dest_sub_dir)
def main(_):
    """Stage input files locally, then push the local files to the output.

    Reads ``args.inputbucket``, ``args.outputbucket`` and ``args.datadir``.
    Every file found while walking the input URL is copied into
    ``args.datadir``; afterwards every file found while walking
    ``args.datadir`` is copied to the output URL. In both directions only
    the bare file name is kept on the destination side.
    """
    # The Tensorflow file_io.walk() function has an issue with iterating
    # over the top level of a bucket: it requires a directory within the
    # bucket. Both URLs therefore point at a "data/" directory.
    input_url = 's3://' + args.inputbucket + "/data/"
    output_url = 's3://' + args.outputbucket + "/data/"

    os.makedirs(args.datadir)

    # Step 1: copy the files from pachyderm into a convenient local
    # directory for processing.
    print("walking {} for copying files".format(input_url))
    for remote_dir, _remote_subdirs, remote_names in file_io.walk(
            input_url, True):
        for name in remote_names:
            source = os.path.join(remote_dir, name)
            target = os.path.join(args.datadir, name)
            print("copying {} to {}".format(source, target))
            file_io.copy(source, target, True)

    # Step 2 would go here: apply your training to the data in
    # args.datadir. It might operate on the data directly, or place
    # additional data in the same directory.

    # Step 3: copy the output of those operations to another pachyderm repo.
    print("walking {} for copying to {}".format(args.datadir, output_url))
    for local_dir, _local_subdirs, local_names in os.walk(
            args.datadir, topdown=True):
        for name in local_names:
            source = os.path.join(local_dir, name)
            target = output_url + name
            print("copying {} to {}".format(source, target))
            file_io.copy(source, target, True)