Esempio n. 1
0
 def testWalkPostOrder(self):
   """Verify `file_io.walk(..., in_order=False)` reports children first.

   The fixture is built by `self._setupWalkDirectories`; the assertions below
   expect three first-level sub-directories, one nested sub-directory and two
   files, with the root directory reported last.
   """
   dir_path = os.path.join(self._base_dir, "test_dir")
   self._setupWalkDirectories(dir_path)

   # Materialize the walk (in_order = False) and split it into its columns.
   walked = list(file_io.walk(dir_path, in_order=False))
   all_dirs = [entry[0] for entry in walked]
   all_subdirs = [entry[1] for entry in walked]
   all_files = [entry[2] for entry in walked]

   nested_dir = os.path.join(dir_path, "subdir1_2/subdir2")
   parent_dir = os.path.join(dir_path, "subdir1_2")
   expected_dirs = [
       os.path.join(dir_path, item)
       for item in ["subdir1_1", "subdir1_2/subdir2", "subdir1_2", "subdir1_3"]
   ] + [dir_path]

   # Directories: the root comes last and the nested dir precedes its parent.
   self.assertItemsEqual(all_dirs, expected_dirs)
   self.assertEqual(dir_path, all_dirs[4])
   self.assertLess(all_dirs.index(nested_dir), all_dirs.index(parent_dir))

   # Sub-directory listings: the first four entries are the non-root dirs.
   self.assertItemsEqual(all_subdirs[0:4], [[], [], ["subdir2"], []])
   self.assertItemsEqual(all_subdirs[4],
                         ["subdir1_1", "subdir1_2", "subdir1_3"])

   # Files: the file2.txt listing must be reported before the file1.txt one.
   self.assertItemsEqual(all_files, [["file2.txt"], [], [], [], ["file1.txt"]])
   self.assertLess(
       all_files.index(["file2.txt"]), all_files.index(["file1.txt"]))
Esempio n. 2
0
 def testWalkInOrder(self):
   """Verify `file_io.walk(..., in_order=True)` reports parents first.

   This variant expects every path and name yielded by the walk to be bytes,
   so all expected values are converted with `compat.as_bytes` or written as
   bytes literals.
   """
   dir_path = os.path.join(self._base_dir, "test_dir")
   self._setupWalkDirectories(dir_path)

   # Materialize the walk (in_order = True) and split it into its columns.
   walked = list(file_io.walk(dir_path, in_order=True))
   all_dirs = [entry[0] for entry in walked]
   all_subdirs = [entry[1] for entry in walked]
   all_files = [entry[2] for entry in walked]

   root_dir = compat.as_bytes(dir_path)
   parent_dir = compat.as_bytes(os.path.join(dir_path, "subdir1_2"))
   nested_dir = compat.as_bytes(os.path.join(dir_path, "subdir1_2/subdir2"))
   expected_dirs = [root_dir] + [
       compat.as_bytes(os.path.join(dir_path, item))
       for item in ["subdir1_1", "subdir1_2", "subdir1_2/subdir2", "subdir1_3"]
   ]

   # Directories: the root comes first and a parent precedes its nested dir.
   self.assertItemsEqual(all_dirs, expected_dirs)
   self.assertEqual(root_dir, all_dirs[0])
   self.assertLess(all_dirs.index(parent_dir), all_dirs.index(nested_dir))

   # Sub-directory listings: entry 0 is the root, entries 1-4 the rest.
   self.assertItemsEqual(all_subdirs[1:5], [[], [b"subdir2"], [], []])
   self.assertItemsEqual(all_subdirs[0],
                         [b"subdir1_1", b"subdir1_2", b"subdir1_3"])

   # Files: the file1.txt listing must be reported before the file2.txt one.
   self.assertItemsEqual(all_files, [[b"file1.txt"], [b"file2.txt"], [], [],
                                     []])
   self.assertLess(
       all_files.index([b"file1.txt"]), all_files.index([b"file2.txt"]))
Esempio n. 3
0
 def testWalkInOrder(self, join):
     """Verify an in-order walk when the root path is built by `join`.

     Args:
       join: callable used to build the path handed to `file_io.walk`.
         Expected values are always built with `file_io.join`, so the walk
         must yield plain string paths whatever `join` returns.
         NOTE(review): presumably supplied by a parameterized decorator that
         is outside this view -- confirm.
     """
     dir_path_str = file_io.join(self._base_dir, "test_dir")
     dir_path = join(self._base_dir, "test_dir")
     self._setupWalkDirectories(dir_path_str)

     # Materialize the walk (in_order = True) and split it into its columns.
     walked = list(file_io.walk(dir_path, in_order=True))
     all_dirs = [entry[0] for entry in walked]
     all_subdirs = [entry[1] for entry in walked]
     all_files = [entry[2] for entry in walked]

     expected_dirs = [dir_path_str] + [
         file_io.join(dir_path_str, item) for item in
         ["subdir1_1", "subdir1_2", "subdir1_2/subdir2", "subdir1_3"]
     ]

     # Directories: the root comes first and a parent precedes its nested dir.
     self.assertItemsEqual(all_dirs, expected_dirs)
     self.assertEqual(dir_path_str, all_dirs[0])
     parent_index = all_dirs.index(file_io.join(dir_path_str, "subdir1_2"))
     nested_index = all_dirs.index(
         file_io.join(dir_path_str, "subdir1_2/subdir2"))
     self.assertLess(parent_index, nested_index)

     # Sub-directory listings: entry 0 is the root, entries 1-4 the rest.
     self.assertItemsEqual(all_subdirs[1:5], [[], ["subdir2"], [], []])
     self.assertItemsEqual(all_subdirs[0],
                           ["subdir1_1", "subdir1_2", "subdir1_3"])

     # Files: the file1.txt listing must be reported before the file2.txt one.
     self.assertItemsEqual(all_files,
                           [["file1.txt"], ["file2.txt"], [], [], []])
     self.assertLess(all_files.index(["file1.txt"]),
                     all_files.index(["file2.txt"]))
Esempio n. 4
0
 def testWalkPostOrder(self):
   """Check the ordering guarantees of a post-order (`in_order=False`) walk.

   Asserts that the root directory is the fifth and last entry, that
   "subdir1_2/subdir2" is visited before "subdir1_2", and that the listing
   containing file2.txt is produced before the one containing file1.txt.
   """
   dir_path = os.path.join(self._base_dir, "test_dir")
   self._setupWalkDirectories(dir_path)

   # Collect the whole walk (in_order = False) before asserting on it.
   visited = list(file_io.walk(dir_path, in_order=False))
   all_dirs = [step[0] for step in visited]
   all_subdirs = [step[1] for step in visited]
   all_files = [step[2] for step in visited]

   relative_dirs = ["subdir1_1", "subdir1_2/subdir2", "subdir1_2", "subdir1_3"]
   self.assertItemsEqual(
       all_dirs,
       [os.path.join(dir_path, item) for item in relative_dirs] + [dir_path])
   self.assertEqual(dir_path, all_dirs[4])

   child_index = all_dirs.index(os.path.join(dir_path, "subdir1_2/subdir2"))
   parent_index = all_dirs.index(os.path.join(dir_path, "subdir1_2"))
   self.assertLess(child_index, parent_index)

   self.assertItemsEqual(all_subdirs[0:4], [[], [], ["subdir2"], []])
   self.assertItemsEqual(all_subdirs[4],
                         ["subdir1_1", "subdir1_2", "subdir1_3"])

   self.assertItemsEqual(all_files, [["file2.txt"], [], [], [], ["file1.txt"]])
   self.assertLess(
       all_files.index(["file2.txt"]), all_files.index(["file1.txt"]))
Esempio n. 5
0
def list_files(in_path):
    """Return the file names from the first entry of `file_io.walk(in_path)`.

    Only the first tuple produced by the walk is consumed; later entries are
    never requested. An empty list is returned when the walk yields nothing.
    """
    first_entry = next(iter(file_io.walk(in_path)), None)
    if first_entry is None:
        return []

    _dirpath, _dirnames, filenames = first_entry
    # Copy so the caller gets a fresh list, as the original `extend` did.
    return list(filenames)
Esempio n. 6
0
def copyDir(srcDir, dstDir):
  """Recursively copy the directory tree rooted at srcDir into dstDir.

  Files found directly in srcDir are handed to `copyFile` with dstDir as the
  destination. Each immediate sub-directory is created under dstDir and then
  copied by a recursive call.

  Args:
    srcDir: directory to copy from.
    dstDir: directory to copy into. It is not created here, so it must
      already exist (recursive calls create it before descending).
  """
  for dir_name, sub_dirs, leaf_files in file_io.walk(srcDir):
    # Copy the files that live directly in this directory.
    for leaf_file in leaf_files:
      copyFile(os.path.join(dir_name, leaf_file), dstDir)

    # Mirror each immediate sub-directory, then descend into it.
    for sub_dir in sub_dirs:
      dstSubDir = os.path.join(dstDir, sub_dir)
      file_io.create_dir(dstSubDir)
      copyDir(os.path.join(srcDir, sub_dir), dstSubDir)

    # `file_io.walk` is itself recursive, and the calls above already handle
    # every descendant. Consuming further entries would copy nested files a
    # second time (flattened into dstDir) and recurse into paths built from
    # the wrong parent, so stop after the top-level entry. This relies on the
    # walk yielding srcDir first, which is its default in-order behavior.
    break
Esempio n. 7
0
 def testWalkFailure(self):
   """Walking a directory this test never creates must yield no entries."""
   dir_path = os.path.join(self._base_dir, "test_dir")
   # No setup call is made here, so the walk targets a path the test has not
   # created; it is expected to finish without producing any tuple.
   walked = list(file_io.walk(dir_path, in_order=False))
   all_dirs = [entry[0] for entry in walked]
   all_subdirs = [entry[1] for entry in walked]
   all_files = [entry[2] for entry in walked]
   self.assertItemsEqual(all_dirs, [])
   self.assertItemsEqual(all_subdirs, [])
   self.assertItemsEqual(all_files, [])
Esempio n. 8
0
 def testWalkFailure(self):
   """Check that a post-order walk over an uncreated directory is empty."""
   dir_path = os.path.join(self._base_dir, "test_dir")
   # The directory is deliberately not set up before walking it.
   visited = list(file_io.walk(dir_path, in_order=False))

   # Each column of the walk (dirs, sub-dirs, files) must be empty.
   for column in range(3):
     self.assertItemsEqual([step[column] for step in visited], [])
Esempio n. 9
0
def _recursive_copy(src_dir, dest_dir):
  """Copy the contents of src_dir into the folder dest_dir.

  The directory structure is preserved: files directly in src_dir are copied
  into dest_dir, and each sub-directory is recreated under dest_dir and
  filled by a recursive call.

  When called, dest_dir should exist.

  Args:
    src_dir: directory whose contents are copied.
    dest_dir: existing directory that receives the copy.
  """
  for dir_name, sub_dirs, leaf_files in file_io.walk(src_dir):
    # copy all the files over
    for leaf_file in leaf_files:
      leaf_file_path = os.path.join(dir_name, leaf_file)
      _copy_all([leaf_file_path], dest_dir)

    # Now make all the folders, and copy each one's contents into its mirror.
    for sub_dir in sub_dirs:
      dest_sub_dir = os.path.join(dest_dir, sub_dir)
      file_io.create_dir(dest_sub_dir)
      _recursive_copy(os.path.join(dir_name, sub_dir), dest_sub_dir)

    # The walk also yields every nested directory. Handling those entries here
    # would flatten nested files and folders into dest_dir, so only the
    # top-level entry is used and recursion covers the rest. This relies on
    # the walk yielding src_dir first, which is its default in-order behavior.
    break
Esempio n. 10
0
def _recursive_copy(src_dir, dest_dir):
  """Copy the contents of src_dir into the folder dest_dir.

  Sub-directories are mirrored rather than flattened: every immediate
  sub-directory of src_dir is created inside dest_dir and then populated by
  a recursive call.

  When called, dest_dir should exist.

  Args:
    src_dir: directory to copy from.
    dest_dir: existing directory to copy into.
  """
  # Only the first entry of the walk (src_dir itself, given the default
  # in-order traversal) is needed; deeper levels are handled by recursion.
  # Iterating the remaining entries would drop nested files and folders
  # directly into dest_dir instead of into their mirrored sub-directory.
  top_entry = next(iter(file_io.walk(src_dir)), None)
  if top_entry is None:
    # Nothing was yielded for src_dir, so there is nothing to copy.
    return

  dir_name, sub_dirs, leaf_files = top_entry

  # copy all the files over
  for leaf_file in leaf_files:
    _copy_all([os.path.join(dir_name, leaf_file)], dest_dir)

  # Now make all the folders and fill each one from its source counterpart.
  for sub_dir in sub_dirs:
    dest_sub_dir = os.path.join(dest_dir, sub_dir)
    file_io.create_dir(dest_sub_dir)
    _recursive_copy(os.path.join(dir_name, sub_dir), dest_sub_dir)
Esempio n. 11
0
def main(_):
    """Stage input files locally, then upload the local directory's files.

    Reads `inputbucket`, `outputbucket` and `datadir` from the module-level
    `args` object. The single positional argument is ignored.
    """
    # The Tensorflow file_io.walk() function has an issue iterating over the
    # top level of a bucket: it requires a directory within the bucket, so
    # both URLs point at a "data/" directory.
    input_url = 's3://' + args.inputbucket + "/data/"
    output_url = 's3://' + args.outputbucket + "/data/"

    os.makedirs(args.datadir)

    # First, copy the files from pachyderm into a convenient local directory
    # for processing. Every file is placed directly in args.datadir, whatever
    # directory of the input it was found in.
    print("walking {} for copying files".format(input_url))
    for remote_dir, _subdirs, remote_names in file_io.walk(input_url, True):
        for remote_name in remote_names:
            source_uri = os.path.join(remote_dir, remote_name)
            local_path = os.path.join(args.datadir, remote_name)
            print("copying {} to {}".format(source_uri, local_path))
            file_io.copy(source_uri, local_path, True)

    # Here is where you would apply your training to the data in
    # args.datadir. It might operate on the data directly, or place
    # additional data in the same directory.

    # Finally, copy the output from those operations to another pachyderm
    # repo. Each file is uploaded directly under output_url.
    print("walking {} for copying to {}".format(args.datadir, output_url))
    for local_dir, _subdirs, local_names in os.walk(args.datadir, topdown=True):
        for local_name in local_names:
            source_path = os.path.join(local_dir, local_name)
            target_uri = output_url + local_name
            print("copying {} to {}".format(source_path, target_uri))
            file_io.copy(source_path, target_uri, True)