def test_create_handler(self):
    """Check the handler type returned by ``pfio.create_handler`` per scheme.

    Also checks that two "posix" handlers do not compare equal and that an
    unknown scheme raises ``ValueError``.
    """
    first_posix = pfio.create_handler("posix")
    self.assertIsInstance(first_posix,
                          pfio.filesystems.posix.PosixFileSystem)

    hdfs_fs = pfio.create_handler("hdfs")
    self.assertIsInstance(hdfs_fs,
                          pfio.filesystems.hdfs.HdfsFileSystem)

    # A second request for the same scheme must not compare equal to the
    # first handler.
    second_posix = pfio.create_handler("posix")
    self.assertNotEqual(first_posix, second_posix)

    with self.assertRaises(ValueError):
        pfio.create_handler("unsupported_scheme")
def test_mkdir(self):
    """Create a directory via the handler, check ``isdir``, then remove it."""
    dir_name = "testmkdir"
    with pfio.create_handler(self.fs) as fs_handler:
        fs_handler.mkdir(dir_name)
        is_directory = fs_handler.isdir(dir_name)
        self.assertTrue(is_directory)

        # Clean up the directory created above.
        fs_handler.remove(dir_name)
def test_exists(self):
    """Check ``exists`` for this test file, the root path and a missing file."""
    missing_path = "non_exist_file.txt"
    with pfio.create_handler(self.fs) as fs_handler:
        for present_path in (__file__, "/"):
            self.assertTrue(fs_handler.exists(present_path))

        self.assertFalse(fs_handler.exists(missing_path))
def test_remove(self):
    """Exercise ``remove`` on a single file and on a directory tree."""
    file_name = "test_remove.txt"
    top_dir = "test_dir/"
    inner_dir = os.path.join(top_dir, "nested_file/")
    inner_file = os.path.join(inner_dir, file_name)

    with pfio.create_handler(self.fs) as fs_handler:
        # Case 1: remove a single file.
        with fs_handler.open(file_name, 'w') as out:
            out.write('foobar')

        self.assertTrue(fs_handler.exists(file_name))
        fs_handler.remove(file_name)
        self.assertFalse(fs_handler.exists(file_name))

        # Case 2: remove a directory that contains a nested directory
        # and a file; the second argument to remove() is passed as True.
        fs_handler.makedirs(inner_dir)
        with fs_handler.open(inner_file, 'w') as out:
            out.write('foobar')

        created_paths = (top_dir, inner_dir, inner_file)
        for path in created_paths:
            self.assertTrue(fs_handler.exists(path))

        fs_handler.remove(top_dir, True)

        for path in created_paths:
            self.assertFalse(fs_handler.exists(path))
def setUp(self):
    """Build the zip fixture used by the tests.

    Layout created:

        outside.zip
        | - testfile1

    ``testfile1`` holds a string of length ``1 << 20`` produced by
    ``make_random_str``.
    """
    self.test_string = make_random_str(1 << 20)
    self.fs_handler = pfio.create_handler("posix")

    # Name of the outermost zip (without extension) and of its only member.
    self.zip_file_name = "outside"
    self.testfile_name = "testfile1"

    # Scratch directory whose content is zipped.
    self.tmpdir = tempfile.TemporaryDirectory()

    # Path used while building outside.zip.
    source_file = os.path.join(self.tmpdir.name, self.testfile_name)

    # Path used by the tests.
    self.zip_file_path = self.zip_file_name + ".zip"

    with open(source_file, "w") as out:
        out.write(self.test_string)

    # Original author's note: this will include outside.zip itself into
    # the zip (depends on make_zip; not verifiable from this block).
    make_zip(self.zip_file_path,
             root_dir=self.tmpdir.name,
             base_dir=".")
def test_stat_directory(self):
    """Compare ``handler.stat`` on a new directory with ``os.stat``."""
    dir_name = "testmkdir"

    # (attribute on the handler's stat object, attribute on os.stat_result)
    attr_pairs = (
        ('last_modified', 'st_mtime'),
        ('last_accessed', 'st_atime'),
        ('last_modified_ns', 'st_mtime_ns'),
        ('last_accessed_ns', 'st_atime_ns'),
        ('created', 'st_ctime'),
        ('created_ns', 'st_ctime_ns'),
        ('mode', 'st_mode'),
        ('size', 'st_size'),
        ('uid', 'st_uid'),
        ('gid', 'st_gid'),
        ('ino', 'st_ino'),
        ('dev', 'st_dev'),
        ('nlink', 'st_nlink'),
    )

    with pfio.create_handler(self.fs) as fs_handler:
        fs_handler.mkdir(dir_name)

        reference = os.stat(dir_name)
        actual = fs_handler.stat(dir_name)

        self.assertIsInstance(actual, PosixFileStat)
        self.assertTrue(actual.filename.endswith(dir_name))
        self.assertTrue(actual.isdir())

        # The second-resolution timestamps must be floats.
        for float_attr in ('last_accessed', 'last_modified', 'created'):
            self.assertIsInstance(getattr(actual, float_attr), float)

        for handler_attr, os_attr in attr_pairs:
            self.assertEqual(getattr(actual, handler_attr),
                             getattr(reference, os_attr))

        fs_handler.remove(dir_name)
def load_snapshot(target, directory, filename=None, fs=None,
                  fail_on_no_file=False):
    """Load a snapshot file into ``target`` with ``load_npz``.

    Args:
        target: Object passed to ``load_npz`` as the load destination.
        directory: Directory holding the snapshot; may be ``None`` only
            when ``filename`` is given.
        filename: Snapshot file name. When ``None`` (and ``directory`` is
            given) the name is obtained from ``_scan_directory``.
        fs: ``None`` to use the ``pfio`` module itself, a string to build
            a handler with ``pfio.create_handler``, or any other object,
            which is used as-is.
        fail_on_no_file: When true, raise ``RuntimeError`` if the scan
            yields no file name; otherwise return silently in that case.

    Raises:
        RuntimeError: No snapshot was found and ``fail_on_no_file`` is set.
    """
    assert directory is not None or filename is not None

    # Resolve the filesystem object; anything that is neither None nor a
    # string is taken as an already usable filesystem.
    if fs is None:
        fs = pfio
    elif isinstance(fs, str):
        fs = pfio.create_handler(fs)

    if filename is None and directory is not None:
        filename = _scan_directory(fs, directory)

        if filename is None:
            if not fail_on_no_file:
                return
            raise RuntimeError('No snapshot found from %s' % directory)

    if directory is not None:
        filename = os.path.join(directory, filename)

    with fs.open(filename, 'rb') as snapshot_file:
        load_npz(snapshot_file, target)
def setUp(self):
    """Build a zip fixture that contains files but no directory entries.

    The tree below is created on disk and zipped with ``zip -rD``; the
    directories are shown for readability only and are not stored as
    entries in the archive:

        outside.zip
        | - testdir1
        | - | - testfile1
        | - | - testdir2
        | - | - | - testfile2
        | - testdir3
        |   | - testfile3
        | - testfile4

    Note that this changes the working directory to the temporary
    directory; the previous one is saved in ``self.pwd``.
    """
    self.test_string = "this is a test string\n"
    self.fs_handler = pfio.create_handler("posix")

    # Name of the outermost zip.
    self.zip_file_name = "outside.zip"

    # Scratch directory in which the tree and the zip are created.
    self.tmpdir = tempfile.TemporaryDirectory()

    # Directory and file names come from the shared name table; each
    # key is stored under an attribute of the same name.
    for attr in ("dir1_name", "dir2_name", "dir3_name",
                 "testfile1_name", "testfile2_name",
                 "testfile3_name", "testfile4_name"):
        setattr(self, attr, NO_DIRECTORY_FILENAME_LIST[attr])

    # Paths used while building outside.zip.
    root = self.tmpdir.name
    dir1_path = os.path.join(root, self.dir1_name)
    dir2_path = os.path.join(dir1_path, self.dir2_name)
    dir3_path = os.path.join(root, self.dir3_name)
    file_paths = [
        os.path.join(dir1_path, self.testfile1_name),
        os.path.join(dir2_path, self.testfile2_name),
        os.path.join(dir3_path, self.testfile3_name),
        os.path.join(root, self.testfile4_name),
    ]

    # Parents are created before their children (dir1 before dir2).
    for dir_path in (dir1_path, dir2_path, dir3_path):
        os.mkdir(dir_path)

    for file_path in file_paths:
        with open(file_path, "w") as out:
            out.write(self.test_string)

    # Create the zip from inside the scratch directory.
    self.pwd = os.getcwd()
    os.chdir(root)
    zip_command = ["zip", "-rD", self.zip_file_name, "."]
    process = subprocess.Popen(zip_command,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    _, stderr = process.communicate()
    assert stderr == b""
def setUp(self):
    """Write a small text file through the "hdfs" handler for the tests."""
    self.fs = "hdfs"
    self.tmpfile_name = "tmpfile.txt"
    self.test_string = "this is a test string\n"

    with pfio.create_handler(self.fs) as fs_handler:
        with fs_handler.open(self.tmpfile_name, "w") as out:
            out.write(self.test_string)
def test_open_non_exist(self):
    """Opening a file that does not exist must raise ``IOError``."""
    missing_path = "non_exist_file.txt"

    # Make sure the file really is absent on the local filesystem.
    if os.path.exists(missing_path):
        os.remove(missing_path)

    with pfio.create_handler(self.fs) as fs_handler:
        with self.assertRaises(IOError):
            fs_handler.open(missing_path)
def setUp(self):
    """Write a small binary file through the "hdfs" handler for the tests."""
    self.test_string_b = "this is a test string\n".encode("utf-8")
    self.fs = "hdfs"
    self.tmpfile_name = "tmpfile.txt"

    with pfio.create_handler(self.fs) as fs_handler:
        with fs_handler.open(self.tmpfile_name, "wb") as out:
            out.write(self.test_string_b)
def test_makedirs(self):
    """Create a nested directory with ``makedirs`` and check ``isdir``."""
    top_dir = "testmkdir/"
    nested_dir = top_dir + "nested_dir"

    with pfio.create_handler(self.fs) as fs_handler:
        fs_handler.makedirs(nested_dir)
        self.assertTrue(fs_handler.isdir(nested_dir))

        # Remove the whole tree; the second argument is passed as True.
        fs_handler.remove(top_dir, True)
def test_read_bytes(self):
    """Write bytes to a named temp file and read them back via the handler."""
    with pfio.create_handler(self.fs) as fs_handler:
        # delete=False keeps the file on disk after the temp file object
        # is closed, so the handler can reopen it by path.
        with tempfile.NamedTemporaryFile("w+b", delete=False) as scratch:
            scratch_path = scratch.name
            scratch.write(self.test_string_bytes)

        with fs_handler.open(scratch_path, mode="rb") as reopened:
            content = reopened.read()
            self.assertEqual(self.test_string_bytes, content)

        fs_handler.remove(scratch_path)
def test_picle(self):
    """Round-trip a dict through ``pickle`` using files opened by the handler."""
    pickle_path = "test_pickle.pickle"
    original = {'test_elem1': b'balabala', 'test_elem2': 'balabala'}

    with pfio.create_handler(self.fs) as fs_handler:
        with fs_handler.open(pickle_path, 'wb') as out:
            pickle.dump(original, out)

        with fs_handler.open(pickle_path, 'rb') as src:
            restored = pickle.load(src)

        self.assertEqual(original, restored)

        fs_handler.remove(pickle_path, True)
def __init__(self, directory: str, savefun=None, fs=None):
    """Store the target directory, save function and filesystem.

    Args:
        directory: Directory used by this object; must not be ``None``.
            It is created with ``fs.makedirs`` when ``fs.exists`` reports
            it as missing.
        savefun: Save function; ``save_npz`` is used when ``None``.
        fs: ``None`` to use the ``pfio`` module itself, a string to build
            a handler with ``pfio.create_handler``, or any other object,
            which is used as-is.
    """
    assert directory is not None

    self.directory = directory
    self.savefun = savefun if savefun is not None else save_npz

    if isinstance(fs, str):
        self.fs = pfio.create_handler(fs)
    elif fs is None:
        self.fs = pfio
    else:
        self.fs = fs

    directory_present = self.fs.exists(self.directory)
    if not directory_present:
        self.fs.makedirs(self.directory)
def test_rename(self):
    """Rename a file and check that only the new name exists afterwards."""
    source, destination = 'src', 'dst'

    with pfio.create_handler(self.fs) as fs_handler:
        with fs_handler.open(source, 'w') as out:
            out.write('foobar')

        self.assertTrue(fs_handler.exists(source))
        self.assertFalse(fs_handler.exists(destination))

        fs_handler.rename(source, destination)

        self.assertFalse(fs_handler.exists(source))
        self.assertTrue(fs_handler.exists(destination))

        # The content must have moved along with the name.
        with fs_handler.open(destination, 'r') as src_file:
            content = src_file.read()
        assert content == 'foobar'

        fs_handler.remove(destination)
def test_list(self):
    """Check ``handler.list`` with and without ``recursive=True``.

    Covers three things:

    * ``list()`` without arguments returns an iterable that contains the
      temporary file created for the test case (``self.tmpfile_name``);
    * consuming ``list(<file>)`` raises ``NotADirectoryError``;
    * for a directory given with and without a trailing slash, the
      recursive listing contains the nested file (as a path relative to
      the listed directory) while the non-recursive listing does not.
    """
    with pfio.create_handler(self.fs) as handler:
        file_generator = handler.list()
        self.assertIsInstance(file_generator, Iterable)
        file_list = list(file_generator)
        self.assertIn(self.tmpfile_name, file_list, self.tmpfile_name)

        # An exception is raised when the given path is not a directory.
        # The result of handler.list() is handed to list() inside
        # assertRaises, so the error is expected while it is consumed.
        self.assertRaises(NotADirectoryError,
                          list, handler.list(self.tmpfile_name))

        for test_dir_name in ["testmkdir", "testmkdir/"]:
            nested_dir_name1 = "nested_dir1"
            nested_dir_name2 = "nested_dir2"
            nested_file_name = "file"
            nested_dir1 = os.path.join(test_dir_name, nested_dir_name1)
            nested_dir2 = os.path.join(test_dir_name, nested_dir_name2)
            nested_file = os.path.join(nested_dir2, nested_file_name)
            nested_file_relative = os.path.join(nested_dir_name2,
                                                nested_file_name)

            try:
                handler.makedirs(nested_dir1)
                handler.makedirs(nested_dir2)

                with handler.open(nested_file, "w") as f:
                    f.write(self.test_string)

                recursive_file_generator = handler.list(test_dir_name,
                                                        recursive=True)
                self.assertIsInstance(recursive_file_generator, Iterable)
                file_list = list(recursive_file_generator)
                self.assertIn(nested_dir_name1, file_list)
                self.assertIn(nested_dir_name2, file_list)
                self.assertIn(nested_file_relative, file_list)

                normal_file_generator = handler.list(test_dir_name)
                # Fixed: this used to re-check recursive_file_generator,
                # leaving the non-recursive result's type untested.
                self.assertIsInstance(normal_file_generator, Iterable)
                file_list = list(normal_file_generator)
                self.assertIn(nested_dir_name1, file_list)
                self.assertIn(nested_dir_name2, file_list)
                self.assertNotIn(nested_file_relative, file_list)
            finally:
                # Always remove the tree so the next iteration (and other
                # tests) start from a clean state.
                handler.remove(test_dir_name, True)
def test_list(self):
    """Check recursive and non-recursive ``handler.list`` on nested dirs.

    Directory layout created under a fresh temporary directory, once for
    the name with and once without a trailing slash:

        testlsdir
        | - nested_dir1
        |   | - nested_dir3
        | - nested_dir2

    The recursive listing must contain ``nested_dir1/nested_dir3``; the
    non-recursive listing must not.
    """
    nested_dir_name1 = "nested_dir1"
    nested_dir_name2 = "nested_dir2"
    nested_dir_name3 = "nested_dir3"
    nested_dir_relative_path3 = os.path.join(nested_dir_name1,
                                             nested_dir_name3)

    for test_dir_name in ["testlsdir", "testlsdir/"]:
        # The context manager cleans the directory up on exit. Previously
        # the directory was created inside a try block whose finally
        # clause referenced it, so a failure during creation hit an
        # unbound (or stale, from the prior iteration) variable.
        with tempfile.TemporaryDirectory() as tmp_root:
            test_dir_path = os.path.join(tmp_root, test_dir_name)
            nested_dir_path1 = os.path.join(test_dir_path,
                                            nested_dir_name1)
            nested_dir_path2 = os.path.join(test_dir_path,
                                            nested_dir_name2)
            nested_dir_path3 = os.path.join(nested_dir_path1,
                                            nested_dir_name3)

            with pfio.create_handler(self.fs) as handler:
                handler.makedirs(nested_dir_path1)
                handler.makedirs(nested_dir_path2)
                handler.makedirs(nested_dir_path3)

                self.assertIsInstance(handler.list(), Iterable)

                full_list_of_file = list(
                    handler.list(test_dir_path, recursive=True))
                self.assertIn(nested_dir_name1, full_list_of_file)
                self.assertIn(nested_dir_name2, full_list_of_file)
                self.assertIn(nested_dir_relative_path3,
                              full_list_of_file)

                first_level_list_of_file = list(
                    handler.list(test_dir_path))
                self.assertIn(nested_dir_name1, first_level_list_of_file)
                self.assertIn(nested_dir_name2, first_level_list_of_file)
                self.assertNotIn(nested_dir_relative_path3,
                                 first_level_list_of_file)
def test_stat_directory(self):
    """Compare ``handler.stat`` on a new directory with ``hdfs`` ``info``."""
    dir_name = "testmkdir"
    # Fields expected to be identical on both sides.
    shared_fields = ('size', 'owner', 'group', 'replication',
                     'block_size', 'kind', 'last_accessed',
                     'last_modified')

    with pfio.create_handler(self.fs) as fs_handler:
        fs_handler.mkdir(dir_name)

        connection = hdfs.connect()
        reference = connection.info(dir_name)
        actual = fs_handler.stat(dir_name)

        self.assertIsInstance(actual, HdfsFileStat)
        self.assertTrue(actual.filename.endswith(dir_name))
        self.assertTrue(actual.isdir())

        # Low nine bits carry the permissions; 0o40000 is the bit this
        # test expects to be set for a directory.
        self.assertEqual(actual.mode & 0o777, reference['permissions'])
        self.assertTrue(actual.mode & 0o40000)

        for float_attr in ('last_accessed', 'last_modified'):
            self.assertIsInstance(getattr(actual, float_attr), float)

        for field in shared_fields:
            self.assertEqual(getattr(actual, field), reference[field])

        fs_handler.remove(dir_name)
def test_snapshot_hdfs():
    """Take a snapshot into an HDFS directory and load it back.

    Uses a mock-updater trainer, writes through ``SimpleWriter`` and then
    restores into a second trainer with ``load_snapshot``.
    """
    source_trainer = chainer.testing.get_trainer_with_mock_updater()
    source_trainer.out = '.'
    source_trainer._done = True

    with pfio.create_handler('hdfs') as fs:
        snapshot_dir = "some-pfio-tmp-dir"
        fs.makedirs(snapshot_dir, exist_ok=True)

        # The directory must start out empty.
        existing = list(fs.list(snapshot_dir))
        assert len(existing) == 0

        take_snapshot = extensions.snapshot(
            writer=SimpleWriter(snapshot_dir, fs=fs))
        take_snapshot(source_trainer)

        assert 'snapshot_iter_0' in fs.list(snapshot_dir)

        restored_trainer = chainer.testing.get_trainer_with_mock_updater()
        load_snapshot(restored_trainer, snapshot_dir, fs=fs,
                      fail_on_no_file=True)

        # Cleanup
        fs.remove(snapshot_dir, recursive=True)
def test_stat_file(self):
    """Compare ``handler.stat`` on a new file with ``hdfs`` ``info``."""
    file_name = "testfile"
    # Fields expected to be identical on both sides.
    shared_fields = ('size', 'owner', 'group', 'replication',
                     'block_size', 'kind', 'last_accessed',
                     'last_modified')

    with pfio.create_handler(self.fs) as fs_handler:
        with fs_handler.open(file_name, 'w') as out:
            out.write('foobar')

        connection = hdfs.connect()
        reference = connection.info(file_name)
        actual = fs_handler.stat(file_name)

        self.assertIsInstance(actual, HdfsFileStat)
        self.assertTrue(actual.filename.endswith(file_name))
        self.assertFalse(actual.isdir())

        # Low nine bits carry the permissions; 0o100000 is the bit this
        # test expects to be set for a regular file.
        self.assertEqual(actual.mode & 0o777, reference['permissions'])
        self.assertTrue(actual.mode & 0o100000)

        for float_attr in ('last_accessed', 'last_modified'):
            self.assertIsInstance(getattr(actual, float_attr), float)

        for field in shared_fields:
            self.assertEqual(getattr(actual, field), reference[field])

        fs_handler.remove(file_name)
def setUp(self):
    """Build the nested-zip fixture used by the tests.

    Layout created:

        outside.zip
        | - testdir1
        |   | - nested1.zip
        |       | - nested_dir
        |           | - nested
        | - testdir2
        |   | - testfile1
        | - testfile2

    The names shown are illustrative; the real ones come from
    ``ZIP_TEST_FILENAME_LIST``.
    """
    # Contents written into the fixture files, as text and as bytes.
    self.test_string = "this is a test string\n"
    self.nested_test_string = "this is a test string for nested zip\n"
    self.test_string_b = self.test_string.encode("utf-8")
    self.nested_test_string_b = self.nested_test_string.encode("utf-8")

    self.fs_handler = pfio.create_handler("posix")

    # Name of the outermost zip (without extension).
    self.zip_file_name = "outside"

    # Scratch directory whose content is zipped.
    self.tmpdir = tempfile.TemporaryDirectory()
    root = self.tmpdir.name

    # Nested zip and the file stored inside it.
    self.nested_zipped_file_name = "nested"
    self.nested_dir_name = ZIP_TEST_FILENAME_LIST["nested_dir_name"]
    self.nested_dir_path = os.path.join(root, self.nested_dir_name)
    self.nested_zip_file_name = \
        ZIP_TEST_FILENAME_LIST["nested_zip_file_name"]

    # Directories and plain files.
    self.dir_name1 = ZIP_TEST_FILENAME_LIST["dir_name1"]
    self.dir_name2 = ZIP_TEST_FILENAME_LIST["dir_name2"]
    self.zipped_file_name = ZIP_TEST_FILENAME_LIST["zipped_file_name"]
    self.testfile_name = ZIP_TEST_FILENAME_LIST["testfile_name"]

    # Absolute paths used while building outside.zip.
    first_dir = os.path.join(root, self.dir_name1)
    second_dir = os.path.join(root, self.dir_name2)
    top_level_file = os.path.join(root, self.testfile_name)
    nested_source_dir = os.path.join(root, self.nested_dir_name)
    file_in_second_dir = os.path.join(second_dir, self.zipped_file_name)
    file_in_nested_dir = os.path.join(nested_source_dir,
                                      self.nested_zipped_file_name)
    nested_zip_target = os.path.join(first_dir,
                                     self.nested_zip_file_name)

    # Paths used by the tests.
    self.zip_file_path = self.zip_file_name + ".zip"
    self.zipped_file_path = os.path.join(self.dir_name2,
                                         self.zipped_file_name)
    self.nested_zip_path = os.path.join(self.dir_name1,
                                        self.nested_zip_file_name)
    self.nested_zipped_file_path = os.path.join(
        self.nested_dir_name, self.nested_zipped_file_name)

    for new_dir in (first_dir, second_dir, nested_source_dir):
        os.mkdir(new_dir)

    files_to_write = (
        (file_in_second_dir, self.test_string),
        (file_in_nested_dir, self.nested_test_string),
        (top_level_file, self.test_string),
    )
    for path, content in files_to_write:
        with open(path, "w") as out:
            out.write(content)

    # Zip the nested directory into the first directory, then drop the
    # unzipped copy before the outer zip is made.
    make_zip(nested_zip_target,
             root_dir=root,
             base_dir=self.nested_dir_name)
    shutil.rmtree(nested_source_dir)

    # Original author's note: this will include outside.zip itself into
    # the zip (depends on make_zip; not verifiable from this block).
    make_zip(self.zip_file_path,
             root_dir=root,
             base_dir=".")
def test_read_string(self):
    """Read the fixture file in text mode with ``read`` and ``readline``."""
    with pfio.create_handler(self.fs) as fs_handler:
        with fs_handler.open(self.tmpfile_name, "r") as src:
            whole_content = src.read()
            self.assertEqual(self.test_string, whole_content)

        # The fixture is a single line, so readline() is expected to
        # return the same text.
        with fs_handler.open(self.tmpfile_name, "r") as src:
            first_line = src.readline()
            self.assertEqual(self.test_string, first_line)
def tearDown(self):
    """Remove the fixture file, ignoring ``IOError`` from the removal."""
    with pfio.create_handler(self.fs) as fs_handler:
        try:
            fs_handler.remove(self.tmpfile_name)
        except IOError:
            # Best-effort cleanup: an IOError here is deliberately ignored.
            pass
def test_isdir(self):
    """``isdir`` is true for the root path and false for a file name."""
    with pfio.create_handler(self.fs) as fs_handler:
        root_is_dir = fs_handler.isdir("/")
        self.assertTrue(root_is_dir)

        file_is_dir = fs_handler.isdir("test_posix_handler.py")
        self.assertFalse(file_is_dir)
def test_read_bytes(self):
    """Read the fixture file in binary mode and compare with what was written."""
    with pfio.create_handler(self.fs) as fs_handler:
        with fs_handler.open(self.tmpfile_name, "rb") as src:
            content = src.read()
            self.assertEqual(self.test_string_b, content)
def test_isdir(self):
    """``isdir`` is true for the root path and false for the fixture file."""
    with pfio.create_handler(self.fs) as fs_handler:
        root_is_dir = fs_handler.isdir("/")
        self.assertTrue(root_is_dir)

        file_is_dir = fs_handler.isdir(self.tmpfile_name)
        self.assertFalse(file_is_dir)
def test_info(self):
    """``handler.info()`` must return a string."""
    with pfio.create_handler(self.fs) as fs_handler:
        description = fs_handler.info()
        self.assertIsInstance(description, str)
def test_read_non_exist(self):
    """Opening a file that does not exist must raise ``IOError``."""
    missing_path = "non_exist_file.txt"
    with pfio.create_handler(self.fs) as fs_handler:
        with self.assertRaises(IOError):
            fs_handler.open(missing_path)