Example #1
0
    def test_create_handler(self):
        """Check that ``pfio.create_handler`` maps scheme names to handlers.

        "posix" and "hdfs" must yield ``PosixFileSystem`` and
        ``HdfsFileSystem`` instances, two calls with the same scheme must
        not compare equal, and an unknown scheme must raise ``ValueError``.
        """
        first_posix = pfio.create_handler("posix")
        self.assertIsInstance(
            first_posix, pfio.filesystems.posix.PosixFileSystem)

        hdfs_fs = pfio.create_handler("hdfs")
        self.assertIsInstance(
            hdfs_fs, pfio.filesystems.hdfs.HdfsFileSystem)

        # A second request for the same scheme must not compare equal to
        # the first handler.
        second_posix = pfio.create_handler("posix")
        self.assertNotEqual(first_posix, second_posix)

        self.assertRaises(ValueError,
                          pfio.create_handler, "unsupported_scheme")
Example #2
0
    def test_mkdir(self):
        """Create a directory through the handler and confirm ``isdir``.

        The directory is removed again at the end of the test.
        """
        dirname = "testmkdir"
        with pfio.create_handler(self.fs) as fs_handler:
            fs_handler.mkdir(dirname)
            created = fs_handler.isdir(dirname)
            self.assertTrue(created)

            fs_handler.remove(dirname)
Example #3
0
    def test_exists(self):
        """Check ``exists`` for present and absent paths.

        This source file and "/" must be reported as existing, while a
        made-up file name must not.
        """
        missing_name = "non_exist_file.txt"

        with pfio.create_handler(self.fs) as fs_handler:
            for present_path in (__file__, "/"):
                self.assertTrue(fs_handler.exists(present_path))
            self.assertFalse(fs_handler.exists(missing_name))
Example #4
0
    def test_remove(self):
        """Exercise ``remove`` on a single file and on a directory tree.

        First a plain file is written and removed.  Then a nested directory
        holding one file is created and the top directory is removed with
        the second positional argument set to ``True``; afterwards none of
        the three paths may exist.
        """
        plain_file = "test_remove.txt"
        top_dir = "test_dir/"
        inner_dir = os.path.join(top_dir, "nested_file/")
        inner_file = os.path.join(inner_dir, plain_file)
        tree_paths = (top_dir, inner_dir, inner_file)

        with pfio.create_handler(self.fs) as fs_handler:
            # Part 1: removing one file.
            with fs_handler.open(plain_file, 'w') as out:
                out.write('foobar')

            self.assertTrue(fs_handler.exists(plain_file))
            fs_handler.remove(plain_file)
            self.assertFalse(fs_handler.exists(plain_file))

            # Part 2: removing a directory that contains a nested file.
            fs_handler.makedirs(inner_dir)
            with fs_handler.open(inner_file, 'w') as out:
                out.write('foobar')

            for path in tree_paths:
                self.assertTrue(fs_handler.exists(path))

            fs_handler.remove(top_dir, True)

            for path in tree_paths:
                self.assertFalse(fs_handler.exists(path))
Example #5
0
    def setUp(self):
        """Build the single-file zip fixture shared by the tests.

        Layout of the archive::

            outside.zip
            | - testfile1

        ``testfile1`` holds the string returned by
        ``make_random_str(1 << 20)``.
        """
        self.test_string = make_random_str(1 << 20)
        self.fs_handler = pfio.create_handler("posix")

        # Base name of the outermost zip; ".zip" is appended further down.
        self.zip_file_name = "outside"

        # Scratch directory that holds the file to be archived.
        self.tmpdir = tempfile.TemporaryDirectory()

        self.testfile_name = "testfile1"

        # Path used only while building outside.zip.
        source_path = os.path.join(self.tmpdir.name, self.testfile_name)

        # Path used by the tests.
        self.zip_file_path = self.zip_file_name + ".zip"

        with open(source_path, "w") as out:
            out.write(self.test_string)

        # NOTE(review): the original author states that this will include
        # outside.zip itself in the zip; that depends on make_zip and the
        # working directory -- confirm.
        make_zip(self.zip_file_path,
                 root_dir=self.tmpdir.name,
                 base_dir=".")
Example #6
0
    def test_stat_directory(self):
        """Compare ``handler.stat`` on a fresh directory with ``os.stat``.

        The handler result must be a ``PosixFileStat`` flagged as a
        directory, expose float timestamps, and agree attribute by
        attribute with the ``os.stat`` result for the same path.
        """
        dirname = "testmkdir"
        # (attribute on the handler's stat, attribute on the os.stat result)
        attr_pairs = (
            ('last_modified', 'st_mtime'),
            ('last_accessed', 'st_atime'),
            ('last_modified_ns', 'st_mtime_ns'),
            ('last_accessed_ns', 'st_atime_ns'),
            ('created', 'st_ctime'),
            ('created_ns', 'st_ctime_ns'),
            ('mode', 'st_mode'),
            ('size', 'st_size'),
            ('uid', 'st_uid'),
            ('gid', 'st_gid'),
            ('ino', 'st_ino'),
            ('dev', 'st_dev'),
            ('nlink', 'st_nlink'),
        )

        with pfio.create_handler(self.fs) as fs_handler:
            fs_handler.mkdir(dirname)

            reference = os.stat(dirname)

            result = fs_handler.stat(dirname)
            self.assertIsInstance(result, PosixFileStat)
            self.assertTrue(result.filename.endswith(dirname))
            self.assertTrue(result.isdir())
            for time_attr in ('last_accessed', 'last_modified', 'created'):
                self.assertIsInstance(getattr(result, time_attr), float)
            for ours, theirs in attr_pairs:
                self.assertEqual(getattr(result, ours),
                                 getattr(reference, theirs))

            fs_handler.remove(dirname)
Example #7
0
def load_snapshot(target,
                  directory,
                  filename=None,
                  fs=None,
                  fail_on_no_file=False):
    """Load a snapshot file into ``target`` using ``load_npz``.

    Args:
        target: Object passed to ``load_npz`` as the load destination.
        directory: Directory of the snapshot.  When ``filename`` is
            ``None`` it is scanned with ``_scan_directory`` to pick a file;
            otherwise it is joined in front of ``filename``.  May be
            ``None`` only if ``filename`` is given.
        filename: Snapshot file name, or ``None`` to let
            ``_scan_directory`` choose one.
        fs: Filesystem to read from.  ``None`` uses the ``pfio`` module
            itself; a string is passed to ``pfio.create_handler`` and the
            resulting handler is closed before this function returns; any
            other object is used as-is and left open for the caller.
        fail_on_no_file: If true, raise instead of returning silently when
            no snapshot file could be determined.

    Raises:
        RuntimeError: No snapshot file was found and ``fail_on_no_file``
            is true.
    """
    assert directory is not None or filename is not None

    if isinstance(fs, str):
        # The handler is created here, so it is also released here; the
        # previous code left it open after returning.
        with pfio.create_handler(fs) as handler:
            return load_snapshot(target,
                                 directory,
                                 filename=filename,
                                 fs=handler,
                                 fail_on_no_file=fail_on_no_file)

    if fs is None:
        fs = pfio

    if filename is None and directory is not None:
        filename = _scan_directory(fs, directory)

    if filename is None:
        if fail_on_no_file:
            raise RuntimeError('No snapshot found from %s' % directory)
        return

    if directory is not None:
        filename = os.path.join(directory, filename)

    with fs.open(filename, 'rb') as fp:
        load_npz(fp, target)
Example #8
0
    def setUp(self):
        """Build a zip fixture that stores files but no directory entries.

        The tree below is created in a temporary directory and archived
        with ``zip -rD``.  The directories are shown for readability only;
        they are not included in the zip::

            outside.zip
            | - testdir1
            | - | - testfile1
            | - | - testdir2
            | - | - | - testfile2
            | - testdir3
            |   | - testfile3
            | - testfile4

        The working directory is changed to the temporary directory and
        the previous one is remembered in ``self.pwd``.
        """
        self.test_string = "this is a test string\n"
        self.fs_handler = pfio.create_handler("posix")

        # Name of the outermost zip.
        self.zip_file_name = "outside.zip"

        # Scratch directory in which the tree and the zip are created.
        self.tmpdir = tempfile.TemporaryDirectory()
        root = self.tmpdir.name

        # Directory and file names come from the shared name table.
        names = NO_DIRECTORY_FILENAME_LIST
        self.dir1_name = names["dir1_name"]
        self.dir2_name = names["dir2_name"]
        self.dir3_name = names["dir3_name"]
        self.testfile1_name = names["testfile1_name"]
        self.testfile2_name = names["testfile2_name"]
        self.testfile3_name = names["testfile3_name"]
        self.testfile4_name = names["testfile4_name"]

        # Paths used while building outside.zip.
        dir1_path = os.path.join(root, self.dir1_name)
        dir2_path = os.path.join(dir1_path, self.dir2_name)
        dir3_path = os.path.join(root, self.dir3_name)
        file_paths = (
            os.path.join(dir1_path, self.testfile1_name),
            os.path.join(dir2_path, self.testfile2_name),
            os.path.join(dir3_path, self.testfile3_name),
            os.path.join(root, self.testfile4_name),
        )

        # Parents first: dir2 lives inside dir1.
        for dir_path in (dir1_path, dir2_path, dir3_path):
            os.mkdir(dir_path)

        for file_path in file_paths:
            with open(file_path, "w") as out:
                out.write(self.test_string)

        # Create the zip without directory entries, from inside the tree.
        self.pwd = os.getcwd()
        os.chdir(root)
        process = subprocess.Popen(
            ["zip", "-rD", self.zip_file_name, "."],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        _, stderr = process.communicate()

        assert stderr == b""
Example #9
0
    def setUp(self):
        """Write the text fixture file through an "hdfs" handler."""
        self.test_string = "this is a test string\n"
        self.fs = "hdfs"
        self.tmpfile_name = "tmpfile.txt"

        with pfio.create_handler(self.fs) as fs_handler, \
                fs_handler.open(self.tmpfile_name, "w") as out:
            out.write(self.test_string)
Example #10
0
    def test_open_non_exist(self):
        """Opening a file that does not exist must raise ``IOError``."""
        missing = "non_exist_file.txt"
        # Make sure the precondition holds on the local filesystem.
        if os.path.exists(missing):
            os.remove(missing)

        with pfio.create_handler(self.fs) as fs_handler:
            with self.assertRaises(IOError):
                fs_handler.open(missing)
Example #11
0
    def setUp(self):
        """Write the binary fixture file through an "hdfs" handler.

        The UTF-8 encoded payload is kept in ``self.test_string_b``.
        """
        self.test_string_b = "this is a test string\n".encode("utf-8")
        self.fs = "hdfs"
        self.tmpfile_name = "tmpfile.txt"

        with pfio.create_handler(self.fs) as fs_handler, \
                fs_handler.open(self.tmpfile_name, "wb") as out:
            out.write(self.test_string_b)
Example #12
0
    def test_makedirs(self):
        """``makedirs`` must create a nested directory in one call.

        The top-level directory is removed afterwards with the second
        positional argument of ``remove`` set to ``True``.
        """
        top_dir = "testmkdir/"
        nested_dir = top_dir + "nested_dir"

        with pfio.create_handler(self.fs) as fs_handler:
            fs_handler.makedirs(nested_dir)
            created = fs_handler.isdir(nested_dir)
            self.assertTrue(created)

            fs_handler.remove(top_dir, True)
Example #13
0
    def test_read_bytes(self):
        """Read back, via the handler, bytes written to a temporary file.

        The temporary file is created with ``delete=False`` so it survives
        being closed; the handler removes it at the end of the test.
        """
        with pfio.create_handler(self.fs) as fs_handler:
            with tempfile.NamedTemporaryFile("w+b", delete=False) as scratch:
                scratch_path = scratch.name
                scratch.write(self.test_string_bytes)

            with fs_handler.open(scratch_path, mode="rb") as reader:
                self.assertEqual(self.test_string_bytes, reader.read())

            fs_handler.remove(scratch_path)
Example #14
0
    def test_picle(self):
        """Round-trip a dict through ``pickle`` using handler file objects.

        The dict mixes a bytes value and a str value; what is loaded back
        must equal what was dumped.
        """
        pickle_path = "test_pickle.pickle"
        payload = {'test_elem1': b'balabala', 'test_elem2': 'balabala'}

        with pfio.create_handler(self.fs) as fs_handler:
            with fs_handler.open(pickle_path, 'wb') as sink:
                pickle.dump(payload, sink)
            with fs_handler.open(pickle_path, 'rb') as source:
                restored = pickle.load(source)
                self.assertEqual(payload, restored)

            fs_handler.remove(pickle_path, True)
Example #15
0
    def __init__(self, directory: str, savefun=None, fs=None):
        """Store the output directory, save function and filesystem.

        Args:
            directory: Target directory; must not be ``None``.  It is
                created with ``fs.makedirs`` when ``fs.exists`` reports
                that it is missing.
            savefun: Save function to keep on the instance; ``save_npz``
                is used when this is ``None``.
            fs: ``None`` selects the ``pfio`` module itself, a string is
                passed to ``pfio.create_handler``, and any other object is
                stored unchanged.
        """
        assert directory is not None
        self.directory = directory
        self.savefun = savefun if savefun is not None else save_npz

        if isinstance(fs, str):
            self.fs = pfio.create_handler(fs)
        elif fs is None:
            self.fs = pfio
        else:
            self.fs = fs

        already_there = self.fs.exists(self.directory)
        if not already_there:
            self.fs.makedirs(self.directory)
Example #16
0
    def test_rename(self):
        """Rename a file through the handler and verify the outcome.

        After ``rename('src', 'dst')`` the source must be gone, the
        destination must exist, and its content must be what was written
        to the source.  Both names are cleaned up even if a check fails.
        """
        with pfio.create_handler(self.fs) as handler:
            try:
                with handler.open('src', 'w') as fp:
                    fp.write('foobar')

                self.assertTrue(handler.exists('src'))
                self.assertFalse(handler.exists('dst'))

                handler.rename('src', 'dst')
                self.assertFalse(handler.exists('src'))
                self.assertTrue(handler.exists('dst'))

                with handler.open('dst', 'r') as fp:
                    data = fp.read()
                    # Use the TestCase assertion rather than a bare
                    # ``assert`` so the check survives ``python -O`` and
                    # reports both values on failure.
                    self.assertEqual(data, 'foobar')
            finally:
                # Whichever of the two files exists at this point is
                # removed, so a failed check does not leave files behind.
                for leftover in ('src', 'dst'):
                    if handler.exists(leftover):
                        handler.remove(leftover)
Example #17
0
    def test_list(self):
        """Check ``handler.list`` in its default and recursive modes.

        Three things are covered: the default listing contains the fixture
        file; listing a regular file raises ``NotADirectoryError`` when the
        result is consumed; and for a small directory tree the recursive
        listing reports the nested file while the non-recursive listing
        does not.  The tree is exercised with and without a trailing slash
        on the directory name, and is removed after each round.
        """
        with pfio.create_handler(self.fs) as handler:
            file_generator = handler.list()
            self.assertIsInstance(file_generator, Iterable)
            file_list = list(file_generator)
            self.assertIn(self.tmpfile_name, file_list, self.tmpfile_name)

            # An exception is raised when the given path is not a directory
            self.assertRaises(NotADirectoryError, list,
                              handler.list(self.tmpfile_name))

            nested_dir_name1 = "nested_dir1"
            nested_dir_name2 = "nested_dir2"
            nested_file_name = "file"
            # Path of the nested file relative to the listed directory.
            nested_file_relative = os.path.join(nested_dir_name2,
                                                nested_file_name)

            for test_dir_name in ["testmkdir", "testmkdir/"]:
                nested_dir1 = os.path.join(test_dir_name, nested_dir_name1)
                nested_dir2 = os.path.join(test_dir_name, nested_dir_name2)
                nested_file = os.path.join(nested_dir2, nested_file_name)

                try:
                    handler.makedirs(nested_dir1)
                    handler.makedirs(nested_dir2)

                    with handler.open(nested_file, "w") as f:
                        f.write(self.test_string)

                    recursive_file_generator = handler.list(test_dir_name,
                                                            recursive=True)
                    self.assertIsInstance(recursive_file_generator, Iterable)
                    file_list = list(recursive_file_generator)
                    self.assertIn(nested_dir_name1, file_list)
                    self.assertIn(nested_dir_name2, file_list)
                    self.assertIn(nested_file_relative, file_list)

                    normal_file_generator = handler.list(test_dir_name)
                    # Check the non-recursive result itself; the previous
                    # code re-checked the recursive generator here.
                    self.assertIsInstance(normal_file_generator, Iterable)
                    file_list = list(normal_file_generator)
                    self.assertIn(nested_dir_name1, file_list)
                    self.assertIn(nested_dir_name2, file_list)
                    self.assertNotIn(nested_file_relative, file_list)
                finally:
                    handler.remove(test_dir_name, True)
Example #18
0
    def test_list(self):
        """Check ``handler.list`` on a tree of empty directories.

        Directory layout, created under a fresh temporary directory::

            testlsdir
            | - nested_dir1
            |   | - nested_dir3
            | - nested_dir2

        The recursive listing must contain ``nested_dir1/nested_dir3``
        while the first-level listing must not.  The test runs once with
        and once without a trailing slash on the top directory name.
        """
        nested_dir_name1 = "nested_dir1"
        nested_dir_name2 = "nested_dir2"
        nested_dir_name3 = "nested_dir3"
        # Path of nested_dir3 relative to the listed directory.
        nested_dir_relative_path3 = os.path.join(
            nested_dir_name1, nested_dir_name3)

        for test_dir_name in ["testlsdir", "testlsdir/"]:
            # The context manager creates the directory before the body
            # runs and always cleans it up.  Previously the directory was
            # created inside ``try``, so a failure there made the
            # ``finally`` clause touch an unbound (or stale) ``tmpdir``.
            with tempfile.TemporaryDirectory() as tmpdir_path:
                test_dir_path = os.path.join(tmpdir_path, test_dir_name)
                nested_dir_path1 = os.path.join(test_dir_path,
                                                nested_dir_name1)
                nested_dir_path2 = os.path.join(test_dir_path,
                                                nested_dir_name2)
                nested_dir_path3 = os.path.join(nested_dir_path1,
                                                nested_dir_name3)

                with pfio.create_handler(self.fs) as handler:
                    handler.makedirs(nested_dir_path1)
                    handler.makedirs(nested_dir_path2)
                    handler.makedirs(nested_dir_path3)

                    self.assertIsInstance(handler.list(), Iterable)
                    full_list_of_file = list(
                        handler.list(test_dir_path, recursive=True))
                    self.assertIn(nested_dir_name1, full_list_of_file)
                    self.assertIn(nested_dir_name2, full_list_of_file)
                    self.assertIn(nested_dir_relative_path3, full_list_of_file)

                    first_level_list_of_file = list(
                        handler.list(test_dir_path))
                    self.assertIn(nested_dir_name1, first_level_list_of_file)
                    self.assertIn(nested_dir_name2, first_level_list_of_file)
                    self.assertNotIn(nested_dir_relative_path3,
                                     first_level_list_of_file)
Example #19
0
    def test_stat_directory(self):
        """Compare ``handler.stat`` on a directory with ``hdfs`` info.

        The reference values come from ``hdfs.connect().info``.  The
        handler result must be an ``HdfsFileStat`` flagged as a directory,
        with matching permission bits, the ``0o40000`` bit set in ``mode``
        and float timestamps.
        """
        dirname = "testmkdir"
        compared_keys = ('size', 'owner', 'group', 'replication',
                         'block_size', 'kind', 'last_accessed',
                         'last_modified')

        with pfio.create_handler(self.fs) as fs_handler:
            fs_handler.mkdir(dirname)

            reference = hdfs.connect().info(dirname)

            result = fs_handler.stat(dirname)
            self.assertIsInstance(result, HdfsFileStat)
            self.assertTrue(result.filename.endswith(dirname))
            self.assertTrue(result.isdir())
            self.assertEqual(result.mode & 0o777, reference['permissions'])
            self.assertTrue(result.mode & 0o40000)
            for time_attr in ('last_accessed', 'last_modified'):
                self.assertIsInstance(getattr(result, time_attr), float)
            for key in compared_keys:
                self.assertEqual(getattr(result, key), reference[key])

            fs_handler.remove(dirname)
Example #20
0
def test_snapshot_hdfs():
    """Write a trainer snapshot through an "hdfs" handler and load it back.

    A snapshot is written into a scratch directory with ``SimpleWriter``,
    the file ``snapshot_iter_0`` must then be listed there, and
    ``load_snapshot`` must be able to load it into a second trainer.  The
    scratch directory is removed even when one of the checks fails.
    """
    trainer = chainer.testing.get_trainer_with_mock_updater()
    trainer.out = '.'
    trainer._done = True

    with pfio.create_handler('hdfs') as fs:
        tmpdir = "some-pfio-tmp-dir"
        fs.makedirs(tmpdir, exist_ok=True)
        file_list = list(fs.list(tmpdir))
        # Precondition: the scratch directory starts out empty.  This is
        # deliberately outside the try block so that unexpected existing
        # content is reported rather than deleted.
        assert len(file_list) == 0

        try:
            writer = SimpleWriter(tmpdir, fs=fs)
            snapshot = extensions.snapshot(writer=writer)
            snapshot(trainer)

            assert 'snapshot_iter_0' in fs.list(tmpdir)

            trainer2 = chainer.testing.get_trainer_with_mock_updater()
            load_snapshot(trainer2, tmpdir, fs=fs, fail_on_no_file=True)
        finally:
            # Cleanup must also happen on failure; otherwise the leftover
            # snapshot would break the emptiness precondition of the next
            # run.
            fs.remove(tmpdir, recursive=True)
Example #21
0
    def test_stat_file(self):
        """Compare ``handler.stat`` on a regular file with ``hdfs`` info.

        The reference values come from ``hdfs.connect().info``.  The
        handler result must be an ``HdfsFileStat`` that is not a
        directory, with matching permission bits, the ``0o100000`` bit set
        in ``mode`` and float timestamps.
        """
        file_name = "testfile"
        compared_keys = ('size', 'owner', 'group', 'replication',
                         'block_size', 'kind', 'last_accessed',
                         'last_modified')

        with pfio.create_handler(self.fs) as fs_handler:
            with fs_handler.open(file_name, 'w') as out:
                out.write('foobar')

            reference = hdfs.connect().info(file_name)

            result = fs_handler.stat(file_name)
            self.assertIsInstance(result, HdfsFileStat)
            self.assertTrue(result.filename.endswith(file_name))
            self.assertFalse(result.isdir())
            self.assertEqual(result.mode & 0o777, reference['permissions'])
            self.assertTrue(result.mode & 0o100000)
            for time_attr in ('last_accessed', 'last_modified'):
                self.assertIsInstance(getattr(result, time_attr), float)
            for key in compared_keys:
                self.assertEqual(getattr(result, key), reference[key])

            fs_handler.remove(file_name)
Example #22
0
    def setUp(self):
        """Build the nested-zip fixture shared by the tests.

        Layout of the archive::

            outside.zip
            | - testdir1
            |   | - nested1.zip
            |       | - nested_dir
            |           | - nested
            | - testdir2
            |   | - testfile1
            | - testfile2

        The concrete directory and file names are read from
        ``ZIP_TEST_FILENAME_LIST``.
        """
        self.test_string = "this is a test string\n"
        self.nested_test_string = "this is a test string for nested zip\n"
        self.test_string_b = self.test_string.encode("utf-8")
        self.nested_test_string_b = self.nested_test_string.encode("utf-8")
        self.fs_handler = pfio.create_handler("posix")

        # Base name of the outermost zip; ".zip" is appended further down.
        self.zip_file_name = "outside"

        # Scratch directory, plus the names for the nested zip and the
        # file stored inside it.
        self.tmpdir = tempfile.TemporaryDirectory()
        root = self.tmpdir.name
        names = ZIP_TEST_FILENAME_LIST
        self.nested_zipped_file_name = "nested"
        self.nested_dir_name = names["nested_dir_name"]
        self.nested_dir_path = os.path.join(root, self.nested_dir_name)
        self.nested_zip_file_name = names["nested_zip_file_name"]

        # Directory and file names.
        self.dir_name1 = names["dir_name1"]
        self.dir_name2 = names["dir_name2"]
        self.zipped_file_name = names["zipped_file_name"]
        self.testfile_name = names["testfile_name"]

        # Paths used only while building outside.zip.
        build_dir1 = os.path.join(root, self.dir_name1)
        build_dir2 = os.path.join(root, self.dir_name2)
        build_testfile = os.path.join(root, self.testfile_name)
        build_nested_dir = os.path.join(root, self.nested_dir_name)
        build_zipped_file = os.path.join(build_dir2, self.zipped_file_name)
        build_nested_file = os.path.join(
            build_nested_dir, self.nested_zipped_file_name)
        build_nested_zip = os.path.join(
            build_dir1, self.nested_zip_file_name)

        # Paths used by the tests.
        self.zip_file_path = self.zip_file_name + ".zip"
        self.zipped_file_path = os.path.join(
            self.dir_name2, self.zipped_file_name)
        self.nested_zip_path = os.path.join(
            self.dir_name1, self.nested_zip_file_name)
        self.nested_zipped_file_path = os.path.join(
            self.nested_dir_name, self.nested_zipped_file_name)

        for new_dir in (build_dir1, build_dir2, build_nested_dir):
            os.mkdir(new_dir)

        contents = (
            (build_zipped_file, self.test_string),
            (build_nested_file, self.nested_test_string),
            (build_testfile, self.test_string),
        )
        for path, text in contents:
            with open(path, "w") as out:
                out.write(text)

        # Pack the nested directory into the inner zip, then drop the
        # directory itself before the outer zip is made.
        make_zip(build_nested_zip,
                 root_dir=root,
                 base_dir=self.nested_dir_name)
        shutil.rmtree(build_nested_dir)

        # NOTE(review): the original author states that this will include
        # outside.zip itself in the zip; that depends on make_zip and the
        # working directory -- confirm.
        make_zip(self.zip_file_path,
                 root_dir=root,
                 base_dir=".")
Example #23
0
 def test_read_string(self):
     """Text-mode ``read`` and ``readline`` must both return the fixture.

     The fixture file is opened twice so each call starts from the
     beginning of the file.
     """
     with pfio.create_handler(self.fs) as fs_handler:
         with fs_handler.open(self.tmpfile_name, "r") as reader:
             whole_text = reader.read()
             self.assertEqual(self.test_string, whole_text)
         with fs_handler.open(self.tmpfile_name, "r") as reader:
             first_line = reader.readline()
             self.assertEqual(self.test_string, first_line)
Example #24
0
 def tearDown(self):
     """Remove the fixture file, ignoring ``IOError`` from the removal."""
     fixture_name = self.tmpfile_name
     with pfio.create_handler(self.fs) as fs_handler:
         try:
             fs_handler.remove(fixture_name)
         except IOError:
             # Best-effort cleanup: presumably the file is already gone.
             pass
Example #25
0
 def test_isdir(self):
     """``isdir`` is true for "/" and false for a Python file name."""
     with pfio.create_handler(self.fs) as fs_handler:
         root_is_dir = fs_handler.isdir("/")
         self.assertTrue(root_is_dir)
         script_is_dir = fs_handler.isdir("test_posix_handler.py")
         self.assertFalse(script_is_dir)
Example #26
0
 def test_read_bytes(self):
     """Binary-mode ``read`` must return the fixture bytes unchanged."""
     with pfio.create_handler(self.fs) as fs_handler, \
             fs_handler.open(self.tmpfile_name, "rb") as reader:
         self.assertEqual(self.test_string_b, reader.read())
Example #27
0
 def test_isdir(self):
     """``isdir`` is true for "/" and false for the fixture file."""
     with pfio.create_handler(self.fs) as fs_handler:
         root_is_dir = fs_handler.isdir("/")
         self.assertTrue(root_is_dir)
         fixture_is_dir = fs_handler.isdir(self.tmpfile_name)
         self.assertFalse(fixture_is_dir)
Example #28
0
 def test_info(self):
     """``handler.info()`` must return a ``str``."""
     with pfio.create_handler(self.fs) as fs_handler:
         description = fs_handler.info()
         self.assertIsInstance(description, str)
Example #29
0
    def test_read_non_exist(self):
        """Opening a file name that does not exist must raise ``IOError``."""
        missing = "non_exist_file.txt"

        with pfio.create_handler(self.fs) as fs_handler:
            with self.assertRaises(IOError):
                fs_handler.open(missing)