Ejemplo n.º 1
0
    def test_fs_detection_on_container_hdfs(self):
        """Open a zip container through an HDFS path and read a file from it.

        A zip archive of ``self.tmpdir`` is created in the current working
        directory, copied to the path HDFS reports for ``'.'``, and then
        opened with ``pfio.open_as_container``. The file read back from the
        container must equal ``self.test_string_str``.

        Relies on ``self.tmpdir``, ``self.tmpfile_path`` and
        ``self.test_string_str`` being prepared outside this method
        (presumably in ``setUp`` -- confirm against the enclosing class).
        """
        # Create a container for testing
        zip_file_name = "test"
        zip_file_path = zip_file_name + ".zip"

        # in the zip, the leading slash will be removed
        file_name_zip = self.tmpfile_path.lstrip('/')

        # TODO(tianqi): add functionality to pfio
        from pyarrow import hdfs

        conn = hdfs.connect()
        try:
            hdfs_home = conn.info('.')['path']
        finally:
            # Release the connection even when the lookup fails.
            conn.close()

        hdfs_file_path = os.path.join(hdfs_home, zip_file_path)

        shutil.make_archive(zip_file_name, "zip", base_dir=self.tmpdir.name)

        # Each file is removed in a ``finally`` so that a failing assertion
        # does not leave the archive behind for subsequent tests.
        try:
            with pfio.open(hdfs_file_path, "wb") as hdfs_file:
                with pfio.open(zip_file_path, "rb") as posix_file:
                    hdfs_file.write(posix_file.read())

            try:
                with pfio.open_as_container(hdfs_file_path) as container:
                    with container.open(file_name_zip, "r") as f:
                        self.assertEqual(f.read(), self.test_string_str)
            finally:
                pfio.remove(hdfs_file_path)
        finally:
            pfio.remove(zip_file_path)
Ejemplo n.º 2
0
    def test_open_as_container(self):
        """Exercise listing, ``isdir``, ``info`` and ``open`` on a zip container.

        A zip archive of ``self.tmpdir`` is created in the current working
        directory and opened with ``pfio.open_as_container`` after the root
        has been set to ``"posix"``. The archive is removed afterwards, even
        when an assertion fails.

        Relies on ``self.tmpdir``, ``self.tmpfile_path`` and
        ``self.test_string_str`` being prepared outside this method
        (presumably in ``setUp`` -- confirm against the enclosing class).
        """
        # Create a container for testing
        pfio.set_root("posix")
        zip_file_name = "test"
        zip_file_path = zip_file_name + ".zip"

        # in the zip, the leading slash will be removed
        # TODO(tianqi): related to issue #61
        dirname_zip = self.tmpdir.name.lstrip('/') + '/'
        file_name_zip = self.tmpfile_path.lstrip('/')
        first_level_dir = dirname_zip.split('/')[0]

        shutil.make_archive(zip_file_name, "zip", base_dir=self.tmpdir.name)

        # Cleanup lives in ``finally`` so a failing assertion does not leave
        # the archive in the working directory.
        try:
            with pfio.open_as_container(zip_file_path) as container:
                # Listing without an argument: the first path component is
                # expected, the nested file path and empty names are not.
                file_list = list(container.list())
                self.assertIn(first_level_dir, file_list)
                self.assertNotIn(file_name_zip, file_list)
                self.assertNotIn("", file_list)

                # Listing the archived directory: the file's base name is
                # expected, the first path component is not.
                file_list = list(container.list(dirname_zip))
                self.assertNotIn(first_level_dir, file_list)
                self.assertIn(os.path.basename(file_name_zip), file_list)
                self.assertNotIn("", file_list)

                self.assertTrue(container.isdir(dirname_zip))
                self.assertFalse(container.isdir(file_name_zip))

                self.assertIsInstance(container.info(), str)
                with container.open(file_name_zip, "r") as f:
                    self.assertEqual(f.read(), self.test_string_str)
        finally:
            pfio.remove(zip_file_path)
Ejemplo n.º 3
0
    def test_fs_detection_on_container_posix(self):
        """Open a zip container through a ``file://`` path and read from it.

        A zip archive of ``self.tmpdir`` is created in the current working
        directory and opened with ``pfio.open_as_container`` using the
        archive's relative path prefixed with ``file://``. The file read
        back from the container must equal ``self.test_string_str``.

        Relies on ``self.tmpdir``, ``self.tmpfile_path`` and
        ``self.test_string_str`` being prepared outside this method
        (presumably in ``setUp`` -- confirm against the enclosing class).
        """
        # Create a container for testing
        zip_file_name = "test"
        zip_file_path = zip_file_name + ".zip"
        posix_file_path = "file://" + zip_file_path

        # in the zip, the leading slash will be removed
        file_name_zip = self.tmpfile_path.lstrip('/')

        shutil.make_archive(zip_file_name, "zip", base_dir=self.tmpdir.name)

        # Cleanup lives in ``finally`` so a failing assertion does not leave
        # the archive in the working directory.
        try:
            with pfio.open_as_container(posix_file_path) as container:
                with container.open(file_name_zip, "r") as f:
                    self.assertEqual(f.read(), self.test_string_str)
        finally:
            pfio.remove(zip_file_path)