Example #1
import os
from pathlib import Path


class FakeHadoopFileSystem:
    """Test double that mimics the ``pyarrow.fs.HadoopFileSystem`` interface
    on top of a local directory, so HDFS-dependent code can be exercised
    without a running Hadoop cluster."""

    def __init__(self, *args, **kwargs):
        from pyarrow.fs import LocalFileSystem

        # ``_hdfs_root`` is assumed to be a module-level
        # ``tempfile.TemporaryDirectory()`` backing the fake filesystem.
        self._root = Path(_hdfs_root.name)
        self._fs = LocalFileSystem()

    def _path(self, path):
        # Map an HDFS-style absolute path (or FileSelector) onto the local
        # temporary root.
        from pyarrow.fs import FileSelector

        if isinstance(path, FileSelector):
            return FileSelector(
                os.fspath(self._root / path.base_dir.lstrip("/")),
                path.allow_not_found,
                path.recursive,
            )

        return os.fspath(self._root / path.lstrip("/"))

    def create_dir(self, path):
        return self._fs.create_dir(self._path(path))

    def open_input_stream(self, path):
        return self._fs.open_input_stream(self._path(path))

    def open_output_stream(self, path):
        import posixpath

        # NOTE: HadoopFileSystem.open_output_stream creates directories
        # automatically.
        self.create_dir(posixpath.dirname(path))
        return self._fs.open_output_stream(self._path(path))

    def get_file_info(self, path):
        return self._fs.get_file_info(self._path(path))

    def move(self, from_path, to_path):
        self._fs.move(self._path(from_path), self._path(to_path))

    def delete_file(self, path):
        self._fs.delete_file(self._path(path))
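A minimal sketch of how such a fake could be wired into tests, assuming pytest is used and that the code under test obtains its client via pyarrow.fs.HadoopFileSystem; the fixture name hdfs and the monkeypatching approach are illustrative and not taken from the original example.

import pytest


@pytest.fixture
def hdfs(monkeypatch):
    import pyarrow.fs

    # Swap the real HDFS client for the local fake for the test's duration
    # (hypothetical fixture; the original source may wire this up differently).
    monkeypatch.setattr(pyarrow.fs, "HadoopFileSystem", FakeHadoopFileSystem)
    return FakeHadoopFileSystem()
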
Example #2
import os
from pathlib import Path


class FakeHadoopFileSystem:
    """Extended variant of the fake: it also accepts lists of paths, forwards
    extra keyword arguments to the underlying ``LocalFileSystem``, and rewrites
    ``FileInfo`` paths back into the mocked HDFS namespace."""

    def __init__(self, *args, **kwargs):
        from pyarrow.fs import LocalFileSystem

        # ``_hdfs_root`` is assumed to be a module-level
        # ``tempfile.TemporaryDirectory()`` backing the fake filesystem.
        self._root = Path(_hdfs_root.name)
        self._fs = LocalFileSystem()

    def _path(self, path):
        # Map an HDFS-style path, a FileSelector, or a list of paths onto the
        # local temporary root.
        from pyarrow.fs import FileSelector

        if isinstance(path, FileSelector):
            return FileSelector(
                os.fspath(self._root / path.base_dir.lstrip("/")),
                path.allow_not_found,
                path.recursive,
            )
        if isinstance(path, list):
            return [self._path(sub_path) for sub_path in path]

        return os.fspath(self._root / path.lstrip("/"))

    def create_dir(self, path, **kwargs):
        return self._fs.create_dir(self._path(path), **kwargs)

    def open_input_stream(self, path, **kwargs):
        return self._fs.open_input_stream(self._path(path), **kwargs)

    def open_output_stream(self, path, **kwargs):
        import posixpath

        # NOTE: HadoopFileSystem.open_output_stream creates directories
        # automatically.
        self.create_dir(posixpath.dirname(path))
        return self._fs.open_output_stream(self._path(path), **kwargs)

    def get_file_info(self, path, **kwargs):
        from pyarrow.fs import FileInfo

        # The underlying call returns a single FileInfo for a plain path and a
        # list for a FileSelector or a list of paths; handle both shapes.
        entries = self._fs.get_file_info(self._path(path), **kwargs)
        if isinstance(entries, FileInfo):
            ret = self._adjust_entry(entries)
        else:
            assert isinstance(entries, list)
            ret = list(map(self._adjust_entry, entries))

        return ret

    def _adjust_entry(self, entry):
        # Translate the local absolute path reported by LocalFileSystem back
        # into a posix-style path relative to the fake HDFS root.
        import posixpath

        from pyarrow.fs import FileInfo

        mocked_path = os.path.relpath(entry.path, self._root)
        mocked_parts = mocked_path.split(os.path.sep)
        return FileInfo(
            path=posixpath.join(*mocked_parts),
            type=entry.type,
            mtime=entry.mtime,
            size=entry.size,
        )

    def move(self, from_path, to_path):
        self._fs.move(self._path(from_path), self._path(to_path))

    def delete_file(self, path):
        self._fs.delete_file(self._path(path))
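
A hedged usage sketch (not part of the original example): writing through the fake and then querying it back, assuming the module-level _hdfs_root temporary directory exists as above. It shows that get_file_info reports posix-style paths relative to the fake root rather than local absolute paths.

fs = FakeHadoopFileSystem()

with fs.open_output_stream("/data/part-0.txt") as stream:
    stream.write(b"hello")

info = fs.get_file_info("/data/part-0.txt")
print(info.path, info.size)  # e.g. "data/part-0.txt" 5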