class FakeHadoopFileSystem:
    """Stand-in for a Hadoop filesystem backed by the local disk.

    Every operation is delegated to a ``pyarrow.fs.LocalFileSystem`` after
    the incoming path has been re-rooted under the directory named by the
    module-level ``_hdfs_root`` object.
    """

    def __init__(self, *args, **kwargs):
        """Create the fake; all positional and keyword arguments are ignored."""
        from pyarrow.fs import LocalFileSystem

        self._root = Path(_hdfs_root.name)
        self._fs = LocalFileSystem()

    def _path(self, path):
        """Translate a fake-HDFS path (or ``FileSelector``) to a local one.

        Leading ``/`` characters are stripped so the path is always joined
        beneath ``self._root`` instead of replacing it.
        """
        from pyarrow.fs import FileSelector

        if not isinstance(path, FileSelector):
            return os.fspath(self._root / path.lstrip("/"))

        # Rebuild the selector around the re-rooted base directory, carrying
        # over its other two settings unchanged.
        rebased_dir = os.fspath(self._root / path.base_dir.lstrip("/"))
        return FileSelector(rebased_dir, path.allow_not_found, path.recursive)

    def create_dir(self, path):
        """Create ``path`` under the fake root via the local filesystem."""
        local_path = self._path(path)
        return self._fs.create_dir(local_path)

    def open_input_stream(self, path):
        """Open ``path`` under the fake root for reading."""
        local_path = self._path(path)
        return self._fs.open_input_stream(local_path)

    def open_output_stream(self, path):
        """Open ``path`` under the fake root for writing."""
        import posixpath

        # NOTE: HadoopFileSystem.open_output_stream creates directories
        # automatically.
        parent_dir = posixpath.dirname(path)
        self.create_dir(parent_dir)
        local_path = self._path(path)
        return self._fs.open_output_stream(local_path)

    def get_file_info(self, path):
        """Return the local filesystem's file info for the translated path.

        The result is passed through as-is, so it is whatever the local
        filesystem reports for the re-rooted path.
        """
        local_path = self._path(path)
        return self._fs.get_file_info(local_path)

    def move(self, from_path, to_path):
        """Move ``from_path`` to ``to_path``, both translated to local paths."""
        source = self._path(from_path)
        target = self._path(to_path)
        self._fs.move(source, target)

    def delete_file(self, path):
        """Delete the file at ``path`` under the fake root."""
        local_path = self._path(path)
        self._fs.delete_file(local_path)
class FakeHadoopFileSystem:
    """Stand-in for a Hadoop filesystem backed by the local disk.

    Paths handed to the fake are re-rooted under the directory named by the
    module-level ``_hdfs_root`` object and the call is forwarded to a
    ``pyarrow.fs.LocalFileSystem``.  File info returned to the caller has
    its path rewritten back so that it is expressed relative to that root
    with POSIX separators rather than as a real local path.
    """

    def __init__(self, *args, **kwargs):
        """Create the fake; all positional and keyword arguments are ignored."""
        from pyarrow.fs import LocalFileSystem

        self._root = Path(_hdfs_root.name)
        self._fs = LocalFileSystem()

    def _path(self, path):
        """Translate a fake-HDFS path into a path under ``self._root``.

        Accepts a string, a ``FileSelector`` (a new selector with a
        re-rooted ``base_dir`` is returned), or a list of either (translated
        element by element).  Leading ``/`` characters are stripped so the
        path is joined beneath the root instead of replacing it.
        """
        from pyarrow.fs import FileSelector

        if isinstance(path, FileSelector):
            return FileSelector(
                os.fspath(self._root / path.base_dir.lstrip("/")),
                path.allow_not_found,
                path.recursive,
            )
        if isinstance(path, list):
            return [self._path(sub_path) for sub_path in path]
        return os.fspath(self._root / path.lstrip("/"))

    def create_dir(self, path, **kwargs):
        """Create ``path`` under the fake root; ``kwargs`` are forwarded."""
        return self._fs.create_dir(self._path(path), **kwargs)

    def open_input_stream(self, path, **kwargs):
        """Open ``path`` under the fake root for reading; ``kwargs`` are forwarded."""
        return self._fs.open_input_stream(self._path(path), **kwargs)

    def open_output_stream(self, path, **kwargs):
        """Open ``path`` under the fake root for writing; ``kwargs`` are forwarded."""
        import posixpath

        # NOTE: HadoopFileSystem.open_output_stream creates directories
        # automatically.
        self.create_dir(posixpath.dirname(path))
        return self._fs.open_output_stream(self._path(path), **kwargs)

    def get_file_info(self, path, **kwargs):
        """Return file info for ``path`` with paths mapped back to fake form.

        Returns a single adjusted ``FileInfo`` when the local filesystem
        returns one, otherwise a list of adjusted entries.
        """
        from pyarrow.fs import FileInfo

        entries = self._fs.get_file_info(self._path(path), **kwargs)
        if isinstance(entries, FileInfo):
            return self._adjust_entry(entries)

        # Internal invariant: anything that is not a single FileInfo is
        # expected to be a list of them.
        assert isinstance(entries, list)
        return [self._adjust_entry(entry) for entry in entries]

    def _adjust_entry(self, entry):
        """Return a copy of ``entry`` whose path is relative to the fake root.

        The local path is made relative to ``self._root`` and its
        components are re-joined with POSIX separators; ``type``, ``mtime``
        and ``size`` are copied from the original entry.
        """
        import posixpath

        from pyarrow.fs import FileInfo

        mocked_path = os.path.relpath(entry.path, self._root)
        mocked_parts = mocked_path.split(os.path.sep)
        return FileInfo(
            path=posixpath.join(*mocked_parts),
            type=entry.type,
            mtime=entry.mtime,
            size=entry.size,
        )

    def move(self, from_path, to_path):
        """Move ``from_path`` to ``to_path``, both translated to local paths."""
        self._fs.move(self._path(from_path), self._path(to_path))

    def delete_file(self, path):
        """Delete the file at ``path`` under the fake root."""
        self._fs.delete_file(self._path(path))