def subtree_s3fs(request, s3fs):
    """Describe a SubTreeFileSystem layered over the ``s3fs`` filesystem.

    The returned dict holds the wrapped filesystem under ``fs``, a callable
    under ``pathfn`` that prepends the subtree prefix to a path, and the
    ``allow_copy_file`` / ``allow_move_dir`` / ``allow_append_to_file``
    capability flags.
    """
    base_prefix = 'pyarrow-filesystem/prefix/'
    wrapped = SubTreeFileSystem(base_prefix, s3fs['fs'])
    return {
        'fs': wrapped,
        'pathfn': base_prefix.__add__,
        'allow_copy_file': True,
        'allow_move_dir': False,
        'allow_append_to_file': False,
    }
def subtree_localfs(request, tempdir, localfs):
    """Describe a SubTreeFileSystem layered over the ``localfs`` filesystem.

    The prefix directory is created below ``tempdir`` first. The returned
    dict holds the wrapped filesystem under ``fs``, a callable under
    ``pathfn`` that prepends the subtree prefix to a path, and the
    ``allow_move_dir`` / ``allow_append_to_file`` capability flags.
    """
    base_prefix = 'subtree/prefix/'
    prefix_dir = tempdir / base_prefix
    prefix_dir.mkdir(parents=True)

    wrapped = SubTreeFileSystem(base_prefix, localfs['fs'])
    return {
        'fs': wrapped,
        'pathfn': base_prefix.__add__,
        'allow_move_dir': True,
        'allow_append_to_file': True,
    }
def test_read_pandas_passthrough_keywords(tempdir):
    """Check that ``pq.read_pandas`` honours the ``filesystem`` keyword."""
    # ARROW-11464 - previously not all keywords were passed through (such as
    # the filesystem keyword)
    frame = pd.DataFrame({'a': [1, 2, 3]})
    target = tempdir / 'data.parquet'
    _write_table(frame, target)

    # the file is addressed by its bare name, relative to a subtree
    # filesystem rooted at tempdir
    rooted_fs = SubTreeFileSystem(str(tempdir), LocalFileSystem())
    loaded = pq.read_pandas('data.parquet', filesystem=rooted_fs)

    expected = pa.table(frame)
    assert loaded.equals(expected)
def test_filesystem_equals():
    """Exercise ``equals``, ``==`` and ``!=`` across filesystem kinds."""
    local_a = LocalFileSystem()
    local_b = LocalFileSystem()
    mock = _MockFileSystem()

    # explicit equals() accepts filesystems only
    assert local_a.equals(local_a)
    assert local_a.equals(local_b)
    with pytest.raises(TypeError):
        local_a.equals('string')

    # operator forms: local filesystems compare equal to each other,
    # and unequal to a non-filesystem object
    assert local_a == local_a
    assert local_a == local_b
    assert local_a != 4

    # a mock filesystem equals itself but not a separately created mock
    assert mock == mock
    assert mock != _MockFileSystem()

    # subtree filesystems compare on both base path and wrapped filesystem
    base_on_local = SubTreeFileSystem('/base', local_a)
    assert base_on_local == SubTreeFileSystem('/base', local_a)
    assert base_on_local != SubTreeFileSystem('/base', mock)
    assert base_on_local != SubTreeFileSystem('/other', local_a)
def _ensure_fs(fs_or_uri):
    """Turn a filesystem object or URI string into ``(filesystem, is_local)``.

    Parameters
    ----------
    fs_or_uri : str or filesystem-like
        A URI string, or an object accepted by
        ``pyarrow.fs._ensure_filesystem``.

    Returns
    -------
    tuple
        The resolved filesystem and a bool. For a URI the bool tells whether
        the filesystem created from it is a ``LocalFileSystem``; when the URI
        has a path component the filesystem is wrapped in a
        ``SubTreeFileSystem`` rooted at that path. For a non-string argument
        the bool is True for ``LocalFileSystem`` and ``_MockFileSystem``
        instances.

    Raises
    ------
    ValueError
        If the path component of the URI does not point to a directory.
    TypeError
        If a non-string argument cannot be converted to a filesystem.
    """
    from pyarrow.fs import (
        FileSystem, LocalFileSystem, SubTreeFileSystem, FileType,
        _ensure_filesystem
    )

    if not isinstance(fs_or_uri, str):
        try:
            resolved = _ensure_filesystem(fs_or_uri)
        except TypeError:
            raise TypeError(
                '`filesystem` argument must be a FileSystem instance or a valid '
                'file system URI'
            )
        treat_as_local = isinstance(
            resolved, (LocalFileSystem, _MockFileSystem)
        )
        return resolved, treat_as_local

    # instantiate the file system from an uri, if the uri has a path
    # component then it will be treated as a path prefix
    resolved, path_prefix = FileSystem.from_uri(fs_or_uri)
    # locality is decided on the bare filesystem, before any subtree wrapping
    treat_as_local = isinstance(resolved, LocalFileSystem)
    path_prefix = resolved.normalize_path(path_prefix)
    if not path_prefix:
        return resolved, treat_as_local

    # validate that the prefix is pointing to a directory
    info = resolved.get_file_info([path_prefix])[0]
    if info.type != FileType.Directory:
        raise ValueError(
            "The path component of the filesystem URI must point to a "
            "directory but it has a type: `{}`. The path component "
            "is `{}` and the given filesystem URI is `{}`".format(
                info.type.name, info.path, fs_or_uri
            )
        )
    return SubTreeFileSystem(path_prefix, resolved), treat_as_local
try: import pathlib except ImportError: import pathlib2 as pathlib # py2 compat import pytest from pyarrow import ArrowIOError from pyarrow.fs import (FileType, Selector, FileSystem, LocalFileSystem, SubTreeFileSystem) from pyarrow.tests.test_io import gzip_compress, gzip_decompress @pytest.fixture(params=[ pytest.param(lambda tmp: LocalFileSystem(), id='LocalFileSystem'), pytest.param(lambda tmp: SubTreeFileSystem(tmp, LocalFileSystem()), id='SubTreeFileSystem(LocalFileSystem)') ]) def fs(request, tempdir): return request.param(tempdir.as_posix()) @pytest.fixture def testpath(request, fs, tempdir): # we always use the tempdir for reading and writing test artifacts, but # if the filesystem is wrapped in a SubTreeFileSystem then we don't need # to prepend the path with the tempdir, we also test the API with both # pathlib.Path objects and plain python strings def convert(path): if isinstance(fs, SubTreeFileSystem): path = pathlib.Path(path)