예제 #1
0
파일: test_fs.py 프로젝트: sumithraK/arrow
def subtree_s3fs(request, s3fs):
    """Describe the ``s3fs`` filesystem wrapped in a ``SubTreeFileSystem``.

    The filesystem found under ``s3fs['fs']`` is wrapped with a fixed
    prefix. The returned dict carries the wrapped filesystem (``fs``), a
    callable that prepends the prefix to a path (``pathfn``) and the
    capability flags declared for this configuration.
    """
    subtree_root = 'pyarrow-filesystem/prefix/'
    wrapped = SubTreeFileSystem(subtree_root, s3fs['fs'])
    return {
        'fs': wrapped,
        # str.__add__ bound to the prefix: pathfn(p) == subtree_root + p
        'pathfn': subtree_root.__add__,
        'allow_copy_file': True,
        'allow_move_dir': False,
        'allow_append_to_file': False,
    }
예제 #2
0
def subtree_localfs(request, tempdir, localfs):
    """Describe the ``localfs`` filesystem wrapped in a ``SubTreeFileSystem``.

    Creates the prefix directory underneath ``tempdir`` and wraps the
    filesystem found under ``localfs['fs']`` with that prefix. The returned
    dict carries the wrapped filesystem (``fs``), a callable that prepends
    the prefix to a path (``pathfn``) and the capability flags declared for
    this configuration.
    """
    subtree_root = 'subtree/prefix/'
    subtree_dir = tempdir / subtree_root
    subtree_dir.mkdir(parents=True)
    wrapped = SubTreeFileSystem(subtree_root, localfs['fs'])
    return {
        'fs': wrapped,
        # str.__add__ bound to the prefix: pathfn(p) == subtree_root + p
        'pathfn': subtree_root.__add__,
        'allow_move_dir': True,
        'allow_append_to_file': True,
    }
예제 #3
0
def test_read_pandas_passthrough_keywords(tempdir):
    """Check that ``pq.read_pandas`` forwards the ``filesystem`` keyword.

    A small table is written into ``tempdir`` and then read back using a
    path relative to a ``SubTreeFileSystem`` rooted at ``tempdir``.
    """
    # ARROW-11464 - previously not all keywords were passed through (such as
    # the filesystem keyword)
    frame = pd.DataFrame({'a': [1, 2, 3]})
    target = tempdir / 'data.parquet'
    _write_table(frame, target)

    # The bare file name is passed, so it is the supplied filesystem that
    # has to locate the file under tempdir.
    rooted_fs = SubTreeFileSystem(str(tempdir), LocalFileSystem())
    loaded = pq.read_pandas('data.parquet', filesystem=rooted_fs)

    expected = pa.table(frame)
    assert loaded.equals(expected)
예제 #4
0
def test_filesystem_equals():
    """Exercise ``equals()``, ``==`` and ``!=`` on filesystem objects."""
    local_a = LocalFileSystem()
    local_b = LocalFileSystem()
    mock = _MockFileSystem()

    # Separately constructed LocalFileSystem objects compare equal, while
    # equals() refuses a non-filesystem argument with a TypeError.
    assert local_a.equals(local_a)
    assert local_a.equals(local_b)
    with pytest.raises(TypeError):
        local_a.equals('string')
    assert local_a == local_a == local_b
    assert local_a != 4

    # A mock filesystem equals itself but not another mock instance.
    assert mock == mock
    assert mock != _MockFileSystem()

    # Subtree wrappers compare by both the base path and the wrapped
    # filesystem.
    base_on_local = SubTreeFileSystem('/base', local_a)
    assert base_on_local == SubTreeFileSystem('/base', local_a)
    assert base_on_local != SubTreeFileSystem('/base', mock)
    assert base_on_local != SubTreeFileSystem('/other', local_a)
예제 #5
0
def _ensure_fs(fs_or_uri):
    """Turn a filesystem object or URI string into ``(filesystem, is_local)``.

    Parameters
    ----------
    fs_or_uri : FileSystem or str
        Either something ``_ensure_filesystem`` accepts, or a filesystem URI.
        When the URI carries a path component, that path must point to a
        directory and the result is wrapped in a ``SubTreeFileSystem``
        rooted there.

    Returns
    -------
    tuple
        ``(filesystem, is_local)``. For a URI, ``is_local`` tells whether the
        filesystem parsed from it is a ``LocalFileSystem``; otherwise it
        tells whether the resolved filesystem is a ``LocalFileSystem`` or a
        ``_MockFileSystem``.

    Raises
    ------
    ValueError
        If the path component of the URI is not a directory.
    TypeError
        If a non-string argument is rejected by ``_ensure_filesystem``.
    """
    from pyarrow.fs import (
        FileSystem, LocalFileSystem, SubTreeFileSystem, FileType,
        _ensure_filesystem
    )

    if not isinstance(fs_or_uri, str):
        try:
            resolved = _ensure_filesystem(fs_or_uri)
        except TypeError:
            raise TypeError(
                '`filesystem` argument must be a FileSystem instance or a valid '
                'file system URI'
            )
        treated_as_local = isinstance(
            resolved, (LocalFileSystem, _MockFileSystem))
        return resolved, treated_as_local

    # instantiate the file system from an uri, if the uri has a path
    # component then it will be treated as a path prefix
    base_fs, root = FileSystem.from_uri(fs_or_uri)
    local = isinstance(base_fs, LocalFileSystem)
    root = base_fs.normalize_path(root)
    if not root:
        return base_fs, local

    # validate that the prefix is pointing to a directory
    root_info = base_fs.get_file_info([root])[0]
    if root_info.type != FileType.Directory:
        raise ValueError(
            "The path component of the filesystem URI must point to a "
            "directory but it has a type: `{}`. The path component "
            "is `{}` and the given filesystem URI is `{}`".format(
                root_info.type.name, root_info.path, fs_or_uri
            )
        )
    return SubTreeFileSystem(root, base_fs), local
예제 #6
0
try:
    import pathlib
except ImportError:
    import pathlib2 as pathlib  # py2 compat

import pytest

from pyarrow import ArrowIOError
from pyarrow.fs import (FileType, Selector, FileSystem, LocalFileSystem,
                        SubTreeFileSystem)
from pyarrow.tests.test_io import gzip_compress, gzip_decompress


@pytest.fixture(params=[
    pytest.param(
        lambda root: LocalFileSystem(),
        id='LocalFileSystem',
    ),
    pytest.param(
        lambda root: SubTreeFileSystem(root, LocalFileSystem()),
        id='SubTreeFileSystem(LocalFileSystem)',
    ),
])
def fs(request, tempdir):
    """Build the filesystem under test for the current parametrization.

    Each parameter is a factory that receives the POSIX-style string form of
    ``tempdir``; one returns a plain ``LocalFileSystem`` and the other a
    ``SubTreeFileSystem`` rooted at that directory.
    """
    make_fs = request.param
    return make_fs(tempdir.as_posix())


@pytest.fixture
def testpath(request, fs, tempdir):
    # we always use the tempdir for reading and writing test artifacts, but
    # if the filesystem is wrapped in a SubTreeFileSystem then we don't need
    # to prepend the path with the tempdir, we also test the API with both
    # pathlib.Path objects and plain python strings
    def convert(path):
        if isinstance(fs, SubTreeFileSystem):
            path = pathlib.Path(path)