예제 #1
0
def test_get_filesystem_custom_filesystem():
    """Check that ``get_filesystem`` resolves a custom-registered protocol.

    A ``LocalFileSystem`` subclass is registered with fsspec under the
    ``dummy`` protocol; a path carrying that protocol prefix must then be
    resolved to an instance of the subclass.
    """
    protocol = "dummy"

    class DummyFileSystem(LocalFileSystem):
        pass

    # clobber=True so re-running the test can re-register the same protocol.
    fsspec.register_implementation(protocol, DummyFileSystem, clobber=True)

    url_root = f"{protocol}://"
    output_file = os.path.join(url_root, "tmpdir/tmp_file")
    filesystem = get_filesystem(output_file)
    assert isinstance(filesystem, DummyFileSystem)
def register_implementation(protocol='https'):
    """
    Register dCacheFileSystem as fsspec backend, and unregister it afterwards

    The function is a generator with a single ``yield``: the registration
    happens before the yield and the registry entry is removed once the
    generator is resumed or closed.

    NOTE(review): no decorator is visible on this definition; it is
    presumably meant to be wrapped (e.g. as a context manager or fixture)
    by code outside this snippet -- confirm.

    :param protocol: (str) URLs with this protocol will be open using
        dCacheFileSystem from fsspec
    """
    fsspec.register_implementation(protocol, dCacheFileSystem, clobber=True)
    try:
        yield
    finally:
        # Use a default so that a missing entry does not raise KeyError here
        # and mask an exception propagating out of the ``yield``.
        fsspec.registry.target.pop(protocol, None)
예제 #3
0
def fsspectest():
    """Yield an in-memory filesystem registered under the ``testmem`` protocol.

    Skips when fsspec is not importable. After the yield, the protocol is
    removed from fsspec's registry and the state shared on the filesystem
    class (recorded ``test`` value and the in-memory store) is reset.
    """
    pytest.importorskip("fsspec")
    from fsspec.registry import _registry as registry
    from fsspec.implementations.memory import MemoryFileSystem
    from fsspec import register_implementation

    class TestMemoryFS(MemoryFileSystem):
        protocol = "testmem"
        # One-element list on the class: holds the ``test`` keyword passed to
        # the most recent constructor call.
        test = [None]

        def __init__(self, **kwargs):
            test_value = kwargs.pop("test", None)
            self.test[0] = test_value
            super().__init__(**kwargs)

    register_implementation("testmem", TestMemoryFS, clobber=True)
    filesystem = TestMemoryFS()
    yield filesystem

    # Teardown: unregister the protocol and wipe the class-level state.
    registry.pop("testmem", None)
    TestMemoryFS.test[0] = None
    TestMemoryFS.store.clear()
예제 #4
0
def test_torchscript_save_load_custom_filesystem(tmpdir, modelclass):
    """Check that a scripted LightningModule saved via a custom filesystem can be loaded back."""
    protocol = "dummy"
    separator = "://"

    class DummyFileSystem(LocalFileSystem):
        pass

    fsspec.register_implementation(protocol, DummyFileSystem, clobber=True)

    model = modelclass()

    # NOTE(review): os.path.join discards every component that precedes an
    # absolute one. If ``tmpdir`` is an absolute path (as a pytest temporary
    # directory presumably is), the "dummy" prefix never reaches
    # ``output_file`` -- confirm this is intended.
    path_parts = (protocol, separator, tmpdir, "model.pt")
    output_file = os.path.join(*path_parts)
    script = model.to_torchscript(file_path=output_file)

    # Read the saved script back through the filesystem resolved for the path.
    fs = get_filesystem(output_file)
    with fs.open(output_file, "rb") as f:
        loaded_script = torch.jit.load(f)

    saved_param = next(script.parameters())
    loaded_param = next(loaded_script.parameters())
    assert torch.allclose(saved_param, loaded_param)
예제 #5
0
import aiohttp
import configparser
import dcachefs
import fsspec
import os
import pathlib

from fsspec.core import split_protocol

# Register dCacheFileSystem with fsspec under the "dcache" protocol.
fsspec.register_implementation("dcache", dcachefs.dCacheFileSystem)

# Default chunk size for streaming, in bytes (5 MiB).
CHUNKSIZE = 5 * 1024 * 1024


def configure_filesystem(filesystem="https",
                         username=None,
                         password=None,
                         token_filename=None):
    """
    Configure a http-based filesystem with authentication credentials.

    :param filesystem: (str)
    :param username: (optional, str)
    :param password: (optional, str)
    :param token_filename: (optional, str) path to file with the token
    """
    client_kwargs = {}
    # use username/password authentication
    if (username is None) ^ (password is None):
        raise ValueError("Username or password not provided")
    if (username is not None) and (password is not None):
예제 #6
0
import base64
import json
import sys
from typing import Dict
from urllib.parse import urlparse

import fsspec
import fsspec.implementations.hdfs
import pyarrow as pa
import pyorc
import vineyard
from vineyard.io.dataframe import DataframeStreamBuilder

from vineyard.drivers.io import ossfs

# Register extra fsspec protocols at import time: "hive" is served by the
# PyArrow HDFS implementation and "oss" by the OSS filesystem imported from
# vineyard's io drivers.
fsspec.register_implementation("hive", fsspec.implementations.hdfs.PyArrowHDFS)
fsspec.register_implementation("oss", ossfs.OSSFileSystem)


def arrow_type(field):
    if field.name == "decimal":
        return pa.decimal128(field.precision)
    elif field.name == "uniontype":
        return pa.union(field.cont_types)
    elif field.name == "array":
        return pa.list_(field.type)
    elif field.name == "map":
        return pa.map_(field.key, field.value)
    elif field.name == "struct":
        return pa.struct(field.fields)
    else:
예제 #7
0
파일: __init__.py 프로젝트: admariner/nlp
import importlib

import fsspec

from . import compression

# ``import importlib`` alone does not guarantee that the ``importlib.util``
# submodule is loaded, so import it explicitly before calling ``find_spec``.
import importlib.util

# True when the optional ``s3fs`` package can be found (it is not imported here).
_has_s3fs = importlib.util.find_spec("s3fs") is not None

if _has_s3fs:
    from .s3filesystem import S3FileSystem  # noqa: F401

# Register custom filesystems
fsspec.register_implementation(compression.gzip.GZipFileSystem.protocol,
                               compression.gzip.GZipFileSystem)


def extract_path_from_uri(dataset_path: str) -> str:
    """
    Preprocesses `dataset_path` and removes the remote filesystem prefix (e.g. removing ``s3://``)

    Args:
        dataset_path (``str``): path (e.g. ``dataset/train``) or remote uri (e.g. ``s3://my-bucket/dataset/train``) of the dataset directory

    Returns:
        ``str``: everything after the first ``://`` when one is present, otherwise `dataset_path` unchanged
    """
    if "://" in dataset_path:
        # Split on the first separator only, so a later "://" inside the path
        # is kept instead of silently truncating the result.
        dataset_path = dataset_path.split("://", 1)[1]
    return dataset_path


def is_remote_filesystem(fs: fsspec.spec.AbstractFileSystem) -> bool:
    """
    Validates if filesystem has remote protocol.
예제 #8
0
import base64
import json
import sys
from typing import Dict

import fsspec
import pyarrow as pa
import pyorc
import vineyard
from fsspec.utils import read_block
from vineyard.io.byte import ByteStreamBuilder

import ossfs

# Register the OSS filesystem with fsspec under the "oss" protocol at import time.
fsspec.register_implementation("oss", ossfs.OSSFileSystem)


def read_bytes(
    vineyard_socket: str,
    path: str,
    storage_options: Dict,
    read_options: Dict,
    proc_num: int,
    proc_index: int,
):
    client = vineyard.connect(vineyard_socket)
    builder = ByteStreamBuilder(client)

    header_row = read_options.get("header_row", False)
    for k, v in read_options.items():
예제 #9
0
# See the License for the specific language governing permissions and
# limitations under the License.
"""v3iofs - An fsspec driver for v3io"""

# Names exported by ``from v3iofs import *``.
__all__ = ['__version__', 'V3ioFS', 'V3ioFile']

# Package version string.
__version__ = '0.1.2'

import fsspec

from .file import V3ioFile  # noqa: F401
from .fs import V3ioFS  # noqa: F401

if not hasattr(fsspec, 'register_implementation'):
    # This fsspec has no ``register_implementation``: declare the class by
    # its dotted path in the table of known implementations instead.
    from fsspec.registry import known_implementations
    known_implementations['v3io'] = {
        'class': 'v3iofs.V3ioFS',
        'err': 'Please install v3iofs to use the v3io fileysstem class'
    }

    del known_implementations
else:
    # TODO: Not sure about clobber=True
    fsspec.register_implementation('v3io', V3ioFS, clobber=True)

del fsspec  # clear the module namespace
예제 #10
0
# ``importlib.util`` is a submodule; importing it explicitly guarantees it is
# loaded (a bare ``import importlib`` does not).
import importlib.util

import fsspec

# Both filesystems are registered by dotted import path (a string), not by
# class object.

# The "flywheel" protocol is only registered when the optional ``flywheel``
# package can be found.
if importlib.util.find_spec("flywheel") is not None:
    fsspec.register_implementation(
        "flywheel", "intake_io.fsspec.flywheel.FlywheelFileSystem")

# The "render" protocol is always registered.
fsspec.register_implementation("render",
                               "intake_io.fsspec.render.RenderFileSystem")
예제 #11
0
# True when the optional ``s3fs`` package can be found (it is not imported here).
_has_s3fs = importlib.util.find_spec("s3fs") is not None

# Only expose the S3 filesystem wrapper when ``s3fs`` is available.
if _has_s3fs:
    from .s3filesystem import S3FileSystem  # noqa: F401

# Compression filesystem classes to register with fsspec.
# NOTE(review): the list holds classes rather than instances, so
# ``List[Type[...]]`` would describe it more precisely -- confirm ``Type`` is
# imported before changing the annotation.
COMPRESSION_FILESYSTEMS: List[compression.BaseCompressedFileFileSystem] = [
    compression.Bz2FileSystem,
    compression.GzipFileSystem,
    compression.Lz4FileSystem,
    compression.XzFileSystem,
    compression.ZstdFileSystem,
]

# Register custom filesystems
# Each class is registered under the protocol name it declares itself.
for fs_class in COMPRESSION_FILESYSTEMS:
    fsspec.register_implementation(fs_class.protocol, fs_class)


def extract_path_from_uri(dataset_path: str) -> str:
    """
    Preprocesses `dataset_path` and removes the remote filesystem prefix (e.g. removing ``s3://``)

    Args:
        dataset_path (``str``): path (e.g. ``dataset/train``) or remote uri (e.g. ``s3://my-bucket/dataset/train``) of the dataset directory

    Returns:
        ``str``: everything after the first ``://`` when one is present, otherwise `dataset_path` unchanged
    """
    if "://" in dataset_path:
        # Split on the first separator only, so a later "://" inside the path
        # is kept instead of silently truncating the result.
        dataset_path = dataset_path.split("://", 1)[1]
    return dataset_path


def is_remote_filesystem(fs: fsspec.AbstractFileSystem) -> bool:
예제 #12
0
파일: __init__.py 프로젝트: d70-t/ipfsspec
from .core import IPFSFileSystem
from fsspec import register_implementation

from ._version import get_versions
# Read the package version, then remove the helper from the module namespace.
__version__ = get_versions()['version']
del get_versions

# Register the IPFS filesystem with fsspec under the protocol name declared
# on the class itself.
register_implementation(IPFSFileSystem.protocol, IPFSFileSystem)

# Names exported by ``from <package> import *``.
__all__ = ["__version__", "IPFSFileSystem"]
예제 #13
0
파일: __init__.py 프로젝트: cjalmeida/adlfs
from .spec import AzureDatalakeFileSystem
from .spec import AzureBlobFileSystem, AzureBlobFile
from ._version import get_versions

import fsspec

# Names exported by ``from <package> import *``.
__all__ = ["AzureBlobFileSystem", "AzureBlobFile", "AzureDatalakeFileSystem"]

# Resolve the package version once, then remove the helper from the module
# namespace. (A second, identical import/compute/delete sequence that used to
# follow was redundant and has been dropped.)
__version__ = get_versions()["version"]
del get_versions

if not hasattr(fsspec, "register_implementation"):
    # This fsspec has no ``register_implementation``: declare the class by
    # its dotted path in the table of known implementations instead.
    from fsspec.registry import known_implementations

    known_implementations["abfss"] = {
        "class": "adlfs.AzureBlobFileSystem",
        "err": "Please install adlfs to use the abfss protocol",
    }

    del known_implementations
else:
    # Register the blob filesystem class directly under the "abfss" protocol.
    fsspec.register_implementation("abfss", AzureBlobFileSystem, clobber=True)

del fsspec  # clear the module namespace