def test_get_filesystem_custom_filesystem():
    """``get_filesystem`` resolves a URL with a custom registered protocol to that filesystem class."""
    _DUMMY_PREFIX = "dummy"  # fixed misspelled constant (_DUMMY_PRFEIX)

    class DummyFileSystem(LocalFileSystem):
        ...

    # clobber=True so repeated test runs can re-register the protocol.
    fsspec.register_implementation(_DUMMY_PREFIX, DummyFileSystem, clobber=True)
    output_file = os.path.join(f"{_DUMMY_PREFIX}://", "tmpdir/tmp_file")
    assert isinstance(get_filesystem(output_file), DummyFileSystem)
def register_implementation(protocol='https'):
    """Temporarily register ``dCacheFileSystem`` as the fsspec backend.

    While the generator is suspended at ``yield``, URLs using *protocol*
    are opened with ``dCacheFileSystem``; the registration is removed
    again when the caller resumes/closes the generator.

    :param protocol: (str) URLs with this protocol will be open using
        dCacheFileSystem from fsspec
    """
    fsspec.register_implementation(protocol, dCacheFileSystem, clobber=True)
    try:
        yield
    finally:
        # Always undo the registration, even if the caller raised.
        fsspec.registry.target.pop(protocol)
def fsspectest():
    """Fixture: a memory filesystem registered under the ``testmem`` protocol.

    The yielded filesystem records the ``test`` storage option it was
    constructed with (in the class-level ``test`` slot) so tests can
    inspect it; registration and state are cleaned up on teardown.
    """
    pytest.importorskip("fsspec")
    from fsspec import register_implementation
    from fsspec.implementations.memory import MemoryFileSystem
    from fsspec.registry import _registry as registry

    class TestMemoryFS(MemoryFileSystem):
        protocol = "testmem"
        test = [None]  # shared one-slot list capturing the last ``test`` kwarg

        def __init__(self, **storage_options):
            # Remember the ``test`` option (if given) before delegating.
            self.test[0] = storage_options.pop("test", None)
            super().__init__(**storage_options)

    register_implementation("testmem", TestMemoryFS, clobber=True)
    yield TestMemoryFS()
    # Teardown: deregister the protocol and reset class-level state.
    registry.pop("testmem", None)
    TestMemoryFS.test[0] = None
    TestMemoryFS.store.clear()
def test_torchscript_save_load_custom_filesystem(tmpdir, modelclass):
    """Test that scripted LightningModule is correctly saved and can be loaded with custom filesystems."""
    _DUMMY_PREFIX = "dummy"  # fixed misspelled constant (_DUMMY_PRFEIX)
    _PREFIX_SEPARATOR = "://"

    class DummyFileSystem(LocalFileSystem):
        ...

    fsspec.register_implementation(_DUMMY_PREFIX, DummyFileSystem, clobber=True)
    model = modelclass()
    # NOTE(review): os.path.join drops all components preceding an absolute one,
    # so with pytest's absolute ``tmpdir`` the "dummy://" prefix likely never
    # reaches the final path — confirm whether the prefix is actually exercised.
    # Kept as-is here to preserve the test's observable behavior.
    output_file = os.path.join(_DUMMY_PREFIX, _PREFIX_SEPARATOR, tmpdir, "model.pt")
    script = model.to_torchscript(file_path=output_file)
    fs = get_filesystem(output_file)
    with fs.open(output_file, "rb") as f:
        loaded_script = torch.jit.load(f)
    assert torch.allclose(next(script.parameters()), next(loaded_script.parameters()))
import aiohttp import configparser import dcachefs import fsspec import os import pathlib from fsspec.core import split_protocol fsspec.register_implementation("dcache", dcachefs.dCacheFileSystem) CHUNKSIZE = 5 * 2**20 # default chunk size for streaming def configure_filesystem(filesystem="https", username=None, password=None, token_filename=None): """ Configure a http-based filesystem with authentication credentials. :param filesystem: (str) :param username: (optional, str) :param password: (optional, str) :param token_filename: (optional, str) path to file with the token """ client_kwargs = {} # use username/password authentication if (username is None) ^ (password is None): raise ValueError("Username or password not provided") if (username is not None) and (password is not None):
import base64 import json import sys from typing import Dict from urllib.parse import urlparse import fsspec import fsspec.implementations.hdfs import pyarrow as pa import pyorc import vineyard from vineyard.io.dataframe import DataframeStreamBuilder from vineyard.drivers.io import ossfs fsspec.register_implementation("hive", fsspec.implementations.hdfs.PyArrowHDFS) fsspec.register_implementation("oss", ossfs.OSSFileSystem) def arrow_type(field): if field.name == "decimal": return pa.decimal128(field.precision) elif field.name == "uniontype": return pa.union(field.cont_types) elif field.name == "array": return pa.list_(field.type) elif field.name == "map": return pa.map_(field.key, field.value) elif field.name == "struct": return pa.struct(field.fields) else:
import importlib import fsspec from . import compression _has_s3fs = importlib.util.find_spec("s3fs") is not None if _has_s3fs: from .s3filesystem import S3FileSystem # noqa: F401 # Register custom filesystems fsspec.register_implementation(compression.gzip.GZipFileSystem.protocol, compression.gzip.GZipFileSystem) def extract_path_from_uri(dataset_path: str) -> str: """ preprocesses `dataset_path` and removes remote filesystem (e.g. removing ``s3://``) Args: dataset_path (``str``): path (e.g. ``dataset/train``) or remote uri (e.g. ``s3://my-bucket/dataset/train``) of the dataset directory """ if "://" in dataset_path: dataset_path = dataset_path.split("://")[1] return dataset_path def is_remote_filesystem(fs: fsspec.spec.AbstractFileSystem) -> bool: """ Validates if filesystem has remote protocol.
import base64 import json import sys from typing import Dict import fsspec import pyarrow as pa import pyorc import vineyard from fsspec.utils import read_block from vineyard.io.byte import ByteStreamBuilder import ossfs fsspec.register_implementation("oss", ossfs.OSSFileSystem) def read_bytes( vineyard_socket: str, path: str, storage_options: Dict, read_options: Dict, proc_num: int, proc_index: int, ): client = vineyard.connect(vineyard_socket) builder = ByteStreamBuilder(client) header_row = read_options.get("header_row", False) for k, v in read_options.items():
# See the License for the specific language governing permissions and
# limitations under the License.
"""v3iofs - An fsspec driver for v3io"""

__all__ = [
    '__version__',
    'V3ioFS',
    'V3ioFile',
]

__version__ = '0.1.2'

import fsspec

from .file import V3ioFile  # noqa: F401
from .fs import V3ioFS  # noqa: F401

if hasattr(fsspec, 'register_implementation'):
    # TODO: Not sure about clobber=True
    fsspec.register_implementation('v3io', V3ioFS, clobber=True)
else:
    # Older fsspec without register_implementation: declare the backend
    # lazily so fsspec can import it (or raise ``err``) on first use.
    from fsspec.registry import known_implementations

    known_implementations['v3io'] = {
        'class': 'v3iofs.V3ioFS',
        # fixed typo in user-facing message: "fileysstem" -> "filesystem"
        'err': 'Please install v3iofs to use the v3io filesystem class'
    }
    del known_implementations

del fsspec  # clear the module namespace
import importlib

import fsspec

# Flywheel support is optional: register its filesystem only when the
# ``flywheel`` package is importable. The dotted string class path keeps
# the actual import lazy until fsspec first needs the class.
if importlib.util.find_spec("flywheel") is not None:
    fsspec.register_implementation(
        "flywheel", "intake_io.fsspec.flywheel.FlywheelFileSystem")

# The render filesystem is always registered (also via a lazy dotted path).
fsspec.register_implementation("render", "intake_io.fsspec.render.RenderFileSystem")
_has_s3fs = importlib.util.find_spec("s3fs") is not None if _has_s3fs: from .s3filesystem import S3FileSystem # noqa: F401 COMPRESSION_FILESYSTEMS: List[compression.BaseCompressedFileFileSystem] = [ compression.Bz2FileSystem, compression.GzipFileSystem, compression.Lz4FileSystem, compression.XzFileSystem, compression.ZstdFileSystem, ] # Register custom filesystems for fs_class in COMPRESSION_FILESYSTEMS: fsspec.register_implementation(fs_class.protocol, fs_class) def extract_path_from_uri(dataset_path: str) -> str: """ preprocesses `dataset_path` and removes remote filesystem (e.g. removing ``s3://``) Args: dataset_path (``str``): path (e.g. ``dataset/train``) or remote uri (e.g. ``s3://my-bucket/dataset/train``) of the dataset directory """ if "://" in dataset_path: dataset_path = dataset_path.split("://")[1] return dataset_path def is_remote_filesystem(fs: fsspec.AbstractFileSystem) -> bool:
from .core import IPFSFileSystem
from fsspec import register_implementation

from ._version import get_versions

__all__ = ["__version__", "IPFSFileSystem"]

# Resolve the package version once, then drop the helper from the namespace.
__version__ = get_versions()['version']
del get_versions

# Make IPFS URLs openable through fsspec under the filesystem's own protocol.
register_implementation(IPFSFileSystem.protocol, IPFSFileSystem)
from .spec import AzureDatalakeFileSystem
from .spec import AzureBlobFileSystem, AzureBlobFile
from ._version import get_versions
import fsspec

__all__ = ["AzureBlobFileSystem", "AzureBlobFile", "AzureDatalakeFileSystem"]

# Resolve the package version, then drop the helper from the namespace.
# (A second, byte-identical copy of this import/version/del block was
# removed — it was pure duplication with no effect.)
__version__ = get_versions()["version"]
del get_versions

if hasattr(fsspec, "register_implementation"):
    fsspec.register_implementation("abfss", AzureBlobFileSystem, clobber=True)
else:
    # Older fsspec without register_implementation: declare the backend
    # lazily so fsspec can import it (or raise ``err``) on first use.
    from fsspec.registry import known_implementations

    known_implementations["abfss"] = {
        "class": "adlfs.AzureBlobFileSystem",
        "err": "Please install adlfs to use the abfss protocol",
    }
    del known_implementations

del fsspec  # clear the module namespace