Esempio n. 1
0
def open_remote(url):
    """Open the consolidated Zarr store found at ``url`` over HTTP.

    The URL is wrapped in an fsspec ``HTTPFileSystem`` mapper, opened with
    ``xr.open_zarr`` and the result of ``.yaxit.as_binned()`` on the opened
    dataset is returned.

    :param url: HTTP(S) location of the Zarr store.
    :returns: Whatever ``.yaxit.as_binned()`` yields for the opened dataset.
    """
    # zarr is not referenced by name below; the import is kept so a missing
    # installation is reported here.
    import zarr
    from fsspec.implementations.http import HTTPFileSystem

    remote_store = HTTPFileSystem().get_mapper(url)
    dataset = xr.open_zarr(remote_store, consolidated=True)
    return dataset.yaxit.as_binned()
def structure_mesh(allen_id):
    """Return the mesh for ``allen_id``, loading it on first request.

    Meshes are memoised in the module-level ``_cache``. On a miss the
    consolidated Zarr group for the id is opened over HTTP, converted with
    ``zarr_to_vtkjs`` and stored in the cache before being returned.

    :param allen_id: Identifier substituted into the mesh URL and used as
        the cache key.
    :returns: The cached or freshly converted mesh.
    """
    if allen_id not in _cache:
        http_fs = HTTPFileSystem()
        # Todo: Use AWS store after Scott / Lydia upload
        mesh_url = (
            "https://thewtex.github.io/allen-ccf-itk-vtk-zarr/meshes/{0}.zarr".
            format(allen_id))
        mesh_group = zarr.open_consolidated(http_fs.get_mapper(mesh_url))
        _cache[allen_id] = zarr_to_vtkjs(mesh_group)
    return _cache[allen_id]
Esempio n. 3
0
 def __init__(self):
     """Initialise the model on a ``NestedStore`` read over HTTP.

     The store is built from an ``HTTPFileSystem`` created with
     ``size_policy='get'``, the data and mask locations below, and
     ``shrunk=True``; it is then handed to the parent initialiser.
     """
     from fsspec.implementations.http import HTTPFileSystem

     http_fs = HTTPFileSystem(size_policy='get')
     store_options = dict(
         base_path='https://data.nas.nasa.gov/ecco/download_data.php?file=/eccodata/llc_4320/compressed',
         mask_path='https://storage.googleapis.com/pangeo-ecco/llc/masks/llc_4320_masks.zarr/',
         shrunk=True,
     )
     nested_store = stores.NestedStore(http_fs, **store_options)
     super(ECCOPortalLLC4320Model, self).__init__(nested_store)
Esempio n. 4
0
def list_remote_files_fsspec(url: str,
                             ttl: CacheExpiry = CacheExpiry.FILEINDEX
                             ) -> List[str]:
    """
    List all files found below a given path on the server.

    Listings go through an ``HTTPFileSystem`` configured with a
    ``"filedircache"`` listings cache stored in ``cache_dir``.
    The default ttl with ``CacheExpiry.FILEINDEX`` is "5 minutes".

    :param url:         The URL which should be searched for files.
    :param ttl:         The cache expiration time.
    :returns:  A list of strings representing the files from the path.
    """
    # When caching is globally disabled the expiry is passed as ``False``
    # instead of the ttl's value.
    expiry = False if WD_CACHE_DISABLE else ttl.value

    filesystem_options = {
        "use_listings_cache": True,
        "listings_expiry_time": expiry,
        "listings_cache_type": "filedircache",
        "listings_cache_location": cache_dir,
        "client_kwargs": FSSPEC_CLIENT_KWARGS,
    }
    remote = HTTPFileSystem(**filesystem_options)
    return remote.find(url)
def test_compare_available_dwd_datasets():
    """Compare the datasets exposed by wetterdienst with the ones actually available on the DWD CDC
    server instance.

    The server's directory tree is listed up to a depth of three, reduced to
    ``resolution/dataset`` pairs, and every pair that is not listed in
    ``SKIP_DATASETS`` must be known to the resolution/dataset mapping and to
    the parameter enumeration."""
    # similar to func list_remote_files_fsspec, but we don't want to get full depth
    fs = HTTPFileSystem(
        use_listings_cache=True,
        listings_expiry_time=CacheExpiry.TWELVE_HOURS.value,
        listings_cache_type="filedircache",
        listings_cache_location=cache_dir,
        client_kwargs=FSSPEC_CLIENT_KWARGS,
    )

    base_url = "https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/"

    files = fs.expand_path(base_url, recursive=True, maxdepth=3)

    df = pd.DataFrame({"files": files})

    # Drop the base URL prefix and the final character of every entry
    # (presumably a trailing "/" -- confirm against the listing format).
    df.files = df.files.str[len(base_url):-1]

    # filter resolution folders: keep entries containing exactly one "/",
    # i.e. "<resolution>/<dataset>"
    df = df.loc[df.files.str.count("/") == 1, :]

    # Replace the "files" column by its two path components.
    df.loc[:, ["resolution", "dataset"]] = df.pop("files").str.split(
        "/").tolist()

    for _, (resolution, dataset) in df.iterrows():
        rd_pair = (resolution, dataset)

        # Pairs listed in SKIP_DATASETS are not checked.
        if rd_pair in SKIP_DATASETS:
            continue

        # Convert the raw folder names into their enumeration members.
        resolution = parse_enumeration_from_template(resolution,
                                                     DwdObservationResolution,
                                                     Resolution)
        dataset = DwdObservationDataset(dataset)

        assert dataset in RESOLUTION_DATASET_MAPPING[resolution].keys()
        assert DwdObservationParameter[resolution.name][dataset.name]
Esempio n. 6
0
 def register(cls, ttl=CacheExpiry.NO_CACHE):
     """Create the filesystem for ``ttl`` and store it in ``cls.filesystems``.

     The entry is keyed by ``"ttl-<name>"``, where the name comes from
     ``cls.resolve_ttl``. A plain ``HTTPFileSystem`` is registered when
     ``WD_CACHE_DISABLE`` is set or ``ttl`` is ``CacheExpiry.NO_CACHE``;
     otherwise it is wrapped in a ``WholeFileCacheFileSystem`` that stores
     files under ``<cache_dir>/fsspec/<key>`` with the resolved expiry time.

     :param ttl: Cache expiry selector passed to ``cls.resolve_ttl``.
     """
     name, expiry = cls.resolve_ttl(ttl)
     key = f"ttl-{name}"
     storage_dir = os.path.join(cache_dir, "fsspec", key)

     filesystem = HTTPFileSystem(use_listings_cache=True,
                                 client_kwargs=FSSPEC_CLIENT_KWARGS)

     caching_enabled = not (WD_CACHE_DISABLE or ttl is CacheExpiry.NO_CACHE)
     if caching_enabled:
         filesystem = WholeFileCacheFileSystem(fs=filesystem,
                                               cache_storage=storage_dir,
                                               expiry_time=expiry)

     cls.filesystems[key] = filesystem
Esempio n. 7
0
def _make_http_filesystem():
    """Return an fsspec ``HTTPFileSystem`` constructed with no arguments.

    The fsspec imports are done inside the function, so they are only
    executed when the function is called.
    """
    import fsspec
    from fsspec.implementations.http import HTTPFileSystem

    http_fs = HTTPFileSystem()
    return http_fs
from itkwidgets import view
import numpy as np
from traitlets import CFloat, CInt, List, Unicode, validate
import xarray as xr
import zarr
import urllib.request
import matplotlib.colors

from .structure_graph import acronym_to_allen_id, allen_id_to_acronym, structure_graph, allen_id_to_tree_node
from .allen_id_label import labels_for_allen_id
from .swc_morphology import swc_morphology_geometry
from .structure_mesh import structure_mesh

from IPython.core.debugger import set_trace

# --- Template image -------------------------------------------------------
# These statements run at import time: the template image store is mapped
# over HTTP, wrapped in an LRU cache and opened as an xarray dataset.
_image_fs = HTTPFileSystem()
# TODO: Use AWS store after Scott / Lydia upload
_image_store = _image_fs.get_mapper(
    "https://thewtex.github.io/allen-ccf-itk-vtk-zarr/average_template_50_chunked.zarr"
)
# max_size=None: no explicit size limit is given for the cache
# (see zarr's LRUStoreCache documentation for the exact semantics).
_image_store_cached = zarr.LRUStoreCache(_image_store, max_size=None)
_image_ds = xr.open_zarr(_image_store_cached, consolidated=True)
# The "average_template_50" variable of the opened dataset.
_image_da = _image_ds.average_template_50

# --- Label (annotation) image ---------------------------------------------
# Same mapper + LRU-cache setup as above, for the annotation store.
_label_image_fs = HTTPFileSystem()
# TODO: Use AWS store after Scott / Lydia upload
_label_image_store = _label_image_fs.get_mapper(
    "https://thewtex.github.io/allen-ccf-itk-vtk-zarr/allen_ccfv3_annotation_50_contiguous.zarr"
)
_label_image_store_cached = zarr.LRUStoreCache(_label_image_store,
                                               max_size=None)
def httpfs_from_config():
    """Wrap an ``HTTPFileSystem`` in ``FSSpecHandler`` and return it as a ``PyFileSystem``."""
    http_fs = HTTPFileSystem()
    handler = FSSpecHandler(http_fs)
    return PyFileSystem(handler)
Esempio n. 10
0
def _resolve_paths_and_filesystem(
    paths: Union[str, List[str]],
    filesystem: "pyarrow.fs.FileSystem" = None,
) -> Tuple[List[str], "pyarrow.fs.FileSystem"]:
    """
    Resolves and normalizes all provided paths, infers a filesystem from the
    paths and ensures that all paths use the same filesystem.

    Args:
        paths: A single file/directory path or a list of file/directory paths.
            A list of paths can contain both files and directories.
        filesystem: The filesystem implementation that should be used for
            reading these files. If None, a filesystem will be inferred. If not
            None, the provided filesystem will still be validated against all
            filesystems inferred from the provided paths to ensure
            compatibility.

    Returns:
        A tuple ``(resolved_paths, filesystem)``: the normalized paths, in
        input order, and the filesystem that was passed in (wrapped in a
        ``PyFileSystem`` if it was an fsspec filesystem) or, when none was
        passed, the one resolved for the first path.

    Raises:
        ValueError: If ``paths`` is neither a string nor a list of strings,
            or if it is an empty list.
        TypeError: If ``filesystem`` is given but is neither a
            ``pyarrow.fs.FileSystem`` nor an fsspec ``AbstractFileSystem``.
        ImportError: If an http(s) path cannot be resolved by pyarrow and
            fsspec is not installed.
        pyarrow.lib.ArrowInvalid: Re-raised for resolution errors other than
            the two handled cases below.
    """
    import pyarrow as pa
    from pyarrow.fs import (
        FileSystem,
        FSSpecHandler,
        PyFileSystem,
        _resolve_filesystem_and_path,
    )

    # Normalize ``paths`` to a non-empty list of strings.
    if isinstance(paths, str):
        paths = [paths]
    elif not isinstance(paths, list) or any(not isinstance(p, str)
                                            for p in paths):
        raise ValueError(
            "paths must be a path string or a list of path strings.")
    elif len(paths) == 0:
        raise ValueError("Must provide at least one path.")

    # Whether the protocol/scheme should be stripped from resolved paths;
    # switched off below whenever an fsspec HTTPFileSystem is involved.
    need_unwrap_path_protocol = True
    if filesystem and not isinstance(filesystem, FileSystem):
        err_msg = (f"The filesystem passed must either conform to "
                   f"pyarrow.fs.FileSystem, or "
                   f"fsspec.spec.AbstractFileSystem. The provided "
                   f"filesystem was: {filesystem}")
        try:
            import fsspec
            from fsspec.implementations.http import HTTPFileSystem
        except ModuleNotFoundError:
            # If filesystem is not a pyarrow filesystem and fsspec isn't
            # installed, then filesystem is neither a pyarrow filesystem nor
            # an fsspec filesystem, so we raise a TypeError.
            raise TypeError(err_msg) from None
        if not isinstance(filesystem, fsspec.spec.AbstractFileSystem):
            raise TypeError(err_msg) from None
        if isinstance(filesystem, HTTPFileSystem):
            # If filesystem is fsspec HTTPFileSystem, the protocol/scheme of paths
            # should not be unwrapped/removed, because HTTPFileSystem expects full file
            # paths including protocol/scheme. This is different behavior compared to
            # file systems implementation in pyarrow.fs.FileSystem.
            need_unwrap_path_protocol = False

        # Wrap the fsspec filesystem so it can be used through the pyarrow
        # filesystem interface from here on.
        filesystem = PyFileSystem(FSSpecHandler(filesystem))

    resolved_paths = []
    for path in paths:
        path = _resolve_example_path(path)
        try:
            resolved_filesystem, resolved_path = _resolve_filesystem_and_path(
                path, filesystem)
        except pa.lib.ArrowInvalid as e:
            if "Cannot parse URI" in str(e):
                # Retry with the encoded form of the path, then decode the
                # resolved path again.
                resolved_filesystem, resolved_path = _resolve_filesystem_and_path(
                    _encode_url(path), filesystem)
                resolved_path = _decode_url(resolved_path)
            elif "Unrecognized filesystem type in URI" in str(e):
                scheme = urllib.parse.urlparse(path,
                                               allow_fragments=False).scheme
                if scheme in ["http", "https"]:
                    # If scheme of path is HTTP and filesystem is not resolved,
                    # try to use fsspec HTTPFileSystem. This expects fsspec is
                    # installed.
                    try:
                        from fsspec.implementations.http import HTTPFileSystem
                    except ModuleNotFoundError:
                        raise ImportError(
                            "Please install fsspec to read files from HTTP."
                        ) from None

                    resolved_filesystem = PyFileSystem(
                        FSSpecHandler(HTTPFileSystem()))
                    resolved_path = path
                    # Note: this flag stays False for all remaining paths.
                    need_unwrap_path_protocol = False
                else:
                    raise
            else:
                raise
        if filesystem is None:
            # No filesystem was given: adopt the one resolved for this path
            # and use it for every following path. The protocol is not
            # unwrapped on this iteration.
            filesystem = resolved_filesystem
        elif need_unwrap_path_protocol:
            resolved_path = _unwrap_protocol(resolved_path)
        resolved_path = filesystem.normalize_path(resolved_path)
        resolved_paths.append(resolved_path)

    return resolved_paths, filesystem