def __init__(self, store, path=None, read_only=False, chunk_store=None, cache_attrs=True, synchronizer=None): self._store = store self._chunk_store = chunk_store self._path = normalize_storage_path(path) if self._path: self._key_prefix = self._path + '/' else: self._key_prefix = '' self._read_only = read_only self._synchronizer = synchronizer # guard conditions if contains_array(store, path=self._path): raise ContainsArrayError(path) # initialize metadata try: mkey = self._key_prefix + group_meta_key meta_bytes = store[mkey] except KeyError: raise GroupNotFoundError(path) else: meta = decode_group_metadata(meta_bytes) self._meta = meta # setup attributes akey = self._key_prefix + attrs_key self._attrs = Attributes(store, key=akey, read_only=read_only, cache=cache_attrs, synchronizer=synchronizer) # setup info self._info = InfoReporter(self)
def open_group(store=None, mode='a', cache_attrs=True, synchronizer=None, path=None, chunk_store=None, storage_options=None): """Open a group using file-mode-like semantics. Parameters ---------- store : MutableMapping or string, optional Store or path to directory in file system or name of zip file. mode : {'r', 'r+', 'a', 'w', 'w-'}, optional Persistence mode: 'r' means read only (must exist); 'r+' means read/write (must exist); 'a' means read/write (create if doesn't exist); 'w' means create (overwrite if exists); 'w-' means create (fail if exists). cache_attrs : bool, optional If True (default), user attributes will be cached for attribute read operations. If False, user attributes are reloaded from the store prior to all attribute read operations. synchronizer : object, optional Array synchronizer. path : string, optional Group path within store. chunk_store : MutableMapping or string, optional Store or path to directory in file system or name of zip file. storage_options : dict If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. Returns ------- g : zarr.hierarchy.Group Examples -------- >>> import zarr >>> root = zarr.open_group('data/example.zarr', mode='w') >>> foo = root.create_group('foo') >>> bar = root.create_group('bar') >>> root <zarr.hierarchy.Group '/'> >>> root2 = zarr.open_group('data/example.zarr', mode='a') >>> root2 <zarr.hierarchy.Group '/'> >>> root == root2 True """ # handle polymorphic store arg clobber = mode != 'r' store = _normalize_store_arg(store, clobber=clobber, storage_options=storage_options) if chunk_store is not None: chunk_store = _normalize_store_arg(chunk_store, clobber=clobber, storage_options=storage_options) path = normalize_storage_path(path) # ensure store is initialized if mode in ['r', 'r+']: if contains_array(store, path=path): raise ContainsArrayError(path) elif not contains_group(store, path=path): raise GroupNotFoundError(path) elif mode == 'w': init_group(store, overwrite=True, path=path, chunk_store=chunk_store) elif mode == 'a': if contains_array(store, path=path): raise ContainsArrayError(path) if not contains_group(store, path=path): init_group(store, path=path, chunk_store=chunk_store) elif mode in ['w-', 'x']: if contains_array(store, path=path): raise ContainsArrayError(path) elif contains_group(store, path=path): raise ContainsGroupError(path) else: init_group(store, path=path, chunk_store=chunk_store) # determine read only status read_only = mode == 'r' return Group(store, read_only=read_only, cache_attrs=cache_attrs, synchronizer=synchronizer, path=path, chunk_store=chunk_store)
def open_array(store=None, mode="a", shape=None, chunks=True, dtype=None, compressor="default", fill_value=0, order="C", synchronizer=None, filters=None, cache_metadata=True, cache_attrs=True, path=None, object_codec=None, chunk_store=None, storage_options=None, partial_decompress=False, **kwargs): """Open an array using file-mode-like semantics. Parameters ---------- store : MutableMapping or string, optional Store or path to directory in file system or name of zip file. mode : {'r', 'r+', 'a', 'w', 'w-'}, optional Persistence mode: 'r' means read only (must exist); 'r+' means read/write (must exist); 'a' means read/write (create if doesn't exist); 'w' means create (overwrite if exists); 'w-' means create (fail if exists). shape : int or tuple of ints, optional Array shape. chunks : int or tuple of ints, optional Chunk shape. If True, will be guessed from `shape` and `dtype`. If False, will be set to `shape`, i.e., single chunk for the whole array. If an int, the chunk size in each dimension will be given by the value of `chunks`. Default is True. dtype : string or dtype, optional NumPy dtype. compressor : Codec, optional Primary compressor. fill_value : object, optional Default value to use for uninitialized portions of the array. order : {'C', 'F'}, optional Memory layout to be used within each chunk. synchronizer : object, optional Array synchronizer. filters : sequence, optional Sequence of filters to use to encode chunk data prior to compression. cache_metadata : bool, optional If True, array configuration metadata will be cached for the lifetime of the object. If False, array metadata will be reloaded prior to all data access and modification operations (may incur overhead depending on storage and data access pattern). cache_attrs : bool, optional If True (default), user attributes will be cached for attribute read operations. If False, user attributes are reloaded from the store prior to all attribute read operations. path : string, optional Array path within store. object_codec : Codec, optional A codec to encode object arrays, only needed if dtype=object. chunk_store : MutableMapping or string, optional Store or path to directory in file system or name of zip file. storage_options : dict If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. partial_decompress : bool, optional If True and while the chunk_store is a FSStore and the compresion used is Blosc, when getting data from the array chunks will be partially read and decompressed when possible. .. versionadded:: 2.7 Returns ------- z : zarr.core.Array Examples -------- >>> import numpy as np >>> import zarr >>> z1 = zarr.open_array('data/example.zarr', mode='w', shape=(10000, 10000), ... chunks=(1000, 1000), fill_value=0) >>> z1[:] = np.arange(100000000).reshape(10000, 10000) >>> z1 <zarr.core.Array (10000, 10000) float64> >>> z2 = zarr.open_array('data/example.zarr', mode='r') >>> z2 <zarr.core.Array (10000, 10000) float64 read-only> >>> np.all(z1[:] == z2[:]) True Notes ----- There is no need to close an array. Data are automatically flushed to the file system. """ # use same mode semantics as h5py # r : read only, must exist # r+ : read/write, must exist # w : create, delete if exists # w- or x : create, fail if exists # a : read/write if exists, create otherwise (default) # handle polymorphic store arg clobber = (mode == 'w') store = normalize_store_arg(store, clobber=clobber, storage_options=storage_options, mode=mode) if chunk_store is not None: chunk_store = normalize_store_arg(chunk_store, clobber=clobber, storage_options=storage_options) path = normalize_storage_path(path) # API compatibility with h5py compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs) # ensure fill_value of correct type if fill_value is not None: fill_value = np.array(fill_value, dtype=dtype)[()] # ensure store is initialized if mode in ['r', 'r+']: if contains_group(store, path=path): raise ContainsGroupError(path) elif not contains_array(store, path=path): raise ArrayNotFoundError(path) elif mode == 'w': init_array(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor, fill_value=fill_value, order=order, filters=filters, overwrite=True, path=path, object_codec=object_codec, chunk_store=chunk_store) elif mode == 'a': if contains_group(store, path=path): raise ContainsGroupError(path) elif not contains_array(store, path=path): init_array(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor, fill_value=fill_value, order=order, filters=filters, path=path, object_codec=object_codec, chunk_store=chunk_store) elif mode in ['w-', 'x']: if contains_group(store, path=path): raise ContainsGroupError(path) elif contains_array(store, path=path): raise ContainsArrayError(path) else: init_array(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor, fill_value=fill_value, order=order, filters=filters, path=path, object_codec=object_codec, chunk_store=chunk_store) # determine read only status read_only = mode == 'r' # instantiate array z = Array(store, read_only=read_only, synchronizer=synchronizer, cache_metadata=cache_metadata, cache_attrs=cache_attrs, path=path, chunk_store=chunk_store) return z
def __init__(self, store, path=None, read_only=False, chunk_store=None, cache_attrs=True, synchronizer=None, zarr_version=None): store: BaseStore = _normalize_store_arg(store, zarr_version=zarr_version) if zarr_version is None: zarr_version = getattr(store, '_store_version', DEFAULT_ZARR_VERSION) if chunk_store is not None: chunk_store: BaseStore = _normalize_store_arg( chunk_store, zarr_version=zarr_version) self._store = store self._chunk_store = chunk_store self._path = normalize_storage_path(path) if self._path: self._key_prefix = self._path + '/' else: self._key_prefix = '' self._read_only = read_only self._synchronizer = synchronizer self._version = zarr_version if self._version == 3: self._data_key_prefix = data_root + self._key_prefix self._data_path = data_root + self._path self._hierarchy_metadata = _get_hierarchy_metadata( store=self._store) self._metadata_key_suffix = _get_metadata_suffix(store=self._store) # guard conditions if contains_array(store, path=self._path): raise ContainsArrayError(path) # initialize metadata try: mkey = _prefix_to_group_key(self._store, self._key_prefix) assert not mkey.endswith("root/.group") meta_bytes = store[mkey] except KeyError: if self._version == 2: raise GroupNotFoundError(path) else: implicit_prefix = meta_root + self._key_prefix if self._store.list_prefix(implicit_prefix): # implicit group does not have any metadata self._meta = None else: raise GroupNotFoundError(path) else: self._meta = self._store._metadata_class.decode_group_metadata( meta_bytes) # setup attributes if self._version == 2: akey = self._key_prefix + attrs_key else: # Note: mkey doesn't actually exist for implicit groups, but the # object can still be created. akey = mkey self._attrs = Attributes(store, key=akey, read_only=read_only, cache=cache_attrs, synchronizer=synchronizer) # setup info self._info = InfoReporter(self)
def open_array(store=None, mode="a", shape=None, chunks=True, dtype=None, compressor="default", fill_value=0, order="C", synchronizer=None, filters=None, cache_metadata=True, cache_attrs=True, path=None, object_codec=None, chunk_store=None, storage_options=None, partial_decompress=False, write_empty_chunks=True, *, zarr_version=None, dimension_separator=None, **kwargs): """Open an array using file-mode-like semantics. Parameters ---------- store : MutableMapping or string, optional Store or path to directory in file system or name of zip file. mode : {'r', 'r+', 'a', 'w', 'w-'}, optional Persistence mode: 'r' means read only (must exist); 'r+' means read/write (must exist); 'a' means read/write (create if doesn't exist); 'w' means create (overwrite if exists); 'w-' means create (fail if exists). shape : int or tuple of ints, optional Array shape. chunks : int or tuple of ints, optional Chunk shape. If True, will be guessed from `shape` and `dtype`. If False, will be set to `shape`, i.e., single chunk for the whole array. If an int, the chunk size in each dimension will be given by the value of `chunks`. Default is True. dtype : string or dtype, optional NumPy dtype. compressor : Codec, optional Primary compressor. fill_value : object, optional Default value to use for uninitialized portions of the array. order : {'C', 'F'}, optional Memory layout to be used within each chunk. synchronizer : object, optional Array synchronizer. filters : sequence, optional Sequence of filters to use to encode chunk data prior to compression. cache_metadata : bool, optional If True, array configuration metadata will be cached for the lifetime of the object. If False, array metadata will be reloaded prior to all data access and modification operations (may incur overhead depending on storage and data access pattern). cache_attrs : bool, optional If True (default), user attributes will be cached for attribute read operations. If False, user attributes are reloaded from the store prior to all attribute read operations. path : string, optional Array path within store. object_codec : Codec, optional A codec to encode object arrays, only needed if dtype=object. chunk_store : MutableMapping or string, optional Store or path to directory in file system or name of zip file. storage_options : dict If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. partial_decompress : bool, optional If True and while the chunk_store is a FSStore and the compression used is Blosc, when getting data from the array chunks will be partially read and decompressed when possible. write_empty_chunks : bool, optional If True (default), all chunks will be stored regardless of their contents. If False, each chunk is compared to the array's fill value prior to storing. If a chunk is uniformly equal to the fill value, then that chunk is not be stored, and the store entry for that chunk's key is deleted. This setting enables sparser storage, as only chunks with non-fill-value data are stored, at the expense of overhead associated with checking the data of each chunk. .. versionadded:: 2.11 zarr_version : {None, 2, 3}, optional The zarr protocol version of the array to be opened. If None, it will be inferred from ``store`` or ``chunk_store`` if they are provided, otherwise defaulting to 2. dimension_separator : {None, '.', '/'}, optional Can be used to specify whether the array is in a flat ('.') or nested ('/') format. If None, the appropriate value will be read from `store` when present. Otherwise, defaults to '.' when ``zarr_version == 2`` and `/` otherwise. Returns ------- z : zarr.core.Array Examples -------- >>> import numpy as np >>> import zarr >>> z1 = zarr.open_array('data/example.zarr', mode='w', shape=(10000, 10000), ... chunks=(1000, 1000), fill_value=0) >>> z1[:] = np.arange(100000000).reshape(10000, 10000) >>> z1 <zarr.core.Array (10000, 10000) float64> >>> z2 = zarr.open_array('data/example.zarr', mode='r') >>> z2 <zarr.core.Array (10000, 10000) float64 read-only> >>> np.all(z1[:] == z2[:]) True Notes ----- There is no need to close an array. Data are automatically flushed to the file system. """ # use same mode semantics as h5py # r : read only, must exist # r+ : read/write, must exist # w : create, delete if exists # w- or x : create, fail if exists # a : read/write if exists, create otherwise (default) if zarr_version is None and store is None: zarr_version = getattr(chunk_store, '_store_version', DEFAULT_ZARR_VERSION) # handle polymorphic store arg store = normalize_store_arg(store, storage_options=storage_options, mode=mode, zarr_version=zarr_version) zarr_version = getattr(store, '_store_version', DEFAULT_ZARR_VERSION) if chunk_store is not None: chunk_store = normalize_store_arg(chunk_store, storage_options=storage_options, mode=mode, zarr_version=zarr_version) # respect the dimension separator specified in a store, if present if dimension_separator is None: if hasattr(store, '_dimension_separator'): dimension_separator = store._dimension_separator else: dimension_separator = '.' if zarr_version == 2 else '/' if zarr_version == 3 and path is None: path = 'array' # TODO: raise ValueError instead? path = normalize_storage_path(path) # API compatibility with h5py compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs) # ensure fill_value of correct type if fill_value is not None: fill_value = np.array(fill_value, dtype=dtype)[()] # ensure store is initialized if mode in ['r', 'r+']: if not contains_array(store, path=path): if contains_group(store, path=path): raise ContainsGroupError(path) raise ArrayNotFoundError(path) elif mode == 'w': init_array(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor, fill_value=fill_value, order=order, filters=filters, overwrite=True, path=path, object_codec=object_codec, chunk_store=chunk_store, dimension_separator=dimension_separator) elif mode == 'a': if not contains_array(store, path=path): if contains_group(store, path=path): raise ContainsGroupError(path) init_array(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor, fill_value=fill_value, order=order, filters=filters, path=path, object_codec=object_codec, chunk_store=chunk_store, dimension_separator=dimension_separator) elif mode in ['w-', 'x']: if contains_group(store, path=path): raise ContainsGroupError(path) elif contains_array(store, path=path): raise ContainsArrayError(path) else: init_array(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor, fill_value=fill_value, order=order, filters=filters, path=path, object_codec=object_codec, chunk_store=chunk_store, dimension_separator=dimension_separator) # determine read only status read_only = mode == 'r' # instantiate array z = Array(store, read_only=read_only, synchronizer=synchronizer, cache_metadata=cache_metadata, cache_attrs=cache_attrs, path=path, chunk_store=chunk_store, write_empty_chunks=write_empty_chunks) return z