Ejemplo n.º 1
0
Archivo: core.py Proyecto: will133/zarr
    def _load_metadata_nosync(self):
        """Read the array metadata from the store and cache it on this object.

        The metadata document is looked up in ``self._store`` under the key
        ``self._key_prefix + array_meta_key``. When that lookup raises
        ``KeyError``, ``err_array_not_found`` is called with ``self._path``
        and no attributes are modified. Otherwise the decoded metadata is
        used to set ``_meta``, ``_shape``, ``_chunks``, ``_dtype``,
        ``_fill_value``, ``_order``, ``_compressor`` and ``_filters``.
        """
        try:
            raw_meta = self._store[self._key_prefix + array_meta_key]
        except KeyError:
            err_array_not_found(self._path)
            return

        # decode the stored document and mirror its core fields on the instance
        decoded = decode_array_metadata(raw_meta)
        self._meta = decoded
        self._shape = decoded['shape']
        self._chunks = decoded['chunks']
        self._dtype = decoded['dtype']
        self._fill_value = decoded['fill_value']
        self._order = decoded['order']

        # a null compressor config means no compressor codec is instantiated
        compressor_config = decoded['compressor']
        self._compressor = (
            None if compressor_config is None else get_codec(compressor_config)
        )

        # a falsy filters entry (None or empty) is stored unchanged; otherwise
        # each filter config is turned into a codec object
        filter_configs = decoded['filters']
        self._filters = (
            [get_codec(cfg) for cfg in filter_configs]
            if filter_configs else filter_configs
        )
Ejemplo n.º 2
0
    def _load_metadata_nosync(self):
        """Refresh the cached array configuration from the underlying store.

        Fetches the entry stored at ``self._key_prefix + array_meta_key``,
        decodes it with ``decode_array_metadata`` and copies the result onto
        the instance (``_meta``, ``_shape``, ``_chunks``, ``_dtype``,
        ``_fill_value``, ``_order``, ``_compressor``, ``_filters``). If the
        store lookup raises ``KeyError``, ``err_array_not_found`` is invoked
        with ``self._path`` instead.
        """
        try:
            metadata_key = self._key_prefix + array_meta_key
            encoded = self._store[metadata_key]
        except KeyError:
            err_array_not_found(self._path)
        else:
            info = decode_array_metadata(encoded)
            self._meta = info

            # plain fields are copied one-to-one onto underscore attributes
            for field in ('shape', 'chunks', 'dtype', 'fill_value', 'order'):
                setattr(self, '_' + field, info[field])

            # build the compressor codec only when a config is present
            codec_config = info['compressor']
            if codec_config is not None:
                self._compressor = get_codec(codec_config)
            else:
                self._compressor = None

            # build filter codecs; a falsy value (None/empty) is kept as-is
            filter_specs = info['filters']
            if filter_specs:
                filter_specs = list(map(get_codec, filter_specs))
            self._filters = filter_specs
Ejemplo n.º 3
0
def open_array(store,
               mode='a',
               shape=None,
               chunks=True,
               dtype=None,
               compressor='default',
               fill_value=0,
               order='C',
               synchronizer=None,
               filters=None,
               cache_metadata=True,
               cache_attrs=True,
               path=None,
               object_codec=None,
               **kwargs):
    """Open an array, creating or validating it according to `mode`.

    Parameters
    ----------
    store : MutableMapping or string
        Store or path to directory in file system or name of zip file.
    mode : {'r', 'r+', 'a', 'w', 'w-'}, optional
        Persistence mode. 'r' opens read only and the array must exist;
        'r+' opens read/write and the array must exist; 'a' opens
        read/write, creating the array if it doesn't exist; 'w' creates the
        array, overwriting anything that exists; 'w-' creates the array and
        fails if something exists ('x' is accepted as an alias for 'w-').
    shape : int or tuple of ints, optional
        Array shape.
    chunks : int or tuple of ints, optional
        Chunk shape. If True, will be guessed from `shape` and `dtype`. If
        False, will be set to `shape`, i.e., single chunk for the whole array.
    dtype : string or dtype, optional
        NumPy dtype.
    compressor : Codec, optional
        Primary compressor.
    fill_value : object, optional
        Default value to use for uninitialized portions of the array.
    order : {'C', 'F'}, optional
        Memory layout to be used within each chunk.
    synchronizer : object, optional
        Array synchronizer.
    filters : sequence, optional
        Sequence of filters to use to encode chunk data prior to compression.
    cache_metadata : bool, optional
        If True, array configuration metadata will be cached for the
        lifetime of the object. If False, array metadata will be reloaded
        prior to all data access and modification operations (may incur
        overhead depending on storage and data access pattern).
    cache_attrs : bool, optional
        If True (default), user attributes will be cached for attribute read
        operations. If False, user attributes are reloaded from the store prior
        to all attribute read operations.
    path : string, optional
        Array path within store.
    object_codec : Codec, optional
        A codec to encode object arrays, only needed if dtype=object.

    Returns
    -------
    z : zarr.core.Array

    Examples
    --------
    >>> import numpy as np
    >>> import zarr
    >>> z1 = zarr.open_array('data/example.zarr', mode='w', shape=(10000, 10000),
    ...                      chunks=(1000, 1000), fill_value=0)
    >>> z1[:] = np.arange(100000000).reshape(10000, 10000)
    >>> z1
    <zarr.core.Array (10000, 10000) float64>
    >>> z2 = zarr.open_array('data/example.zarr', mode='r')
    >>> z2
    <zarr.core.Array (10000, 10000) float64 read-only>
    >>> np.all(z1[:] == z2[:])
    True

    Notes
    -----
    There is no need to close an array. Data are automatically flushed to the
    file system.

    """

    # Mode semantics mirror h5py:
    #   r        read only, array must exist
    #   r+       read/write, array must exist
    #   w        create, replacing anything already there
    #   w- / x   create, fail if something already exists
    #   a        read/write if it exists, otherwise create (default)

    # the store argument is polymorphic; normalize it first
    store = normalize_store_arg(store, clobber=(mode == 'w'))
    path = normalize_storage_path(path)

    # translate keyword arguments kept for h5py API compatibility
    compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs)

    # coerce fill_value to a scalar of the requested dtype
    if fill_value is not None:
        fill_value = np.array(fill_value, dtype=dtype)[()]

    # keyword arguments shared by every init_array call below
    creation_args = dict(shape=shape,
                         chunks=chunks,
                         dtype=dtype,
                         compressor=compressor,
                         fill_value=fill_value,
                         order=order,
                         filters=filters,
                         path=path,
                         object_codec=object_codec)

    if mode == 'w':
        # unconditional (re)creation
        init_array(store, overwrite=True, **creation_args)

    elif mode in ('r', 'r+', 'a', 'w-', 'x'):
        # all remaining modes refuse to operate on a path that holds a group
        if contains_group(store, path=path):
            err_contains_group(path)
        else:
            array_exists = contains_array(store, path=path)
            if mode in ('r', 'r+'):
                if not array_exists:
                    err_array_not_found(path)
            elif mode == 'a':
                if not array_exists:
                    init_array(store, **creation_args)
            elif array_exists:
                # 'w-' / 'x' must not find an existing array
                err_contains_array(path)
            else:
                init_array(store, **creation_args)

    # only plain 'r' yields a read-only array object
    return Array(store,
                 read_only=(mode == 'r'),
                 synchronizer=synchronizer,
                 cache_metadata=cache_metadata,
                 cache_attrs=cache_attrs,
                 path=path)
Ejemplo n.º 4
0
def open_array(store, mode='a', shape=None, chunks=True, dtype=None, compressor='default',
               fill_value=0, order='C', synchronizer=None, filters=None,
               cache_metadata=True, cache_attrs=True, path=None, object_codec=None,
               **kwargs):
    """Open (and, depending on `mode`, create) an array in a store.

    Parameters
    ----------
    store : MutableMapping or string
        Store or path to directory in file system or name of zip file.
    mode : {'r', 'r+', 'a', 'w', 'w-'}, optional
        Persistence mode: 'r' means read only (must exist); 'r+' means
        read/write (must exist); 'a' means read/write (create if doesn't
        exist); 'w' means create (overwrite if exists); 'w-' means create
        (fail if exists). 'x' is handled the same way as 'w-'.
    shape : int or tuple of ints, optional
        Array shape.
    chunks : int or tuple of ints, optional
        Chunk shape. If True, will be guessed from `shape` and `dtype`. If
        False, will be set to `shape`, i.e., single chunk for the whole array.
    dtype : string or dtype, optional
        NumPy dtype.
    compressor : Codec, optional
        Primary compressor.
    fill_value : object, optional
        Default value to use for uninitialized portions of the array.
    order : {'C', 'F'}, optional
        Memory layout to be used within each chunk.
    synchronizer : object, optional
        Array synchronizer.
    filters : sequence, optional
        Sequence of filters to use to encode chunk data prior to compression.
    cache_metadata : bool, optional
        If True, array configuration metadata will be cached for the
        lifetime of the object. If False, array metadata will be reloaded
        prior to all data access and modification operations (may incur
        overhead depending on storage and data access pattern).
    cache_attrs : bool, optional
        If True (default), user attributes will be cached for attribute read
        operations. If False, user attributes are reloaded from the store prior
        to all attribute read operations.
    path : string, optional
        Array path within store.
    object_codec : Codec, optional
        A codec to encode object arrays, only needed if dtype=object.

    Returns
    -------
    z : zarr.core.Array

    Examples
    --------
    >>> import numpy as np
    >>> import zarr
    >>> z1 = zarr.open_array('data/example.zarr', mode='w', shape=(10000, 10000),
    ...                      chunks=(1000, 1000), fill_value=0)
    >>> z1[:] = np.arange(100000000).reshape(10000, 10000)
    >>> z1
    <zarr.core.Array (10000, 10000) float64>
    >>> z2 = zarr.open_array('data/example.zarr', mode='r')
    >>> z2
    <zarr.core.Array (10000, 10000) float64 read-only>
    >>> np.all(z1[:] == z2[:])
    True

    Notes
    -----
    There is no need to close an array. Data are automatically flushed to the
    file system.

    """

    # h5py-style modes:
    #   r       -> read only, must exist
    #   r+      -> read/write, must exist
    #   w       -> create, delete if exists
    #   w- / x  -> create, fail if exists
    #   a       -> read/write if exists, create otherwise (default)

    # normalize the polymorphic store argument and the storage path
    store = normalize_store_arg(store, clobber=(mode == 'w'))
    path = normalize_storage_path(path)

    # map h5py-compatible keyword arguments
    compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs)

    # make sure fill_value is a scalar of the right type
    if fill_value is not None:
        fill_value = np.array(fill_value, dtype=dtype)[()]

    def _initialize(**extra):
        # single place where the array metadata gets written to the store
        init_array(store, shape=shape, chunks=chunks, dtype=dtype,
                   compressor=compressor, fill_value=fill_value,
                   order=order, filters=filters, path=path,
                   object_codec=object_codec, **extra)

    # ensure the store is initialized as required by the mode

    if mode in ['r', 'r+']:
        # must already hold an array (and not a group)
        if contains_group(store, path=path):
            err_contains_group(path)
        elif not contains_array(store, path=path):
            err_array_not_found(path)

    elif mode == 'w':
        _initialize(overwrite=True)

    elif mode == 'a':
        # create only when nothing is there yet
        if contains_group(store, path=path):
            err_contains_group(path)
        elif not contains_array(store, path=path):
            _initialize()

    elif mode in ['w-', 'x']:
        # exclusive creation: anything pre-existing is an error
        if contains_group(store, path=path):
            err_contains_group(path)
        elif contains_array(store, path=path):
            err_contains_array(path)
        else:
            _initialize()

    # the array object is read-only solely for mode 'r'
    is_read_only = mode == 'r'

    return Array(store, read_only=is_read_only, synchronizer=synchronizer,
                 cache_metadata=cache_metadata, cache_attrs=cache_attrs, path=path)
Ejemplo n.º 5
0
def open_array(store=None, mode='a', shape=None, chunks=None, dtype=None,
               compressor='default', fill_value=0, order='C',
               synchronizer=None, filters=None, cache_metadata=True,
               path=None, **kwargs):
    """Open an array, creating or checking it as dictated by `mode`.

    Parameters
    ----------
    store : MutableMapping or string
        Store or path to directory in file system.
    mode : {'r', 'r+', 'a', 'w', 'w-'}
        Persistence mode. 'r' is read only and the array must exist; 'r+' is
        read/write and the array must exist; 'a' is read/write and creates
        the array if it doesn't exist; 'w' creates the array, overwriting if
        it exists; 'w-' creates the array and fails if it exists ('x' is
        treated the same as 'w-').
    shape : int or tuple of ints
        Array shape.
    chunks : int or tuple of ints, optional
        Chunk shape. If not provided, will be guessed from `shape` and `dtype`.
    dtype : string or dtype, optional
        NumPy dtype.
    compressor : Codec, optional
        Primary compressor.
    fill_value : object
        Default value to use for uninitialized portions of the array.
    order : {'C', 'F'}, optional
        Memory layout to be used within each chunk.
    synchronizer : object, optional
        Array synchronizer.
    filters : sequence, optional
        Sequence of filters to use to encode chunk data prior to compression.
    cache_metadata : bool, optional
        If True, array configuration metadata will be cached for the
        lifetime of the object. If False, array metadata will be reloaded
        prior to all data access and modification operations (may incur
        overhead depending on storage and data access pattern).
    path : string, optional
        Array path.

    Returns
    -------
    z : zarr.core.Array

    Examples
    --------
    >>> import numpy as np
    >>> import zarr
    >>> z1 = zarr.open_array('example.zarr', mode='w', shape=(10000, 10000),
    ...                      chunks=(1000, 1000), fill_value=0)
    >>> z1[:] = np.arange(100000000).reshape(10000, 10000)
    >>> z1
    Array((10000, 10000), float64, chunks=(1000, 1000), order=C)
      nbytes: 762.9M; nbytes_stored: 23.0M; ratio: 33.2; initialized: 100/100
      compressor: Blosc(cname='lz4', clevel=5, shuffle=1)
      store: DirectoryStore
    >>> z2 = zarr.open_array('example.zarr', mode='r')
    >>> z2
    Array((10000, 10000), float64, chunks=(1000, 1000), order=C)
      nbytes: 762.9M; nbytes_stored: 23.0M; ratio: 33.2; initialized: 100/100
      compressor: Blosc(cname='lz4', clevel=5, shuffle=1)
      store: DirectoryStore
    >>> np.all(z1[:] == z2[:])
    True

    Notes
    -----
    There is no need to close an array. Data are automatically flushed to the
    file system.

    """  # flake8: noqa

    # Modes follow h5py:
    #   r       read only, must exist
    #   r+      read/write, must exist
    #   w       create, delete if exists
    #   w- / x  create, fail if exists
    #   a       read/write if exists, create otherwise (default)

    # resolve the polymorphic store argument and normalize the path
    store = _handle_store_arg(store)
    path = normalize_storage_path(path)

    # keyword-argument compatibility handling
    compressor, fill_value = _handle_kwargs(compressor, fill_value, kwargs)

    # keyword arguments common to every init_array call below
    array_spec = dict(shape=shape, chunks=chunks, dtype=dtype,
                      compressor=compressor, fill_value=fill_value,
                      order=order, filters=filters, path=path)

    # ensure the store is initialized as the mode requires

    if mode in ['r', 'r+']:
        # an array (not a group) must already be present
        if contains_group(store, path=path):
            err_contains_group(path)
        elif not contains_array(store, path=path):
            err_array_not_found(path)

    elif mode == 'w':
        init_array(store, overwrite=True, **array_spec)

    elif mode == 'a':
        # create only if no array is present yet
        if contains_group(store, path=path):
            err_contains_group(path)
        elif not contains_array(store, path=path):
            init_array(store, **array_spec)

    elif mode in ['w-', 'x']:
        # exclusive creation: any existing group or array is an error
        if contains_group(store, path=path):
            err_contains_group(path)
        elif contains_array(store, path=path):
            err_contains_array(path)
        else:
            init_array(store, **array_spec)

    # mode 'r' is the only one that produces a read-only array
    is_read_only = mode == 'r'

    return Array(store, read_only=is_read_only, synchronizer=synchronizer,
                 cache_metadata=cache_metadata, path=path)