Example #1
    def _require_dataset_nosync(self, name, shape, dtype=None, exact=False,
                                **kwargs):

        path = self._item_path(name)

        if contains_array(self._store, path):
            synchronizer = kwargs.get('synchronizer', self._synchronizer)
            cache_metadata = kwargs.get('cache_metadata', True)
            a = Array(self._store, path=path, read_only=self._read_only,
                      chunk_store=self._chunk_store,
                      synchronizer=synchronizer, cache_metadata=cache_metadata)
            shape = normalize_shape(shape)
            if shape != a.shape:
                raise TypeError('shapes do not match')
            dtype = np.dtype(dtype)
            if exact:
                if dtype != a.dtype:
                    raise TypeError('dtypes do not match exactly')
            else:
                if not np.can_cast(dtype, a.dtype):
                    raise TypeError('dtypes cannot be safely cast')
            return a

        else:
            return self._create_dataset_nosync(name, shape=shape, dtype=dtype,
                                               **kwargs)
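
For context, here is a minimal usage sketch of the public wrapper around this helper. This is a hedged illustration assuming the zarr 2.x `Group.require_dataset` API; the group, array name, dtypes and the printed message below are illustrative, not taken from the example above.

import zarr

g = zarr.group()  # in-memory group

# first call: no array exists yet, so the dataset is created
a = g.require_dataset('foo', shape=(100,), dtype='i4', chunks=(10,))

# second call: the array exists and 'i2' can be safely cast to 'i4'
# (np.can_cast('i2', 'i4') is True), so the existing array is returned
b = g.require_dataset('foo', shape=(100,), dtype='i2')
print(b.shape, b.dtype)  # (100,) int32

# a mismatched shape, or a mismatched dtype when exact=True, raises TypeError
try:
    g.require_dataset('foo', shape=(200,), dtype='i4')
except TypeError as e:
    print(e)  # e.g. "shapes do not match"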
Example #2
    def _require_dataset_nosync(self,
                                name,
                                shape,
                                dtype=None,
                                exact=False,
                                **kwargs):

        path = self._item_path(name)

        if contains_array(self._store, path):
            synchronizer = kwargs.get('synchronizer', self._synchronizer)
            cache_metadata = kwargs.get('cache_metadata', True)
            a = Array(self._store,
                      path=path,
                      read_only=self._read_only,
                      chunk_store=self._chunk_store,
                      synchronizer=synchronizer,
                      cache_metadata=cache_metadata)
            shape = normalize_shape(shape)
            if shape != a.shape:
                raise TypeError('shapes do not match')
            dtype = np.dtype(dtype)
            if exact:
                if dtype != a.dtype:
                    raise TypeError('dtypes do not match exactly')
            else:
                if not np.can_cast(dtype, a.dtype):
                    raise TypeError('dtypes cannot be safely cast')
            return a

        else:
            return self._create_dataset_nosync(name,
                                               shape=shape,
                                               dtype=dtype,
                                               **kwargs)
Example #3
    def _require_dataset_nosync(self, name, shape, dtype=None, exact=False,
                                **kwargs):

        path = self._item_path(name)

        if contains_array(self._store, path):

            # array already exists at path, validate that it is the right shape and type

            synchronizer = kwargs.get('synchronizer', self._synchronizer)
            cache_metadata = kwargs.get('cache_metadata', True)
            cache_attrs = kwargs.get('cache_attrs', self.attrs.cache)
            a = Array(self._store, path=path, read_only=self._read_only,
                      chunk_store=self._chunk_store, synchronizer=synchronizer,
                      cache_metadata=cache_metadata, cache_attrs=cache_attrs)
            shape = normalize_shape(shape)
            if shape != a.shape:
                raise TypeError('shapes do not match existing array; expected {}, got {}'
                                .format(a.shape, shape))
            dtype = np.dtype(dtype)
            if exact:
                if dtype != a.dtype:
                    raise TypeError('dtypes do not match exactly; expected {}, got {}'
                                    .format(a.dtype, dtype))
            else:
                if not np.can_cast(dtype, a.dtype):
                    raise TypeError('dtypes ({}, {}) cannot be safely cast'
                                    .format(dtype, a.dtype))
            return a

        else:
            return self._create_dataset_nosync(name, shape=shape, dtype=dtype,
                                               **kwargs)
Example #4
def extract_zarray(da, encoding, dtype):
    """ helper function to extract zarr array metadata. """
    meta = {
        "compressor": encoding.get(
            "compressor", da.encoding.get("compressor", default_compressor)),
        "filters": encoding.get("filters", da.encoding.get("filters", None)),
        "chunks": encoding.get("chunks", None),
        "dtype": dtype.str,
        "fill_value": _extract_fill_value(da, dtype),
        "order": "C",
        "shape": list(normalize_shape(da.shape)),
        "zarr_format": zarr_format,
    }
    if meta["chunks"] is None:
        if da.chunks is not None:
            meta["chunks"] = [c[0] for c in da.chunks]
        else:
            meta["chunks"] = list(da.shape)

    return meta
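
As a rough, self-contained illustration of the chunk fallback at the end of this helper: if the encoding does not specify chunks, a dask-backed array contributes the first block size of each axis, while an eager array is stored as a single chunk. The dask-style structure below is a hypothetical stand-in for `da.chunks`.

# dask-style chunks for a (10, 20) array split into 4-row blocks
dask_chunks = ((4, 4, 2), (20,))

# dask-backed case: take the first block size along each axis
print([c[0] for c in dask_chunks])  # [4, 20]

# eager case: da.chunks is None, so the whole shape becomes one chunk
shape = (10, 20)
print(list(shape))  # [10, 20]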
Example #5
def test_normalize_shape():
    eq((100, ), normalize_shape((100, )))
    eq((100, ), normalize_shape([100]))
    eq((100, ), normalize_shape(100))
    with assert_raises(TypeError):
        normalize_shape(None)
    with assert_raises(ValueError):
        normalize_shape('foo')
Example #6
def test_normalize_shape():
    assert (100, ) == normalize_shape((100, ))
    assert (100, ) == normalize_shape([100])
    assert (100, ) == normalize_shape(100)
    with pytest.raises(TypeError):
        normalize_shape(None)
    with pytest.raises(ValueError):
        normalize_shape('foo')
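
For reference, a minimal `normalize_shape` that is consistent with these tests; this is a sketch, not necessarily zarr's exact source.

import numbers


def normalize_shape(shape):
    """Coerce an int or a sequence of ints into a tuple of ints."""
    if shape is None:
        raise TypeError('shape is None')
    # a bare integer is accepted for one-dimensional arrays
    if isinstance(shape, numbers.Integral):
        shape = (int(shape),)
    # a string such as 'foo' fails here with ValueError, via int('f')
    return tuple(int(s) for s in shape)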
Example #7
def test_normalize_shape():
    eq((100,), normalize_shape((100,)))
    eq((100,), normalize_shape([100]))
    eq((100,), normalize_shape(100))
    with assert_raises(TypeError):
        normalize_shape(None)
    with assert_raises(ValueError):
        normalize_shape('foo')
Example #8
def _init_array_metadata(store, shape, chunks=None, dtype=None,
                         compressor='default',
                         fill_value=None, order='C', overwrite=False,
                         path=None, chunk_store=None, filters=None):

    # guard conditions
    if overwrite:
        # attempt to delete any pre-existing items in store
        rmdir(store, path)
        if chunk_store is not None and chunk_store != store:
            rmdir(chunk_store, path)
    elif contains_array(store, path):
        err_contains_array(path)
    elif contains_group(store, path):
        err_contains_group(path)

    # normalize metadata
    shape = normalize_shape(shape)
    dtype = np.dtype(dtype)
    chunks = normalize_chunks(chunks, shape, dtype.itemsize)
    order = normalize_order(order)

    # obtain compressor config
    if compressor == 'none':
        # compatibility
        compressor = None
    elif compressor == 'default':
        compressor = default_compressor
    if compressor:
        try:
            compressor_config = compressor.get_config()
        except AttributeError:
            err_bad_compressor(compressor)
    else:
        compressor_config = None

    # obtain filters config
    if filters:
        filters_config = [f.get_config() for f in filters]
    else:
        filters_config = None

    # initialize metadata
    meta = dict(shape=shape, chunks=chunks, dtype=dtype,
                compressor=compressor_config, fill_value=fill_value,
                order=order, filters=filters_config)
    key = _path_to_prefix(path) + array_meta_key
    store[key] = encode_array_metadata(meta)

    # initialize attributes
    key = _path_to_prefix(path) + attrs_key
    store[key] = json.dumps(dict()).encode('ascii')
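
To make the end result concrete, here is a small sketch of what this helper leaves in the store. It assumes a zarr 2.x release matching the code above, where the public wrapper `init_array` lives in `zarr.storage` and a plain dict can serve as the store; the key names reflect the `array_meta_key` and `attrs_key` constants used above.

from zarr.storage import init_array

store = {}
init_array(store, shape=(100, 100), chunks=(10, 10), dtype='f8')

# per the code above, array metadata plus an empty attributes document are
# written under the (here empty) path prefix
print(sorted(store))  # ['.zarray', '.zattrs']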
Example #9
def _extract_zarray(da, encoding, dtype):
    """ helper function to extract zarr array metadata. """
    meta = {
        'compressor': encoding.get(
            'compressor', da.encoding.get('compressor', default_compressor)),
        'filters': encoding.get('filters', da.encoding.get('filters', None)),
        'chunks': encoding.get('chunks', None),
        'dtype': dtype.str,
        'fill_value': _extract_fill_value(da, dtype),
        'order': 'C',
        'shape': list(normalize_shape(da.shape)),
        'zarr_format': zarr_format,
    }

    if meta['chunks'] is None:
        meta['chunks'] = da.shape

    # validate chunks
    if isinstance(da.data, dask_array_type):
        var_chunks = tuple(c[0] for c in da.data.chunks)
    else:
        var_chunks = da.shape
    if var_chunks != tuple(meta['chunks']):
        raise ValueError('Encoding chunks do not match inferred chunks')

    meta['chunks'] = list(meta['chunks'])  # return chunks as a list

    return meta
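
To isolate the consistency check above without dask or xarray, here is a hypothetical walk-through; the only assumption is dask's per-axis tuple-of-block-sizes chunk layout.

# a 1-D array of length 2500 chunked by 1000 -> dask chunks ((1000, 1000, 500),)
dask_chunks = ((1000, 1000, 500),)

inferred = tuple(c[0] for c in dask_chunks)  # first block per axis -> (1000,)
declared = (1000,)                           # chunks given in the encoding

if inferred != declared:
    raise ValueError('Encoding chunks do not match inferred chunks')
print('chunks are consistent:', inferred)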
Example #10
File: core.py Project: will133/zarr
    def view(self,
             shape=None,
             chunks=None,
             dtype=None,
             fill_value=None,
             filters=None,
             read_only=None,
             synchronizer=None):
        """Return an array sharing the same data.

        Parameters
        ----------
        shape : int or tuple of ints
            Array shape.
        chunks : int or tuple of ints, optional
            Chunk shape.
        dtype : string or dtype, optional
            NumPy dtype.
        fill_value : object
            Default value to use for uninitialized portions of the array.
        filters : sequence, optional
            Sequence of filters to use to encode chunk data prior to
            compression.
        read_only : bool, optional
            True if array should be protected against modification.
        synchronizer : object, optional
            Array synchronizer.

        Notes
        -----
        WARNING: This is an experimental feature and should be used with care.
        There are plenty of ways to generate errors and/or cause data
        corruption.

        Examples
        --------

        Bypass filters:

            >>> import zarr
            >>> import numpy as np
            >>> np.random.seed(42)
            >>> labels = [b'female', b'male']
            >>> data = np.random.choice(labels, size=10000)
            >>> filters = [zarr.Categorize(labels=labels,
            ...                                  dtype=data.dtype,
            ...                                  astype='u1')]
            >>> a = zarr.array(data, chunks=1000, filters=filters)
            >>> a[:]
            array([b'female', b'male', b'female', ..., b'male', b'male', b'female'],
                  dtype='|S6')
            >>> v = a.view(dtype='u1', filters=[])
            >>> v.is_view
            True
            >>> v[:]
            array([1, 2, 1, ..., 2, 2, 1], dtype=uint8)

        Views can be used to modify data:

            >>> x = v[:]
            >>> x.sort()
            >>> v[:] = x
            >>> v[:]
            array([1, 1, 1, ..., 2, 2, 2], dtype=uint8)
            >>> a[:]
            array([b'female', b'female', b'female', ..., b'male', b'male', b'male'],
                  dtype='|S6')

        View as a different dtype with the same itemsize:

            >>> data = np.random.randint(0, 2, size=10000, dtype='u1')
            >>> a = zarr.array(data, chunks=1000)
            >>> a[:]
            array([0, 0, 1, ..., 1, 0, 0], dtype=uint8)
            >>> v = a.view(dtype=bool)
            >>> v[:]
            array([False, False,  True, ...,  True, False, False], dtype=bool)
            >>> np.all(a[:].view(dtype=bool) == v[:])
            True

        An array can be viewed with a dtype with a different itemsize, however
        some care is needed to adjust the shape and chunk shape so that chunk
        data is interpreted correctly:

            >>> data = np.arange(10000, dtype='u2')
            >>> a = zarr.array(data, chunks=1000)
            >>> a[:10]
            array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint16)
            >>> v = a.view(dtype='u1', shape=20000, chunks=2000)
            >>> v[:10]
            array([0, 0, 1, 0, 2, 0, 3, 0, 4, 0], dtype=uint8)
            >>> np.all(a[:].view('u1') == v[:])
            True

        Change fill value for uninitialized chunks:

            >>> a = zarr.full(10000, chunks=1000, fill_value=-1, dtype='i1')
            >>> a[:]
            array([-1, -1, -1, ..., -1, -1, -1], dtype=int8)
            >>> v = a.view(fill_value=42)
            >>> v[:]
            array([42, 42, 42, ..., 42, 42, 42], dtype=int8)

        Note that resizing or appending to views is not permitted:

            >>> a = zarr.empty(10000)
            >>> v = a.view()
            >>> try:
            ...     v.resize(20000)
            ... except PermissionError as e:
            ...     print(e)
            not permitted for views

        """  # flake8: noqa

        store = self._store
        chunk_store = self._chunk_store
        path = self._path
        if read_only is None:
            read_only = self._read_only
        if synchronizer is None:
            synchronizer = self._synchronizer
        a = Array(store=store,
                  path=path,
                  chunk_store=chunk_store,
                  read_only=read_only,
                  synchronizer=synchronizer,
                  cache_metadata=True)
        a._is_view = True

        # allow override of some properties
        if dtype is None:
            dtype = self._dtype
        else:
            dtype = np.dtype(dtype)
            a._dtype = dtype
        if shape is None:
            shape = self._shape
        else:
            shape = normalize_shape(shape)
            a._shape = shape
        if chunks is not None:
            chunks = normalize_chunks(chunks, shape, dtype.itemsize)
            a._chunks = chunks
        if fill_value is not None:
            a._fill_value = fill_value
        if filters is not None:
            a._filters = filters

        return a
Example #11
def _init_array_metadata(store,
                         shape,
                         chunks=None,
                         dtype=None,
                         compressor='default',
                         fill_value=None,
                         order='C',
                         overwrite=False,
                         path=None,
                         chunk_store=None,
                         filters=None):

    # guard conditions
    if overwrite:
        # attempt to delete any pre-existing items in store
        rmdir(store, path)
        if chunk_store is not None and chunk_store != store:
            rmdir(chunk_store, path)
    elif contains_array(store, path):
        err_contains_array(path)
    elif contains_group(store, path):
        err_contains_group(path)

    # normalize metadata
    shape = normalize_shape(shape)
    dtype = np.dtype(dtype)
    chunks = normalize_chunks(chunks, shape, dtype.itemsize)
    order = normalize_order(order)

    # obtain compressor config
    if compressor == 'none':
        # compatibility
        compressor = None
    elif compressor == 'default':
        compressor = default_compressor
    if compressor:
        try:
            compressor_config = compressor.get_config()
        except AttributeError:
            err_bad_compressor(compressor)
    else:
        compressor_config = None

    # obtain filters config
    if filters:
        filters_config = [f.get_config() for f in filters]
    else:
        filters_config = None

    # initialize metadata
    meta = dict(shape=shape,
                chunks=chunks,
                dtype=dtype,
                compressor=compressor_config,
                fill_value=fill_value,
                order=order,
                filters=filters_config)
    key = _path_to_prefix(path) + array_meta_key
    store[key] = encode_array_metadata(meta)

    # initialize attributes
    key = _path_to_prefix(path) + attrs_key
    store[key] = json.dumps(dict()).encode('ascii')
Example #12
def _init_array_metadata(store,
                         shape,
                         chunks=None,
                         dtype=None,
                         compressor='default',
                         fill_value=None,
                         order='C',
                         overwrite=False,
                         path=None,
                         chunk_store=None,
                         filters=None):

    # guard conditions
    if overwrite:
        # attempt to delete any pre-existing items in store
        rmdir(store, path)
        if chunk_store is not None:
            rmdir(chunk_store, path)
    elif contains_array(store, path):
        err_contains_array(path)
    elif contains_group(store, path):
        err_contains_group(path)

    # normalize metadata
    shape = normalize_shape(shape)
    dtype = np.dtype(dtype)
    if dtype.kind in 'mM':
        raise ValueError(
            'datetime64 and timedelta64 dtypes are not currently supported; '
            'please store the data using int64 instead')
    chunks = normalize_chunks(chunks, shape, dtype.itemsize)
    order = normalize_order(order)
    fill_value = normalize_fill_value(fill_value, dtype)

    # compressor prep
    if shape == ():
        # no point in compressing a 0-dimensional array, only a single value
        compressor = None
    elif compressor == 'none':
        # compatibility
        compressor = None
    elif compressor == 'default':
        compressor = default_compressor

    # obtain compressor config
    compressor_config = None
    if compressor:
        try:
            compressor_config = compressor.get_config()
        except AttributeError:
            err_bad_compressor(compressor)

    # obtain filters config
    if filters:
        filters_config = [f.get_config() for f in filters]
    else:
        filters_config = None

    # initialize metadata
    meta = dict(shape=shape,
                chunks=chunks,
                dtype=dtype,
                compressor=compressor_config,
                fill_value=fill_value,
                order=order,
                filters=filters_config)
    key = _path_to_prefix(path) + array_meta_key
    store[key] = encode_array_metadata(meta)

    # initialize attributes
    key = _path_to_prefix(path) + attrs_key
    store[key] = json.dumps(dict()).encode('ascii')
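
A short sketch of the new dtype guard in this version. Hedged assumptions: `init_array` in `zarr.storage` is the public wrapper around this helper, and the ValueError (and its message) is specific to the zarr release this example was taken from; other releases may behave differently.

from zarr.storage import init_array

try:
    init_array({}, shape=(10,), chunks=(10,), dtype='M8[s]')
except ValueError as e:
    print(e)  # datetime64 and timedelta64 dtypes are not currently supported ...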
Example #13
    def view(self, shape=None, chunks=None, dtype=None,
             fill_value=None, filters=None, read_only=None,
             synchronizer=None):
        """Return an array sharing the same data.

        Parameters
        ----------
        shape : int or tuple of ints
            Array shape.
        chunks : int or tuple of ints, optional
            Chunk shape.
        dtype : string or dtype, optional
            NumPy dtype.
        fill_value : object
            Default value to use for uninitialized portions of the array.
        filters : sequence, optional
            Sequence of filters to use to encode chunk data prior to
            compression.
        read_only : bool, optional
            True if array should be protected against modification.
        synchronizer : object, optional
            Array synchronizer.

        Notes
        -----
        WARNING: This is an experimental feature and should be used with care.
        There are plenty of ways to generate errors and/or cause data
        corruption.

        Examples
        --------

        Bypass filters:

            >>> import zarr
            >>> import numpy as np
            >>> np.random.seed(42)
            >>> labels = [b'female', b'male']
            >>> data = np.random.choice(labels, size=10000)
            >>> filters = [zarr.Categorize(labels=labels,
            ...                                  dtype=data.dtype,
            ...                                  astype='u1')]
            >>> a = zarr.array(data, chunks=1000, filters=filters)
            >>> a[:]
            array([b'female', b'male', b'female', ..., b'male', b'male', b'female'],
                  dtype='|S6')
            >>> v = a.view(dtype='u1', filters=[])
            >>> v.is_view
            True
            >>> v[:]
            array([1, 2, 1, ..., 2, 2, 1], dtype=uint8)

        Views can be used to modify data:

            >>> x = v[:]
            >>> x.sort()
            >>> v[:] = x
            >>> v[:]
            array([1, 1, 1, ..., 2, 2, 2], dtype=uint8)
            >>> a[:]
            array([b'female', b'female', b'female', ..., b'male', b'male', b'male'],
                  dtype='|S6')

        View as a different dtype with the same itemsize:

            >>> data = np.random.randint(0, 2, size=10000, dtype='u1')
            >>> a = zarr.array(data, chunks=1000)
            >>> a[:]
            array([0, 0, 1, ..., 1, 0, 0], dtype=uint8)
            >>> v = a.view(dtype=bool)
            >>> v[:]
            array([False, False,  True, ...,  True, False, False], dtype=bool)
            >>> np.all(a[:].view(dtype=bool) == v[:])
            True

        An array can be viewed with a dtype with a different itemsize, however
        some care is needed to adjust the shape and chunk shape so that chunk
        data is interpreted correctly:

            >>> data = np.arange(10000, dtype='u2')
            >>> a = zarr.array(data, chunks=1000)
            >>> a[:10]
            array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint16)
            >>> v = a.view(dtype='u1', shape=20000, chunks=2000)
            >>> v[:10]
            array([0, 0, 1, 0, 2, 0, 3, 0, 4, 0], dtype=uint8)
            >>> np.all(a[:].view('u1') == v[:])
            True

        Change fill value for uninitialized chunks:

            >>> a = zarr.full(10000, chunks=1000, fill_value=-1, dtype='i1')
            >>> a[:]
            array([-1, -1, -1, ..., -1, -1, -1], dtype=int8)
            >>> v = a.view(fill_value=42)
            >>> v[:]
            array([42, 42, 42, ..., 42, 42, 42], dtype=int8)

        Note that resizing or appending to views is not permitted:

            >>> a = zarr.empty(10000)
            >>> v = a.view()
            >>> try:
            ...     v.resize(20000)
            ... except PermissionError as e:
            ...     print(e)
            not permitted for views

        """  # flake8: noqa

        store = self._store
        chunk_store = self._chunk_store
        path = self._path
        if read_only is None:
            read_only = self._read_only
        if synchronizer is None:
            synchronizer = self._synchronizer
        a = Array(store=store, path=path, chunk_store=chunk_store,
                  read_only=read_only, synchronizer=synchronizer,
                  cache_metadata=True)
        a._is_view = True

        # allow override of some properties
        if dtype is None:
            dtype = self._dtype
        else:
            dtype = np.dtype(dtype)
            a._dtype = dtype
        if shape is None:
            shape = self._shape
        else:
            shape = normalize_shape(shape)
            a._shape = shape
        if chunks is not None:
            chunks = normalize_chunks(chunks, shape, dtype.itemsize)
            a._chunks = chunks
        if fill_value is not None:
            a._fill_value = fill_value
        if filters is not None:
            a._filters = filters

        return a