Example #1
0
def _init_array_metadata(store, shape, chunks=None, dtype=None,
                         compressor='default',
                         fill_value=None, order='C', overwrite=False,
                         path=None, chunk_store=None, filters=None):
    """Write the metadata and an empty attributes document for an array.

    Parameters
    ----------
    store : object supporting item assignment
        Receives the encoded array metadata and the attributes document.
    shape : object
        Array shape; passed through ``normalize_shape``.
    chunks : object, optional
        Chunk shape; passed through ``normalize_chunks`` together with the
        normalized shape and the dtype item size.
    dtype : object, optional
        Anything accepted by ``np.dtype``.
    compressor : object, optional
        ``'default'`` selects ``default_compressor``; ``'none'`` or any falsy
        value means no compressor config is stored. Any other value must
        expose ``get_config()``, otherwise ``err_bad_compressor`` is called.
    fill_value : object, optional
        Stored in the metadata as given.
    order : object, optional
        Memory layout; passed through ``normalize_order``.
    overwrite : bool, optional
        If true, ``rmdir`` is called for `path` on `store` (and on
        `chunk_store` when it is given and compares unequal to `store`)
        before anything is written. Otherwise an existing array or group at
        `path` is reported via ``err_contains_array`` /
        ``err_contains_group``.
    path : object, optional
        Turned into a key prefix by ``_path_to_prefix``.
    chunk_store : object, optional
        Separate store for chunk data; only touched when overwriting.
    filters : sequence, optional
        Objects exposing ``get_config()``; an empty or missing sequence is
        stored as ``None``.

    """

    # guard conditions: only clear out existing content when asked to,
    # otherwise report whatever already lives at this path
    if not overwrite:
        if contains_array(store, path):
            err_contains_array(path)
        elif contains_group(store, path):
            err_contains_group(path)
    else:
        rmdir(store, path)
        if chunk_store is not None and chunk_store != store:
            rmdir(chunk_store, path)

    # normalize metadata
    shape = normalize_shape(shape)
    dtype = np.dtype(dtype)
    chunks = normalize_chunks(chunks, shape, dtype.itemsize)
    order = normalize_order(order)

    # resolve the string shorthands for the compressor
    if compressor == 'none':
        # compatibility
        compressor = None
    elif compressor == 'default':
        compressor = default_compressor

    # obtain compressor config
    if not compressor:
        compressor_config = None
    else:
        try:
            compressor_config = compressor.get_config()
        except AttributeError:
            err_bad_compressor(compressor)

    # obtain filters config
    filters_config = [f.get_config() for f in filters] if filters else None

    # initialize metadata
    meta = {
        'shape': shape,
        'chunks': chunks,
        'dtype': dtype,
        'compressor': compressor_config,
        'fill_value': fill_value,
        'order': order,
        'filters': filters_config,
    }
    meta_key = _path_to_prefix(path) + array_meta_key
    encoded_meta = encode_array_metadata(meta)
    store[meta_key] = encoded_meta

    # initialize attributes with an empty JSON object
    attributes_key = _path_to_prefix(path) + attrs_key
    empty_attributes = json.dumps({}).encode('ascii')
    store[attributes_key] = empty_attributes
Example #2
0
def test_normalize_chunks():
    """Check ``normalize_chunks`` against a table of inputs (typesize 1)."""

    # (expected result, chunks argument, array shape)
    valid_cases = [
        ((10, ), (10, ), (100, )),
        ((10, ), [10], (100, )),
        ((10, ), 10, (100, )),
        ((10, 10), (10, 10), (100, 10)),
        ((10, 10), 10, (100, 10)),
        ((10, 10), (10, None), (100, 10)),
        ((30, 20, 10), 30, (100, 20, 10)),
        ((30, 20, 10), (30, ), (100, 20, 10)),
        ((30, 20, 10), (30, None), (100, 20, 10)),
        ((30, 20, 10), (30, None, None), (100, 20, 10)),
        ((30, 20, 10), (30, 20, None), (100, 20, 10)),
        ((30, 20, 10), (30, 20, 10), (100, 20, 10)),
    ]
    for expected, chunks_arg, shape in valid_cases:
        eq(expected, normalize_chunks(chunks_arg, shape, 1))

    # (chunks argument, array shape) pairs that must be rejected
    invalid_cases = [
        ('foo', (100, )),
        ((100, 10), (100, )),
    ]
    for chunks_arg, shape in invalid_cases:
        with assert_raises(ValueError):
            normalize_chunks(chunks_arg, shape, 1)

    # test auto-chunking
    eq((100, ), normalize_chunks(None, (100, ), 1))
Example #3
0
File: core.py Project: will133/zarr
    def view(self,
             shape=None,
             chunks=None,
             dtype=None,
             fill_value=None,
             filters=None,
             read_only=None,
             synchronizer=None):
        """Return a new array object backed by the same stored data.

        Parameters
        ----------
        shape : int or tuple of ints, optional
            Array shape for the view. Defaults to the shape of this array.
        chunks : int or tuple of ints, optional
            Chunk shape for the view.
        dtype : string or dtype, optional
            NumPy dtype for the view. Defaults to the dtype of this array.
        fill_value : object, optional
            Value to use for uninitialized portions of the array.
        filters : sequence, optional
            Sequence of filters to use to encode chunk data prior to
            compression.
        read_only : bool, optional
            True if the view should be protected against modification.
            Defaults to the setting of this array.
        synchronizer : object, optional
            Array synchronizer. Defaults to the synchronizer of this array.

        Notes
        -----
        WARNING: This is an experimental feature and should be used with care.
        There are plenty of ways to generate errors and/or cause data
        corruption.

        Examples
        --------

        Bypass filters:

            >>> import zarr
            >>> import numpy as np
            >>> np.random.seed(42)
            >>> labels = [b'female', b'male']
            >>> data = np.random.choice(labels, size=10000)
            >>> filters = [zarr.Categorize(labels=labels,
            ...                                  dtype=data.dtype,
            ...                                  astype='u1')]
            >>> a = zarr.array(data, chunks=1000, filters=filters)
            >>> a[:]
            array([b'female', b'male', b'female', ..., b'male', b'male', b'female'],
                  dtype='|S6')
            >>> v = a.view(dtype='u1', filters=[])
            >>> v.is_view
            True
            >>> v[:]
            array([1, 2, 1, ..., 2, 2, 1], dtype=uint8)

        Views can be used to modify data:

            >>> x = v[:]
            >>> x.sort()
            >>> v[:] = x
            >>> v[:]
            array([1, 1, 1, ..., 2, 2, 2], dtype=uint8)
            >>> a[:]
            array([b'female', b'female', b'female', ..., b'male', b'male', b'male'],
                  dtype='|S6')

        View as a different dtype with the same itemsize:

            >>> data = np.random.randint(0, 2, size=10000, dtype='u1')
            >>> a = zarr.array(data, chunks=1000)
            >>> a[:]
            array([0, 0, 1, ..., 1, 0, 0], dtype=uint8)
            >>> v = a.view(dtype=bool)
            >>> v[:]
            array([False, False,  True, ...,  True, False, False], dtype=bool)
            >>> np.all(a[:].view(dtype=bool) == v[:])
            True

        An array can be viewed with a dtype with a different itemsize, however
        some care is needed to adjust the shape and chunk shape so that chunk
        data is interpreted correctly:

            >>> data = np.arange(10000, dtype='u2')
            >>> a = zarr.array(data, chunks=1000)
            >>> a[:10]
            array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint16)
            >>> v = a.view(dtype='u1', shape=20000, chunks=2000)
            >>> v[:10]
            array([0, 0, 1, 0, 2, 0, 3, 0, 4, 0], dtype=uint8)
            >>> np.all(a[:].view('u1') == v[:])
            True

        Change fill value for uninitialized chunks:

            >>> a = zarr.full(10000, chunks=1000, fill_value=-1, dtype='i1')
            >>> a[:]
            array([-1, -1, -1, ..., -1, -1, -1], dtype=int8)
            >>> v = a.view(fill_value=42)
            >>> v[:]
            array([42, 42, 42, ..., 42, 42, 42], dtype=int8)

        Note that resizing or appending to views is not permitted:

            >>> a = zarr.empty(10000)
            >>> v = a.view()
            >>> try:
            ...     v.resize(20000)
            ... except PermissionError as e:
            ...     print(e)
            not permitted for views

        """  # flake8: noqa

        # fall back to this array's own settings where none were given
        if read_only is None:
            read_only = self._read_only
        if synchronizer is None:
            synchronizer = self._synchronizer

        # open a second Array on the same store and path, flagged as a view
        view_array = Array(store=self._store,
                           path=self._path,
                           chunk_store=self._chunk_store,
                           read_only=read_only,
                           synchronizer=synchronizer,
                           cache_metadata=True)
        view_array._is_view = True

        # allow override of some properties; dtype and shape are resolved
        # first because chunk normalization depends on both
        if dtype is not None:
            dtype = np.dtype(dtype)
            view_array._dtype = dtype
        else:
            dtype = self._dtype
        if shape is not None:
            shape = normalize_shape(shape)
            view_array._shape = shape
        else:
            shape = self._shape
        if chunks is not None:
            view_array._chunks = normalize_chunks(chunks, shape,
                                                  dtype.itemsize)

        # remaining overrides are copied across verbatim when supplied
        verbatim_overrides = (('_fill_value', fill_value),
                              ('_filters', filters))
        for attr_name, override in verbatim_overrides:
            if override is not None:
                setattr(view_array, attr_name, override)

        return view_array
Example #4
0
def _init_array_metadata(store,
                         shape,
                         chunks=None,
                         dtype=None,
                         compressor='default',
                         fill_value=None,
                         order='C',
                         overwrite=False,
                         path=None,
                         chunk_store=None,
                         filters=None):
    """Create the metadata entry and an empty attributes entry for an array.

    Both entries are written into `store` under the key prefix that
    ``_path_to_prefix`` derives from `path`.

    Parameters
    ----------
    store : object supporting item assignment
        Destination for the two entries.
    shape : object
        Array shape, normalized with ``normalize_shape``.
    chunks : object, optional
        Chunk shape, normalized with ``normalize_chunks`` using the
        normalized shape and the dtype item size.
    dtype : object, optional
        Converted with ``np.dtype``.
    compressor : object, optional
        ``'default'`` stands for ``default_compressor`` and ``'none'`` for no
        compressor. A truthy compressor must provide ``get_config()``; if
        that lookup fails with ``AttributeError``, ``err_bad_compressor`` is
        called.
    fill_value : object, optional
        Recorded in the metadata unchanged.
    order : object, optional
        Normalized with ``normalize_order``.
    overwrite : bool, optional
        When true, ``rmdir`` is applied to `path` in `store`, and also in
        `chunk_store` if one is given that compares unequal to `store`.
        When false, ``err_contains_array`` / ``err_contains_group`` is
        called if an array or group already exists at `path`.
    path : object, optional
        Location of the array within the store.
    chunk_store : object, optional
        Separate chunk store; used here only for the overwrite clean-up.
    filters : sequence, optional
        Objects providing ``get_config()``. Nothing or an empty sequence
        results in ``None`` being recorded.

    """

    # guard conditions
    if overwrite:
        # attempt to delete any pre-existing items in store
        rmdir(store, path)
        has_separate_chunk_store = (chunk_store is not None and
                                    chunk_store != store)
        if has_separate_chunk_store:
            rmdir(chunk_store, path)
    else:
        if contains_array(store, path):
            err_contains_array(path)
        elif contains_group(store, path):
            err_contains_group(path)

    # normalize metadata
    shape = normalize_shape(shape)
    dtype = np.dtype(dtype)
    chunks = normalize_chunks(chunks, shape, dtype.itemsize)
    order = normalize_order(order)

    # translate the string shorthands into an actual compressor (or None)
    if compressor == 'none':
        # compatibility
        compressor = None
    elif compressor == 'default':
        compressor = default_compressor

    # obtain compressor config
    if not compressor:
        compressor_config = None
    else:
        try:
            compressor_config = compressor.get_config()
        except AttributeError:
            err_bad_compressor(compressor)

    # obtain filters config
    if not filters:
        filters_config = None
    else:
        filters_config = []
        for f in filters:
            filters_config.append(f.get_config())

    # initialize metadata
    meta = {'shape': shape,
            'chunks': chunks,
            'dtype': dtype,
            'compressor': compressor_config,
            'fill_value': fill_value,
            'order': order,
            'filters': filters_config}
    metadata_key = _path_to_prefix(path) + array_meta_key
    metadata_bytes = encode_array_metadata(meta)
    store[metadata_key] = metadata_bytes

    # initialize attributes as an empty JSON object
    attributes_key = _path_to_prefix(path) + attrs_key
    attributes_bytes = json.dumps({}).encode('ascii')
    store[attributes_key] = attributes_bytes
Example #5
0
def test_normalize_chunks():
    """Check ``normalize_chunks`` results and errors, always with typesize 1."""

    # (expected result, chunks argument, array shape)
    explicit_cases = (
        ((10, ), (10, ), (100, )),
        ((10, ), [10], (100, )),
        ((10, ), 10, (100, )),
        ((10, 10), (10, 10), (100, 10)),
        ((10, 10), 10, (100, 10)),
        ((10, 10), (10, None), (100, 10)),
        ((30, 30, 30), 30, (100, 20, 10)),
        ((30, 20, 10), (30, ), (100, 20, 10)),
        ((30, 20, 10), (30, None), (100, 20, 10)),
        ((30, 20, 10), (30, None, None), (100, 20, 10)),
        ((30, 20, 10), (30, 20, None), (100, 20, 10)),
        ((30, 20, 10), (30, 20, 10), (100, 20, 10)),
    )
    for expected, chunks_arg, shape in explicit_cases:
        assert expected == normalize_chunks(chunks_arg, shape, 1)

    # (chunks argument, array shape) pairs that must raise ValueError
    rejected_cases = (
        ('foo', (100, )),
        ((100, 10), (100, )),
    )
    for chunks_arg, shape in rejected_cases:
        with pytest.raises(ValueError):
            normalize_chunks(chunks_arg, shape, 1)

    # test auto-chunking
    auto_cases = (
        ((100, ), None, (100, )),
        ((100, ), -1, (100, )),
        ((30, 20, 10), (30, -1, None), (100, 20, 10)),
    )
    for expected, chunks_arg, shape in auto_cases:
        assert expected == normalize_chunks(chunks_arg, shape, 1)
Example #6
0
def _init_array_metadata(store,
                         shape,
                         chunks=None,
                         dtype=None,
                         compressor='default',
                         fill_value=None,
                         order='C',
                         overwrite=False,
                         path=None,
                         chunk_store=None,
                         filters=None):
    """Store the metadata and an empty attributes document for an array.

    Parameters
    ----------
    store : object supporting item assignment
        Receives the encoded metadata and the attributes document.
    shape : object
        Array shape, normalized with ``normalize_shape``.
    chunks : object, optional
        Chunk shape, normalized with ``normalize_chunks`` using the
        normalized shape and the dtype item size.
    dtype : object, optional
        Converted with ``np.dtype``.
    compressor : object, optional
        Ignored when the normalized shape is ``()``. Otherwise ``'none'``
        means no compressor and ``'default'`` means ``default_compressor``.
        A truthy compressor must provide ``get_config()``; if that lookup
        fails with ``AttributeError``, ``err_bad_compressor`` is called.
    fill_value : object, optional
        Normalized with ``normalize_fill_value`` for the resolved dtype.
    order : object, optional
        Normalized with ``normalize_order``.
    overwrite : bool, optional
        When true, ``rmdir`` is applied to `path` in `store` and, if given,
        in `chunk_store`. When false, ``err_contains_array`` /
        ``err_contains_group`` is called if an array or group already
        exists at `path`.
    path : object, optional
        Turned into a key prefix by ``_path_to_prefix``.
    chunk_store : object, optional
        Separate chunk store; used here only for the overwrite clean-up.
    filters : sequence, optional
        Objects providing ``get_config()``. Nothing or an empty sequence
        results in ``None`` being recorded.

    Raises
    ------
    ValueError
        If the dtype kind is ``'m'`` or ``'M'`` (timedelta64 / datetime64).

    """

    # guard conditions
    if not overwrite:
        if contains_array(store, path):
            err_contains_array(path)
        elif contains_group(store, path):
            err_contains_group(path)
    else:
        # attempt to delete any pre-existing items in store
        rmdir(store, path)
        if chunk_store is not None:
            rmdir(chunk_store, path)

    # normalize metadata
    shape = normalize_shape(shape)
    dtype = np.dtype(dtype)
    if dtype.kind in 'mM':
        unsupported_msg = ('datetime64 and timedelta64 dtypes are not '
                           'currently supported; please store the data '
                           'using int64 instead')
        raise ValueError(unsupported_msg)
    chunks = normalize_chunks(chunks, shape, dtype.itemsize)
    order = normalize_order(order)
    fill_value = normalize_fill_value(fill_value, dtype)

    # compressor prep
    if shape == ():
        # no point in compressing a 0-dimensional array, only a single value
        compressor = None
    elif compressor == 'none':
        # compatibility
        compressor = None
    elif compressor == 'default':
        compressor = default_compressor

    # obtain compressor config; stays None when there is no compressor
    compressor_config = None
    if compressor:
        try:
            compressor_config = compressor.get_config()
        except AttributeError:
            err_bad_compressor(compressor)

    # obtain filters config
    filters_config = None
    if filters:
        filters_config = [f.get_config() for f in filters]

    # initialize metadata
    meta = {
        'shape': shape,
        'chunks': chunks,
        'dtype': dtype,
        'compressor': compressor_config,
        'fill_value': fill_value,
        'order': order,
        'filters': filters_config,
    }
    metadata_key = _path_to_prefix(path) + array_meta_key
    metadata_bytes = encode_array_metadata(meta)
    store[metadata_key] = metadata_bytes

    # initialize attributes as an empty JSON object
    attributes_key = _path_to_prefix(path) + attrs_key
    attributes_bytes = json.dumps({}).encode('ascii')
    store[attributes_key] = attributes_bytes
Example #7
0
def test_normalize_chunks():
    """Check ``normalize_chunks`` per array shape, always with typesize 1."""

    # array shape -> ordered (expected result, chunks argument) pairs
    cases_by_shape = [
        ((100,), [
            ((10,), (10,)),
            ((10,), [10]),
            ((10,), 10),
        ]),
        ((100, 10), [
            ((10, 10), (10, 10)),
            ((10, 10), 10),
            ((10, 10), (10, None)),
        ]),
        ((100, 20, 10), [
            ((30, 20, 10), 30),
            ((30, 20, 10), (30,)),
            ((30, 20, 10), (30, None)),
            ((30, 20, 10), (30, None, None)),
            ((30, 20, 10), (30, 20, None)),
            ((30, 20, 10), (30, 20, 10)),
        ]),
    ]
    for shape, pairs in cases_by_shape:
        for expected, chunks_arg in pairs:
            eq(expected, normalize_chunks(chunks_arg, shape, 1))

    # chunks arguments that must be rejected for a 1-dimensional shape
    for bad_chunks in ('foo', (100, 10)):
        with assert_raises(ValueError):
            normalize_chunks(bad_chunks, (100,), 1)

    # test auto-chunking
    auto_chunks = normalize_chunks(None, (100,), 1)
    eq((100,), auto_chunks)
Example #8
0
    def view(self, shape=None, chunks=None, dtype=None,
             fill_value=None, filters=None, read_only=None,
             synchronizer=None):
        """Create another array object over the data of this array.

        Parameters
        ----------
        shape : int or tuple of ints, optional
            Array shape of the view; this array's shape if not given.
        chunks : int or tuple of ints, optional
            Chunk shape of the view.
        dtype : string or dtype, optional
            NumPy dtype of the view; this array's dtype if not given.
        fill_value : object, optional
            Value to use for uninitialized portions of the array.
        filters : sequence, optional
            Sequence of filters to use to encode chunk data prior to
            compression.
        read_only : bool, optional
            True if the view should be protected against modification; this
            array's setting if not given.
        synchronizer : object, optional
            Array synchronizer; this array's synchronizer if not given.

        Notes
        -----
        WARNING: This is an experimental feature and should be used with care.
        There are plenty of ways to generate errors and/or cause data
        corruption.

        Examples
        --------

        Bypass filters:

            >>> import zarr
            >>> import numpy as np
            >>> np.random.seed(42)
            >>> labels = [b'female', b'male']
            >>> data = np.random.choice(labels, size=10000)
            >>> filters = [zarr.Categorize(labels=labels,
            ...                                  dtype=data.dtype,
            ...                                  astype='u1')]
            >>> a = zarr.array(data, chunks=1000, filters=filters)
            >>> a[:]
            array([b'female', b'male', b'female', ..., b'male', b'male', b'female'],
                  dtype='|S6')
            >>> v = a.view(dtype='u1', filters=[])
            >>> v.is_view
            True
            >>> v[:]
            array([1, 2, 1, ..., 2, 2, 1], dtype=uint8)

        Views can be used to modify data:

            >>> x = v[:]
            >>> x.sort()
            >>> v[:] = x
            >>> v[:]
            array([1, 1, 1, ..., 2, 2, 2], dtype=uint8)
            >>> a[:]
            array([b'female', b'female', b'female', ..., b'male', b'male', b'male'],
                  dtype='|S6')

        View as a different dtype with the same itemsize:

            >>> data = np.random.randint(0, 2, size=10000, dtype='u1')
            >>> a = zarr.array(data, chunks=1000)
            >>> a[:]
            array([0, 0, 1, ..., 1, 0, 0], dtype=uint8)
            >>> v = a.view(dtype=bool)
            >>> v[:]
            array([False, False,  True, ...,  True, False, False], dtype=bool)
            >>> np.all(a[:].view(dtype=bool) == v[:])
            True

        An array can be viewed with a dtype with a different itemsize, however
        some care is needed to adjust the shape and chunk shape so that chunk
        data is interpreted correctly:

            >>> data = np.arange(10000, dtype='u2')
            >>> a = zarr.array(data, chunks=1000)
            >>> a[:10]
            array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint16)
            >>> v = a.view(dtype='u1', shape=20000, chunks=2000)
            >>> v[:10]
            array([0, 0, 1, 0, 2, 0, 3, 0, 4, 0], dtype=uint8)
            >>> np.all(a[:].view('u1') == v[:])
            True

        Change fill value for uninitialized chunks:

            >>> a = zarr.full(10000, chunks=1000, fill_value=-1, dtype='i1')
            >>> a[:]
            array([-1, -1, -1, ..., -1, -1, -1], dtype=int8)
            >>> v = a.view(fill_value=42)
            >>> v[:]
            array([42, 42, 42, ..., 42, 42, 42], dtype=int8)

        Note that resizing or appending to views is not permitted:

            >>> a = zarr.empty(10000)
            >>> v = a.view()
            >>> try:
            ...     v.resize(20000)
            ... except PermissionError as e:
            ...     print(e)
            not permitted for views

        """  # flake8: noqa

        # settings not supplied by the caller are inherited from this array
        if read_only is None:
            read_only = self._read_only
        if synchronizer is None:
            synchronizer = self._synchronizer

        # the view is a separate Array object over the same store and path
        array_kwargs = dict(store=self._store,
                            path=self._path,
                            chunk_store=self._chunk_store,
                            read_only=read_only,
                            synchronizer=synchronizer,
                            cache_metadata=True)
        result = Array(**array_kwargs)
        result._is_view = True

        # allow override of some properties; the effective dtype and shape
        # are needed below to normalize the chunk shape
        if dtype is not None:
            dtype = result._dtype = np.dtype(dtype)
        else:
            dtype = self._dtype
        if shape is not None:
            shape = result._shape = normalize_shape(shape)
        else:
            shape = self._shape
        if chunks is not None:
            chunks = result._chunks = normalize_chunks(chunks, shape,
                                                       dtype.itemsize)
        if fill_value is not None:
            result._fill_value = fill_value
        if filters is not None:
            result._filters = filters

        return result