def _init_array_metadata(store, shape, chunks=None, dtype=None,
                         compressor='default', fill_value=None,
                         order='C', overwrite=False, path=None,
                         chunk_store=None, filters=None):

    # guard conditions
    if overwrite:
        # attempt to delete any pre-existing items in store
        rmdir(store, path)
        if chunk_store is not None and chunk_store != store:
            rmdir(chunk_store, path)
    elif contains_array(store, path):
        err_contains_array(path)
    elif contains_group(store, path):
        err_contains_group(path)

    # normalize metadata
    shape = normalize_shape(shape)
    dtype = np.dtype(dtype)
    chunks = normalize_chunks(chunks, shape, dtype.itemsize)
    order = normalize_order(order)

    # obtain compressor config
    if compressor == 'none':
        # compatibility
        compressor = None
    elif compressor == 'default':
        compressor = default_compressor
    if compressor:
        try:
            compressor_config = compressor.get_config()
        except AttributeError:
            err_bad_compressor(compressor)
    else:
        compressor_config = None

    # obtain filters config
    if filters:
        filters_config = [f.get_config() for f in filters]
    else:
        filters_config = None

    # initialize metadata
    meta = dict(shape=shape, chunks=chunks, dtype=dtype,
                compressor=compressor_config, fill_value=fill_value,
                order=order, filters=filters_config)
    key = _path_to_prefix(path) + array_meta_key
    store[key] = encode_array_metadata(meta)

    # initialize attributes
    key = _path_to_prefix(path) + attrs_key
    store[key] = json.dumps(dict()).encode('ascii')
def test_normalize_chunks():
    eq((10,), normalize_chunks((10,), (100,), 1))
    eq((10,), normalize_chunks([10], (100,), 1))
    eq((10,), normalize_chunks(10, (100,), 1))
    eq((10, 10), normalize_chunks((10, 10), (100, 10), 1))
    eq((10, 10), normalize_chunks(10, (100, 10), 1))
    eq((10, 10), normalize_chunks((10, None), (100, 10), 1))
    eq((30, 20, 10), normalize_chunks(30, (100, 20, 10), 1))
    eq((30, 20, 10), normalize_chunks((30,), (100, 20, 10), 1))
    eq((30, 20, 10), normalize_chunks((30, None), (100, 20, 10), 1))
    eq((30, 20, 10), normalize_chunks((30, None, None), (100, 20, 10), 1))
    eq((30, 20, 10), normalize_chunks((30, 20, None), (100, 20, 10), 1))
    eq((30, 20, 10), normalize_chunks((30, 20, 10), (100, 20, 10), 1))
    with assert_raises(ValueError):
        normalize_chunks('foo', (100,), 1)
    with assert_raises(ValueError):
        normalize_chunks((100, 10), (100,), 1)

    # test auto-chunking
    chunks = normalize_chunks(None, (100,), 1)
    eq((100,), chunks)
def view(self, shape=None, chunks=None, dtype=None,
         fill_value=None, filters=None, read_only=None,
         synchronizer=None):
    """Return an array sharing the same data.

    Parameters
    ----------
    shape : int or tuple of ints
        Array shape.
    chunks : int or tuple of ints, optional
        Chunk shape.
    dtype : string or dtype, optional
        NumPy dtype.
    fill_value : object
        Default value to use for uninitialized portions of the array.
    filters : sequence, optional
        Sequence of filters to use to encode chunk data prior to
        compression.
    read_only : bool, optional
        True if array should be protected against modification.
    synchronizer : object, optional
        Array synchronizer.

    Notes
    -----
    WARNING: This is an experimental feature and should be used with care.
    There are plenty of ways to generate errors and/or cause data
    corruption.

    Examples
    --------

    Bypass filters:

        >>> import zarr
        >>> import numpy as np
        >>> np.random.seed(42)
        >>> labels = [b'female', b'male']
        >>> data = np.random.choice(labels, size=10000)
        >>> filters = [zarr.Categorize(labels=labels,
        ...                            dtype=data.dtype,
        ...                            astype='u1')]
        >>> a = zarr.array(data, chunks=1000, filters=filters)
        >>> a[:]
        array([b'female', b'male', b'female', ..., b'male', b'male', b'female'],
              dtype='|S6')
        >>> v = a.view(dtype='u1', filters=[])
        >>> v.is_view
        True
        >>> v[:]
        array([1, 2, 1, ..., 2, 2, 1], dtype=uint8)

    Views can be used to modify data:

        >>> x = v[:]
        >>> x.sort()
        >>> v[:] = x
        >>> v[:]
        array([1, 1, 1, ..., 2, 2, 2], dtype=uint8)
        >>> a[:]
        array([b'female', b'female', b'female', ..., b'male', b'male', b'male'],
              dtype='|S6')

    View as a different dtype with the same itemsize:

        >>> data = np.random.randint(0, 2, size=10000, dtype='u1')
        >>> a = zarr.array(data, chunks=1000)
        >>> a[:]
        array([0, 0, 1, ..., 1, 0, 0], dtype=uint8)
        >>> v = a.view(dtype=bool)
        >>> v[:]
        array([False, False, True, ..., True, False, False], dtype=bool)
        >>> np.all(a[:].view(dtype=bool) == v[:])
        True

    An array can be viewed with a dtype with a different itemsize, however
    some care is needed to adjust the shape and chunk shape so that chunk
    data is interpreted correctly:

        >>> data = np.arange(10000, dtype='u2')
        >>> a = zarr.array(data, chunks=1000)
        >>> a[:10]
        array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint16)
        >>> v = a.view(dtype='u1', shape=20000, chunks=2000)
        >>> v[:10]
        array([0, 0, 1, 0, 2, 0, 3, 0, 4, 0], dtype=uint8)
        >>> np.all(a[:].view('u1') == v[:])
        True

    Change fill value for uninitialized chunks:

        >>> a = zarr.full(10000, chunks=1000, fill_value=-1, dtype='i1')
        >>> a[:]
        array([-1, -1, -1, ..., -1, -1, -1], dtype=int8)
        >>> v = a.view(fill_value=42)
        >>> v[:]
        array([42, 42, 42, ..., 42, 42, 42], dtype=int8)

    Note that resizing or appending to views is not permitted:

        >>> a = zarr.empty(10000)
        >>> v = a.view()
        >>> try:
        ...     v.resize(20000)
        ... except PermissionError as e:
        ...     print(e)
        not permitted for views

    """  # flake8: noqa

    store = self._store
    chunk_store = self._chunk_store
    path = self._path
    if read_only is None:
        read_only = self._read_only
    if synchronizer is None:
        synchronizer = self._synchronizer
    a = Array(store=store, path=path, chunk_store=chunk_store,
              read_only=read_only, synchronizer=synchronizer,
              cache_metadata=True)
    a._is_view = True

    # allow override of some properties
    if dtype is None:
        dtype = self._dtype
    else:
        dtype = np.dtype(dtype)
        a._dtype = dtype
    if shape is None:
        shape = self._shape
    else:
        shape = normalize_shape(shape)
        a._shape = shape
    if chunks is not None:
        chunks = normalize_chunks(chunks, shape, dtype.itemsize)
        a._chunks = chunks
    if fill_value is not None:
        a._fill_value = fill_value
    if filters is not None:
        a._filters = filters
    return a
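# Hedged usage sketch (zarr 2.x public API, separate from the method above):
# reinterpret chunk data through a view without copying. Both dtypes share
# the same itemsize, so shape and chunks need no adjustment.
import numpy as np
import zarr

a = zarr.array(np.arange(10, dtype='u1'), chunks=5)
v = a.view(dtype=bool)
assert v.is_view
assert not v[0] and v[1]  # 0 reads back as False, nonzero values as True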
def test_normalize_chunks():
    assert (10,) == normalize_chunks((10,), (100,), 1)
    assert (10,) == normalize_chunks([10], (100,), 1)
    assert (10,) == normalize_chunks(10, (100,), 1)
    assert (10, 10) == normalize_chunks((10, 10), (100, 10), 1)
    assert (10, 10) == normalize_chunks(10, (100, 10), 1)
    assert (10, 10) == normalize_chunks((10, None), (100, 10), 1)
    assert (30, 30, 30) == normalize_chunks(30, (100, 20, 10), 1)
    assert (30, 20, 10) == normalize_chunks((30,), (100, 20, 10), 1)
    assert (30, 20, 10) == normalize_chunks((30, None), (100, 20, 10), 1)
    assert (30, 20, 10) == normalize_chunks((30, None, None), (100, 20, 10), 1)
    assert (30, 20, 10) == normalize_chunks((30, 20, None), (100, 20, 10), 1)
    assert (30, 20, 10) == normalize_chunks((30, 20, 10), (100, 20, 10), 1)
    with pytest.raises(ValueError):
        normalize_chunks('foo', (100,), 1)
    with pytest.raises(ValueError):
        normalize_chunks((100, 10), (100,), 1)

    # test auto-chunking
    assert (100,) == normalize_chunks(None, (100,), 1)
    assert (100,) == normalize_chunks(-1, (100,), 1)
    assert (30, 20, 10) == normalize_chunks((30, -1, None), (100, 20, 10), 1)
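# A minimal sketch of the normalization rules the pytest version above
# asserts: an int broadcasts across all dimensions, None or -1 takes the
# full extent of a dimension, and missing trailing dimensions default to
# the array shape. The name normalize_chunks_sketch is hypothetical, and
# auto-chunking (chunks=None) is simplified to one chunk covering the whole
# array, whereas the real implementation guesses a chunk shape from the
# itemsize; typesize is therefore unused here.
import numbers


def normalize_chunks_sketch(chunks, shape, typesize):
    if chunks is None:
        # simplified auto-chunking: a single chunk spanning the array
        return shape
    if isinstance(chunks, numbers.Integral):
        # an int broadcasts across all dimensions
        chunks = tuple(int(chunks) for _ in shape)
    else:
        chunks = tuple(chunks)
    if len(chunks) > len(shape):
        raise ValueError('too many dimensions in chunks')
    # underspecified chunks take the full extent of trailing dimensions
    chunks += shape[len(chunks):]
    # None or -1 means the full extent of that dimension
    return tuple(s if c is None or c == -1 else int(c)
                 for s, c in zip(shape, chunks))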
def _init_array_metadata(store, shape, chunks=None, dtype=None,
                         compressor='default', fill_value=None,
                         order='C', overwrite=False, path=None,
                         chunk_store=None, filters=None):

    # guard conditions
    if overwrite:
        # attempt to delete any pre-existing items in store
        rmdir(store, path)
        if chunk_store is not None:
            rmdir(chunk_store, path)
    elif contains_array(store, path):
        err_contains_array(path)
    elif contains_group(store, path):
        err_contains_group(path)

    # normalize metadata
    shape = normalize_shape(shape)
    dtype = np.dtype(dtype)
    if dtype.kind in 'mM':
        raise ValueError(
            'datetime64 and timedelta64 dtypes are not currently supported; '
            'please store the data using int64 instead')
    chunks = normalize_chunks(chunks, shape, dtype.itemsize)
    order = normalize_order(order)
    fill_value = normalize_fill_value(fill_value, dtype)

    # compressor prep
    if shape == ():
        # no point in compressing a 0-dimensional array, only a single value
        compressor = None
    elif compressor == 'none':
        # compatibility
        compressor = None
    elif compressor == 'default':
        compressor = default_compressor

    # obtain compressor config
    compressor_config = None
    if compressor:
        try:
            compressor_config = compressor.get_config()
        except AttributeError:
            err_bad_compressor(compressor)

    # obtain filters config
    if filters:
        filters_config = [f.get_config() for f in filters]
    else:
        filters_config = None

    # initialize metadata
    meta = dict(shape=shape, chunks=chunks, dtype=dtype,
                compressor=compressor_config, fill_value=fill_value,
                order=order, filters=filters_config)
    key = _path_to_prefix(path) + array_meta_key
    store[key] = encode_array_metadata(meta)

    # initialize attributes
    key = _path_to_prefix(path) + attrs_key
    store[key] = json.dumps(dict()).encode('ascii')
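# Hedged usage sketch: _init_array_metadata is internal, but in zarr 2.x the
# public wrapper zarr.storage.init_array drives this code path, and any
# MutableMapping can serve as the store.
import zarr

store = dict()
zarr.storage.init_array(store, shape=(10000,), chunks=(1000,), dtype='i4')
print(sorted(store))  # expect the '.zarray' and '.zattrs' metadata keys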