def test_init_group_overwrite_path(self):
    """init_group with overwrite=True replaces array metadata at a sub-path
    with group metadata."""
    # setup: array metadata at both the root and the target path
    path = 'foo/bar'
    store = self.create_store()
    meta = dict(shape=(2000,), chunks=(200,), dtype=np.dtype('u1'),
                compressor=None, fill_value=0, order='F', filters=None)
    store[array_meta_key] = encode_array_metadata(meta)
    store[path + '/' + array_meta_key] = encode_array_metadata(meta)

    # don't overwrite (default)
    with pytest.raises(ValueError):
        init_group(store, path=path)

    # do overwrite
    try:
        init_group(store, overwrite=True, path=path)
    except NotImplementedError:
        # some store implementations do not support overwrite
        pass
    else:
        assert array_meta_key not in store
        assert group_meta_key in store
        assert (path + '/' + array_meta_key) not in store
        assert (path + '/' + group_meta_key) in store
        # should have been overwritten
        meta = decode_group_metadata(store[path + '/' + group_meta_key])
        assert ZARR_FORMAT == meta['zarr_format']
def test_init_group_overwrite_path(self):
    """Overwriting array metadata at a nested path yields group metadata
    at that path."""
    nested = 'foo/bar'
    store = self.create_store()
    array_meta = dict(shape=(2000,), chunks=(200,), dtype=np.dtype('u1'),
                      compressor=None, fill_value=0, order='F', filters=None)
    # array metadata at both the root and the nested path
    store[array_meta_key] = encode_array_metadata(array_meta)
    store[nested + '/' + array_meta_key] = encode_array_metadata(array_meta)

    # refusal to overwrite is the default
    with pytest.raises(ValueError):
        init_group(store, path=nested)

    # explicit overwrite (some stores may not implement it)
    try:
        init_group(store, overwrite=True, path=nested)
    except NotImplementedError:
        pass
    else:
        assert array_meta_key not in store
        assert group_meta_key in store
        assert (nested + '/' + array_meta_key) not in store
        assert (nested + '/' + group_meta_key) in store
        # group metadata should now be present at the nested path
        group_meta = decode_group_metadata(store[nested + '/' + group_meta_key])
        assert group_meta['zarr_format'] == ZARR_FORMAT
def test_init_group_overwrite(self):
    """init_group refuses to overwrite by default; overwrite=True replaces
    existing array metadata with group metadata."""
    # setup: store already contains array metadata at the root
    store = self.create_store()
    store[array_meta_key] = encode_array_metadata(
        dict(shape=(2000,), chunks=(200,), dtype=np.dtype('u1'),
             compressor=None, fill_value=0, order='F', filters=None))

    # don't overwrite array (default)
    with pytest.raises(ValueError):
        init_group(store)

    # do overwrite
    try:
        init_group(store, overwrite=True)
    except NotImplementedError:
        # some store implementations do not support overwrite
        pass
    else:
        assert array_meta_key not in store
        assert group_meta_key in store
        meta = decode_group_metadata(store[group_meta_key])
        assert ZARR_FORMAT == meta['zarr_format']

    # don't overwrite group
    with pytest.raises(ValueError):
        init_group(store)
def test_init_group_overwrite_chunk_store(self):
    """Overwriting an array with a group also clears the chunk store."""
    store = self.create_store()
    chunk_store = self.create_store()
    array_meta = dict(shape=(2000,), chunks=(200,), dtype=np.dtype('u1'),
                      compressor=None, fill_value=0, filters=None, order='F')
    store[array_meta_key] = encode_array_metadata(array_meta)
    # seed some chunk data that the overwrite should remove
    chunk_store['foo'] = b'bar'
    chunk_store['baz'] = b'quux'

    # refusing to overwrite the array is the default
    with pytest.raises(ValueError):
        init_group(store, chunk_store=chunk_store)

    # explicit overwrite (some stores may not implement it)
    try:
        init_group(store, overwrite=True, chunk_store=chunk_store)
    except NotImplementedError:
        pass
    else:
        assert array_meta_key not in store
        assert group_meta_key in store
        group_meta = decode_group_metadata(store[group_meta_key])
        assert group_meta['zarr_format'] == ZARR_FORMAT
        assert 'foo' not in chunk_store
        assert 'baz' not in chunk_store

    # an existing group is not overwritten by default either
    with pytest.raises(ValueError):
        init_group(store)
def test_init_array_overwrite(self):
    """Overwriting existing array metadata requires overwrite=True."""
    store = self.create_store()
    old_meta = dict(shape=(2000,), chunks=(200,), dtype=np.dtype('u1'),
                    compressor=Zlib(1).get_config(), fill_value=0,
                    order='F', filters=None)
    store[array_meta_key] = encode_array_metadata(old_meta)

    # overwriting is refused by default
    with pytest.raises(ValueError):
        init_array(store, shape=1000, chunks=100)

    # explicit overwrite (some stores may not implement it)
    try:
        init_array(store, shape=1000, chunks=100, dtype='i4', overwrite=True)
    except NotImplementedError:
        pass
    else:
        assert array_meta_key in store
        new_meta = decode_array_metadata(store[array_meta_key])
        assert new_meta['zarr_format'] == ZARR_FORMAT
        assert new_meta['shape'] == (1000,)
        assert new_meta['chunks'] == (100,)
        assert new_meta['dtype'] == np.dtype('i4')
def test_init_array_overwrite(self):
    """init_array refuses to overwrite by default; overwrite=True replaces
    the existing array metadata."""
    # setup: store already contains array metadata
    store = self.create_store()
    store[array_meta_key] = encode_array_metadata(
        dict(shape=(2000,), chunks=(200,), dtype=np.dtype('u1'),
             compressor=Zlib(1).get_config(), fill_value=0,
             order='F', filters=None)
    )

    # don't overwrite (default)
    with pytest.raises(ValueError):
        init_array(store, shape=1000, chunks=100)

    # do overwrite
    try:
        init_array(store, shape=1000, chunks=100, dtype='i4', overwrite=True)
    except NotImplementedError:
        # some store implementations do not support overwrite
        pass
    else:
        assert array_meta_key in store
        meta = decode_array_metadata(store[array_meta_key])
        assert ZARR_FORMAT == meta['zarr_format']
        assert (1000,) == meta['shape']
        assert (100,) == meta['chunks']
        assert np.dtype('i4') == meta['dtype']
def test_encode_decode_array_1():
    """Round-trip encode/decode of simple float64 array metadata."""
    meta = dict(shape=(100,), chunks=(10,), dtype=np.dtype('f8'),
                compressor=Zlib(1).get_config(), fill_value=None,
                filters=None, order='C')
    meta_json = '''{
        "chunks": [10],
        "compressor": {"id": "zlib", "level": 1},
        "dtype": "<f8",
        "fill_value": null,
        "filters": null,
        "order": "C",
        "shape": [100],
        "zarr_format": %s
    }''' % ZARR_FORMAT

    # encoding should produce the expected JSON document
    meta_enc = encode_array_metadata(meta)
    assert_json_equal(meta_json, meta_enc)

    # decoding should round-trip every field
    meta_dec = decode_array_metadata(meta_enc)
    assert meta_dec['zarr_format'] == ZARR_FORMAT
    for field in ('shape', 'chunks', 'dtype', 'compressor', 'order'):
        assert meta[field] == meta_dec[field]
    assert meta_dec['fill_value'] is None
    assert meta_dec['filters'] is None
def test_init_group_overwrite(self):
    """Overwriting an existing array with a group requires overwrite=True."""
    # setup: store already contains array metadata
    store = self.create_store()
    store[array_meta_key] = encode_array_metadata(
        dict(shape=(2000,), chunks=(200,), dtype=np.dtype('u1'),
             compressor=None, fill_value=0, order='F', filters=None)
    )

    # don't overwrite array (default)
    with pytest.raises(ValueError):
        init_group(store)

    # do overwrite
    try:
        init_group(store, overwrite=True)
    except NotImplementedError:
        # some store implementations do not support overwrite
        pass
    else:
        assert array_meta_key not in store
        assert group_meta_key in store
        meta = decode_group_metadata(store[group_meta_key])
        assert ZARR_FORMAT == meta['zarr_format']

    # don't overwrite group
    with pytest.raises(ValueError):
        init_group(store)
def test_init_array_overwrite_chunk_store(self):
    """overwrite=True replaces array metadata and removes existing chunks
    from a separate chunk store."""
    # setup
    store = self.create_store()
    chunk_store = self.create_store()
    store[array_meta_key] = encode_array_metadata(
        dict(shape=(2000,), chunks=(200,), dtype=np.dtype('u1'),
             compressor=None, fill_value=0, filters=None, order='F')
    )
    chunk_store['0'] = b'aaa'
    chunk_store['1'] = b'bbb'

    # don't overwrite (default)
    with pytest.raises(ValueError):
        init_array(store, shape=1000, chunks=100, chunk_store=chunk_store)

    # do overwrite
    try:
        init_array(store, shape=1000, chunks=100, dtype='i4',
                   overwrite=True, chunk_store=chunk_store)
    except NotImplementedError:
        # some store implementations do not support overwrite
        pass
    else:
        assert array_meta_key in store
        meta = decode_array_metadata(store[array_meta_key])
        assert ZARR_FORMAT == meta['zarr_format']
        assert (1000,) == meta['shape']
        assert (100,) == meta['chunks']
        assert np.dtype('i4') == meta['dtype']
        # pre-existing chunks should have been cleared out
        assert '0' not in chunk_store
        assert '1' not in chunk_store
def _init_array_metadata(store, shape, chunks=None, dtype=None,
                         compressor='default', fill_value=None, order='C',
                         overwrite=False, path=None, chunk_store=None,
                         filters=None):
    """Normalize array parameters and write the array metadata document plus
    an empty attributes document into `store` under `path`."""

    # guard conditions
    if overwrite:
        # attempt to delete any pre-existing items in store
        rmdir(store, path)
        if chunk_store is not None and chunk_store != store:
            # chunks live in a separate store; clear those too
            rmdir(chunk_store, path)
    elif contains_array(store, path):
        err_contains_array(path)
    elif contains_group(store, path):
        err_contains_group(path)

    # normalize metadata
    shape = normalize_shape(shape)
    dtype = np.dtype(dtype)
    chunks = normalize_chunks(chunks, shape, dtype.itemsize)
    order = normalize_order(order)

    # obtain compressor config
    if compressor == 'none':
        # compatibility
        compressor = None
    elif compressor == 'default':
        compressor = default_compressor
    if compressor:
        try:
            compressor_config = compressor.get_config()
        except AttributeError:
            # object does not provide the codec config interface
            err_bad_compressor(compressor)
    else:
        compressor_config = None

    # obtain filters config
    if filters:
        filters_config = [f.get_config() for f in filters]
    else:
        filters_config = None

    # initialize metadata
    meta = dict(shape=shape, chunks=chunks, dtype=dtype,
                compressor=compressor_config, fill_value=fill_value,
                order=order, filters=filters_config)
    key = _path_to_prefix(path) + array_meta_key
    store[key] = encode_array_metadata(meta)

    # initialize attributes (empty JSON document)
    key = _path_to_prefix(path) + attrs_key
    store[key] = json.dumps(dict()).encode('ascii')
def test_encode_decode_array_datetime_timedelta():
    """Round-trip metadata for timedelta64/datetime64 dtypes; the NaT fill
    value is encoded as the int64 minimum in the JSON document."""
    # some variations
    for k in ['m8[s]', 'M8[s]']:
        compressor = Blosc(cname='lz4', clevel=3, shuffle=2)
        dtype = np.dtype(k)
        fill_value = dtype.type("NaT")
        meta = dict(
            shape=(100, 100),
            chunks=(10, 10),
            dtype=dtype,
            compressor=compressor.get_config(),
            fill_value=fill_value,
            order=dtype.char,
            filters=[]
        )
        meta_json = '''{
            "chunks": [10, 10],
            "compressor": {
                "id": "blosc",
                "clevel": 3,
                "cname": "lz4",
                "shuffle": 2,
                "blocksize": 0
            },
            "dtype": "%s",
            "fill_value": -9223372036854775808,
            "filters": [],
            "order": "%s",
            "shape": [100, 100],
            "zarr_format": %s
        }''' % (dtype.str, dtype.char, ZARR_FORMAT)
        # test encoding
        meta_enc = encode_array_metadata(meta)
        assert_json_equal(meta_json, meta_enc)
        # test decoding
        meta_dec = decode_array_metadata(meta_enc)
        assert ZARR_FORMAT == meta_dec['zarr_format']
        assert meta['shape'] == meta_dec['shape']
        assert meta['chunks'] == meta_dec['chunks']
        assert meta['dtype'] == meta_dec['dtype']
        assert meta['compressor'] == meta_dec['compressor']
        assert meta['order'] == meta_dec['order']
        # NaT does not compare equal to itself, so compare the raw bytes.
        # Based off of this SO answer: https://stackoverflow.com/a/49972198
        assert np.all(
            fill_value.view((np.uint8, fill_value.itemsize)) ==
            meta_dec['fill_value'].view((np.uint8,
                                         meta_dec['fill_value'].itemsize))
        )
        assert [] == meta_dec['filters']
def test_init_array_overwrite_path(self):
    """init_array with overwrite=True replaces array metadata at a sub-path."""
    # setup: array metadata at both the root and the target path
    path = 'foo/bar'
    store = self.create_store()
    meta = dict(shape=(2000,), chunks=(200,), dtype=np.dtype('u1'),
                compressor=Zlib(1).get_config(), fill_value=0,
                order='F', filters=None)
    store[array_meta_key] = encode_array_metadata(meta)
    store[path + '/' + array_meta_key] = encode_array_metadata(meta)

    # don't overwrite
    with pytest.raises(ValueError):
        init_array(store, shape=1000, chunks=100, path=path)

    # do overwrite
    try:
        init_array(store, shape=1000, chunks=100, dtype='i4', path=path,
                   overwrite=True)
    except NotImplementedError:
        # some store implementations do not support overwrite
        pass
    else:
        # root now holds group metadata rather than array metadata
        assert group_meta_key in store
        assert array_meta_key not in store
        assert (path + '/' + array_meta_key) in store
        # should have been overwritten
        meta = decode_array_metadata(store[path + '/' + array_meta_key])
        assert ZARR_FORMAT == meta['zarr_format']
        assert (1000,) == meta['shape']
        assert (100,) == meta['chunks']
        assert np.dtype('i4') == meta['dtype']
def test_encode_decode_array_2():
    """Round-trip metadata with a structured dtype, Blosc compressor, a
    Delta filter, and a structured-scalar fill value (base64 in JSON)."""
    # some variations
    df = Delta(astype='u2', dtype='V14')
    compressor = Blosc(cname='lz4', clevel=3, shuffle=2)
    dtype = np.dtype([('a', 'i4'), ('b', 'S10')])
    # zero-valued structured scalar used as the fill value
    fill_value = np.zeros((), dtype=dtype)[()]
    meta = dict(
        shape=(100, 100),
        chunks=(10, 10),
        dtype=dtype,
        compressor=compressor.get_config(),
        fill_value=fill_value,
        order='F',
        filters=[df.get_config()]
    )
    meta_json = '''{
        "chunks": [10, 10],
        "compressor": {
            "id": "blosc",
            "clevel": 3,
            "cname": "lz4",
            "shuffle": 2,
            "blocksize": 0
        },
        "dtype": [["a", "<i4"], ["b", "|S10"]],
        "fill_value": "AAAAAAAAAAAAAAAAAAA=",
        "filters": [
            {"id": "delta", "astype": "<u2", "dtype": "|V14"}
        ],
        "order": "F",
        "shape": [100, 100],
        "zarr_format": %s
    }''' % ZARR_FORMAT
    # test encoding
    meta_enc = encode_array_metadata(meta)
    assert_json_equal(meta_json, meta_enc)
    # test decoding
    meta_dec = decode_array_metadata(meta_enc)
    assert ZARR_FORMAT == meta_dec['zarr_format']
    assert meta['shape'] == meta_dec['shape']
    assert meta['chunks'] == meta_dec['chunks']
    assert meta['dtype'] == meta_dec['dtype']
    assert meta['compressor'] == meta_dec['compressor']
    assert meta['order'] == meta_dec['order']
    assert fill_value == meta_dec['fill_value']
    assert [df.get_config()] == meta_dec['filters']
def migrate_1to2(store): """Migrate array metadata in `store` from Zarr format version 1 to version 2. Parameters ---------- store : MutableMapping Store to be migrated. Notes ----- Version 1 did not support hierarchies, so this migration function will look for a single array in `store` and migrate the array metadata to version 2. """ # migrate metadata from zarr import meta_v1 meta = meta_v1.decode_metadata(store['meta']) del store['meta'] # add empty filters meta['filters'] = None # migration compression metadata compression = meta['compression'] if compression is None or compression == 'none': compressor_config = None else: compression_opts = meta['compression_opts'] codec_cls = codec_registry[compression] if isinstance(compression_opts, dict): compressor = codec_cls(**compression_opts) else: compressor = codec_cls(compression_opts) compressor_config = compressor.get_config() meta['compressor'] = compressor_config del meta['compression'] del meta['compression_opts'] # store migrated metadata store[array_meta_key] = encode_array_metadata(meta) # migrate user attributes store[attrs_key] = store['attrs'] del store['attrs']
def _flush_metadata_nosync(self):
    """Re-encode this array's current state as metadata and write it to the
    store (no synchronization performed by this method)."""
    # views share their parent's metadata and must not rewrite it
    if self._is_view:
        raise PermissionError('not permitted for views')

    compressor_config = (self._compressor.get_config()
                         if self._compressor else None)
    filters_config = ([f.get_config() for f in self._filters]
                      if self._filters else None)
    meta = dict(shape=self._shape,
                chunks=self._chunks,
                dtype=self._dtype,
                compressor=compressor_config,
                fill_value=self._fill_value,
                order=self._order,
                filters=filters_config)
    self._store[self._key_prefix + array_meta_key] = \
        encode_array_metadata(meta)
def test_encode_decode_fill_values_bytes():
    """Bytes fill values are carried base64-encoded in the JSON metadata."""
    dtype = np.dtype('S10')
    for fill in (b'foo', bytes(10)):
        # setup and encode metadata
        meta = dict(
            shape=(100,),
            chunks=(10,),
            dtype=dtype,
            compressor=Zlib(1).get_config(),
            fill_value=fill,
            filters=None,
            order='C'
        )
        meta_enc = encode_array_metadata(meta)

        # expected JSON carries the fill value base64-encoded
        encoded_fill = base64.standard_b64encode(fill)
        if not PY2:
            encoded_fill = encoded_fill.decode()
        meta_json = '''{
            "chunks": [10],
            "compressor": {"id": "zlib", "level": 1},
            "dtype": "|S10",
            "fill_value": "%s",
            "filters": null,
            "order": "C",
            "shape": [100],
            "zarr_format": %s
        }''' % (encoded_fill, ZARR_FORMAT)
        assert_json_equal(meta_json, meta_enc)

        # decoding should restore the original bytes value
        meta_dec = decode_array_metadata(meta_enc)
        assert np.array(fill, dtype=dtype)[()] == meta_dec['fill_value']
def test_encode_decode_array_fill_values():
    """Float fill values NaN/-Inf/+Inf are encoded as JSON strings."""
    fills = (
        (np.nan, "NaN", np.isnan),
        (np.NINF, "-Infinity", np.isneginf),
        (np.PINF, "Infinity", np.isposinf),
    )
    for v, s, f in fills:
        meta = dict(
            shape=(100,),
            chunks=(10,),
            dtype=np.dtype('f8'),
            compressor=Zlib(1).get_config(),
            fill_value=v,
            filters=None,
            order='C'
        )
        meta_json = '''{
            "chunks": [10],
            "compressor": {"id": "zlib", "level": 1},
            "dtype": "<f8",
            "fill_value": "%s",
            "filters": null,
            "order": "C",
            "shape": [100],
            "zarr_format": %s
        }''' % (s, ZARR_FORMAT)
        # test encoding
        meta_enc = encode_array_metadata(meta)
        assert_json_equal(meta_json, meta_enc)
        # test decoding; NaN != NaN, so check with the matching predicate
        # rather than equality
        meta_dec = decode_array_metadata(meta_enc)
        actual = meta_dec['fill_value']
        assert f(actual)
def test_encode_decode_fill_values_nan():
    """NaN and signed infinities round-trip via their JSON string forms."""
    cases = (
        (np.nan, "NaN", np.isnan),
        (np.NINF, "-Infinity", np.isneginf),
        (np.PINF, "Infinity", np.isposinf),
    )
    for fill, json_text, predicate in cases:
        meta = dict(
            shape=(100,),
            chunks=(10,),
            dtype=np.dtype('f8'),
            compressor=Zlib(1).get_config(),
            fill_value=fill,
            filters=None,
            order='C'
        )
        meta_json = '''{
            "chunks": [10],
            "compressor": {"id": "zlib", "level": 1},
            "dtype": "<f8",
            "fill_value": "%s",
            "filters": null,
            "order": "C",
            "shape": [100],
            "zarr_format": %s
        }''' % (json_text, ZARR_FORMAT)

        # encoding should spell the fill value as the expected JSON string
        meta_enc = encode_array_metadata(meta)
        assert_json_equal(meta_json, meta_enc)

        # decoding: NaN != NaN, so verify with the matching predicate
        meta_dec = decode_array_metadata(meta_enc)
        assert predicate(meta_dec['fill_value'])
def test_encode_decode_array_structured():
    """Round-trip metadata for a structured dtype with sub-array fields."""
    meta = dict(
        shape=(100,),
        chunks=(10,),
        dtype=np.dtype('i8, (10, 10)f8, (5, 10, 15)u1'),
        compressor=Zlib(1).get_config(),
        fill_value=None,
        filters=None,
        order='C'
    )
    meta_json = '''{
        "chunks": [10],
        "compressor": {"id": "zlib", "level": 1},
        "dtype": [["f0", "<i8"], ["f1", "<f8", [10, 10]], ["f2", "|u1", [5, 10, 15]]],
        "fill_value": null,
        "filters": null,
        "order": "C",
        "shape": [100],
        "zarr_format": %s
    }''' % ZARR_FORMAT

    # encoding should produce the expected JSON document
    meta_enc = encode_array_metadata(meta)
    assert_json_equal(meta_json, meta_enc)

    # decoding
    meta_dec = decode_array_metadata(meta_enc)
    assert meta_dec['zarr_format'] == ZARR_FORMAT
    # to maintain consistency with numpy unstructured arrays, sub-array
    # dimensions are unpacked into the shape on decode
    assert meta_dec['shape'] == meta['shape'] + meta['dtype'].shape
    assert meta_dec['chunks'] == meta['chunks']
    # likewise the decoded dtype is the base dtype
    assert meta_dec['dtype'] == meta['dtype'].base
    assert meta_dec['compressor'] == meta['compressor']
    assert meta_dec['order'] == meta['order']
    assert meta_dec['fill_value'] is None
    assert meta_dec['filters'] is None
def test_init_array_overwrite_chunk_store(self):
    """Overwriting an array clears pre-existing chunks in a separate
    chunk store."""
    # setup
    store = self.create_store()
    chunk_store = self.create_store()
    store[array_meta_key] = encode_array_metadata(
        dict(shape=(2000,), chunks=(200,), dtype=np.dtype('u1'),
             compressor=None, fill_value=0, filters=None, order='F'))
    chunk_store['0'] = b'aaa'
    chunk_store['1'] = b'bbb'

    # don't overwrite (default)
    with pytest.raises(ValueError):
        init_array(store, shape=1000, chunks=100, chunk_store=chunk_store)

    # do overwrite
    try:
        init_array(store, shape=1000, chunks=100, dtype='i4',
                   overwrite=True, chunk_store=chunk_store)
    except NotImplementedError:
        # some store implementations do not support overwrite
        pass
    else:
        assert array_meta_key in store
        meta = decode_array_metadata(store[array_meta_key])
        assert ZARR_FORMAT == meta['zarr_format']
        assert (1000,) == meta['shape']
        assert (100,) == meta['chunks']
        assert np.dtype('i4') == meta['dtype']
        # pre-existing chunks should have been removed
        assert '0' not in chunk_store
        assert '1' not in chunk_store
def test_encode_decode_array_1():
    """Round-trip encode/decode of simple float64 array metadata."""
    meta = dict(
        shape=(100,),
        chunks=(10,),
        dtype=np.dtype('f8'),
        compressor=Zlib(1).get_config(),
        fill_value=None,
        filters=None,
        order='C'
    )
    meta_json = '''{
        "chunks": [10],
        "compressor": {"id": "zlib", "level": 1},
        "dtype": "<f8",
        "fill_value": null,
        "filters": null,
        "order": "C",
        "shape": [100],
        "zarr_format": %s
    }''' % ZARR_FORMAT
    # test encoding
    meta_enc = encode_array_metadata(meta)
    assert_json_equal(meta_json, meta_enc)
    # test decoding
    meta_dec = decode_array_metadata(meta_enc)
    assert ZARR_FORMAT == meta_dec['zarr_format']
    assert meta['shape'] == meta_dec['shape']
    assert meta['chunks'] == meta_dec['chunks']
    assert meta['dtype'] == meta_dec['dtype']
    assert meta['compressor'] == meta_dec['compressor']
    assert meta['order'] == meta_dec['order']
    assert meta_dec['fill_value'] is None
    assert meta_dec['filters'] is None
def _init_array_metadata(store, shape, chunks=None, dtype=None,
                         compressor='default', fill_value=None, order='C',
                         overwrite=False, path=None, chunk_store=None,
                         filters=None):
    """Normalize array parameters and write the array metadata document plus
    an empty attributes document into `store` under `path`."""

    # guard conditions
    if overwrite:
        # attempt to delete any pre-existing items in store
        rmdir(store, path)
        if chunk_store is not None:
            rmdir(chunk_store, path)
    elif contains_array(store, path):
        err_contains_array(path)
    elif contains_group(store, path):
        err_contains_group(path)

    # normalize metadata
    shape = normalize_shape(shape)
    dtype = np.dtype(dtype)
    if dtype.kind in 'mM':
        # reject datetime64/timedelta64 up front
        raise ValueError(
            'datetime64 and timedelta64 dtypes are not currently supported; '
            'please store the data using int64 instead')
    chunks = normalize_chunks(chunks, shape, dtype.itemsize)
    order = normalize_order(order)
    fill_value = normalize_fill_value(fill_value, dtype)

    # compressor prep
    if shape == ():
        # no point in compressing a 0-dimensional array, only a single value
        compressor = None
    elif compressor == 'none':
        # compatibility
        compressor = None
    elif compressor == 'default':
        compressor = default_compressor

    # obtain compressor config
    compressor_config = None
    if compressor:
        try:
            compressor_config = compressor.get_config()
        except AttributeError:
            # object does not provide the codec config interface
            err_bad_compressor(compressor)

    # obtain filters config
    if filters:
        filters_config = [f.get_config() for f in filters]
    else:
        filters_config = None

    # initialize metadata
    meta = dict(shape=shape, chunks=chunks, dtype=dtype,
                compressor=compressor_config, fill_value=fill_value,
                order=order, filters=filters_config)
    key = _path_to_prefix(path) + array_meta_key
    store[key] = encode_array_metadata(meta)

    # initialize attributes (empty JSON document)
    key = _path_to_prefix(path) + attrs_key
    store[key] = json.dumps(dict()).encode('ascii')