def test_encode_decode_array_datetime_timedelta():

    # some variations
    for k in ['m8[s]', 'M8[s]']:
        compressor = Blosc(cname='lz4', clevel=3, shuffle=2)
        dtype = np.dtype(k)
        fill_value = dtype.type("NaT")
        meta = dict(
            shape=(100, 100),
            chunks=(10, 10),
            dtype=dtype,
            compressor=compressor.get_config(),
            fill_value=fill_value,
            order=dtype.char,
            filters=[]
        )

        meta_json = '''{
            "chunks": [10, 10],
            "compressor": {
                "id": "blosc",
                "clevel": 3,
                "cname": "lz4",
                "shuffle": 2,
                "blocksize": 0
            },
            "dtype": "%s",
            "fill_value": -9223372036854775808,
            "filters": [],
            "order": "%s",
            "shape": [100, 100],
            "zarr_format": %s
        }''' % (dtype.str, dtype.char, ZARR_FORMAT)

        # test encoding
        meta_enc = encode_array_metadata(meta)
        assert_json_equal(meta_json, meta_enc)

        # test decoding
        meta_dec = decode_array_metadata(meta_enc)
        assert ZARR_FORMAT == meta_dec['zarr_format']
        assert meta['shape'] == meta_dec['shape']
        assert meta['chunks'] == meta_dec['chunks']
        assert meta['dtype'] == meta_dec['dtype']
        assert meta['compressor'] == meta_dec['compressor']
        assert meta['order'] == meta_dec['order']
        # NaT does not compare equal to itself, so compare raw bytes instead;
        # based on this SO answer: https://stackoverflow.com/a/49972198
        assert np.all(
            fill_value.view((np.uint8, fill_value.itemsize)) ==
            meta_dec['fill_value'].view(
                (np.uint8, meta_dec['fill_value'].itemsize))
        )
        assert [] == meta_dec['filters']
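
def test_nat_self_inequality():
    # Added for illustration (not part of the original suite): like NaN,
    # NaT compares unequal to itself, which is why the test above compares
    # the decoded fill value via a byte view rather than with ``==``.
    nat = np.datetime64('NaT')
    assert not bool(nat == nat)
    # NaT is represented as the minimum int64 value, which is why the JSON
    # fixture above encodes the fill value as -9223372036854775808
    assert int(nat.view('i8')) == -9223372036854775808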
def test_encode_decode_array_2():

    # some variations
    df = Delta(astype='u2', dtype='V14')
    compressor = Blosc(cname='lz4', clevel=3, shuffle=2)
    dtype = np.dtype([('a', 'i4'), ('b', 'S10')])
    fill_value = np.zeros((), dtype=dtype)[()]
    meta = dict(
        shape=(100, 100),
        chunks=(10, 10),
        dtype=dtype,
        compressor=compressor.get_config(),
        fill_value=fill_value,
        order='F',
        filters=[df.get_config()]
    )

    meta_json = '''{
        "chunks": [10, 10],
        "compressor": {
            "id": "blosc",
            "clevel": 3,
            "cname": "lz4",
            "shuffle": 2,
            "blocksize": 0
        },
        "dtype": [["a", "<i4"], ["b", "|S10"]],
        "fill_value": "AAAAAAAAAAAAAAAAAAA=",
        "filters": [
            {"id": "delta", "astype": "<u2", "dtype": "|V14"}
        ],
        "order": "F",
        "shape": [100, 100],
        "zarr_format": %s
    }''' % ZARR_FORMAT

    # test encoding
    meta_enc = encode_array_metadata(meta)
    assert_json_equal(meta_json, meta_enc)

    # test decoding
    meta_dec = decode_array_metadata(meta_enc)
    assert ZARR_FORMAT == meta_dec['zarr_format']
    assert meta['shape'] == meta_dec['shape']
    assert meta['chunks'] == meta_dec['chunks']
    assert meta['dtype'] == meta_dec['dtype']
    assert meta['compressor'] == meta_dec['compressor']
    assert meta['order'] == meta_dec['order']
    assert fill_value == meta_dec['fill_value']
    assert [df.get_config()] == meta_dec['filters']
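
def test_structured_fill_value_base64():
    # Added for illustration (not part of the original suite): a structured
    # scalar is not directly JSON-serializable, so the fill value is encoded
    # as base64 of its raw bytes. The "AAAAAAAAAAAAAAAAAAA=" string in the
    # fixture above is simply 14 zero bytes (the itemsize of the dtype).
    import base64
    dtype = np.dtype([('a', 'i4'), ('b', 'S10')])
    assert dtype.itemsize == 14
    raw = np.zeros((), dtype=dtype).tobytes()
    assert base64.standard_b64encode(raw) == b'AAAAAAAAAAAAAAAAAAA='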
def test_format_compatibility():

    # This test is intended to catch any unintended changes that break the
    # ability to read data stored with a previous minor version (which
    # should be format-compatible).

    # fixture data
    fixture = group(store=DirectoryStore('fixture'))

    # set seed to get consistent random data
    np.random.seed(42)

    arrays_chunks = [
        (np.arange(1111, dtype='i1'), 100),
        (np.arange(1111, dtype='i2'), 100),
        (np.arange(1111, dtype='i4'), 100),
        (np.arange(1111, dtype='i8'), 1000),
        (np.random.randint(0, 200, size=2222, dtype='u1'), 100),
        (np.random.randint(0, 2000, size=2222, dtype='u2'), 100),
        (np.random.randint(0, 2000, size=2222, dtype='u4'), 100),
        (np.random.randint(0, 2000, size=2222, dtype='u8'), 100),
        (np.linspace(0, 1, 3333, dtype='f2'), 100),
        (np.linspace(0, 1, 3333, dtype='f4'), 100),
        (np.linspace(0, 1, 3333, dtype='f8'), 100),
        (np.random.normal(loc=0, scale=1, size=4444).astype('f2'), 100),
        (np.random.normal(loc=0, scale=1, size=4444).astype('f4'), 100),
        (np.random.normal(loc=0, scale=1, size=4444).astype('f8'), 100),
        (np.random.choice([b'A', b'C', b'G', b'T'],
                          size=5555, replace=True).astype('S'), 100),
        (np.random.choice(['foo', 'bar', 'baz', 'quux'],
                          size=5555, replace=True).astype('U'), 100),
        (np.random.choice([0, 1 / 3, 1 / 7, 1 / 9, np.nan],
                          size=5555, replace=True).astype('f8'), 100),
        (np.random.randint(0, 2, size=5555, dtype=bool), 100),
        (np.arange(20000, dtype='i4').reshape(2000, 10, order='C'), (100, 3)),
        (np.arange(20000, dtype='i4').reshape(200, 100, order='F'), (100, 30)),
        (np.arange(20000, dtype='i4').reshape(200, 10, 10, order='C'),
         (100, 3, 3)),
        (np.arange(20000, dtype='i4').reshape(20, 100, 10, order='F'),
         (10, 30, 3)),
        (np.arange(20000, dtype='i4').reshape(20, 10, 10, 10, order='C'),
         (10, 3, 3, 3)),
        (np.arange(20000, dtype='i4').reshape(20, 10, 10, 10, order='F'),
         (10, 3, 3, 3)),
    ]

    compressors = [
        None,
        Zlib(level=1),
        BZ2(level=1),
        Blosc(cname='zstd', clevel=1, shuffle=0),
        Blosc(cname='zstd', clevel=1, shuffle=1),
        Blosc(cname='zstd', clevel=1, shuffle=2),
        Blosc(cname='lz4', clevel=1, shuffle=0),
    ]

    for i, (arr, chunks) in enumerate(arrays_chunks):

        if arr.flags.f_contiguous:
            order = 'F'
        else:
            order = 'C'

        for j, compressor in enumerate(compressors):
            path = '{}/{}'.format(i, j)

            if path not in fixture:  # pragma: no cover
                # store the data - should be a one-time operation
                fixture.array(path, data=arr, chunks=chunks, order=order,
                              compressor=compressor)

            # setup array
            z = fixture[path]

            # check contents
            if arr.dtype.kind == 'f':
                assert_array_almost_equal(arr, z[:])
            else:
                assert_array_equal(arr, z[:])

            # check dtype
            assert arr.dtype == z.dtype

            # check compressor
            if compressor is None:
                assert z.compressor is None
            else:
                assert compressor.codec_id == z.compressor.codec_id
                assert compressor.get_config() == z.compressor.get_config()
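
# For orientation, the fixture created above is expected to end up on disk
# with a layout roughly like the following (one sub-array per
# (array, compressor) pair; exact chunk file names depend on each array's
# shape and chunking):
#
#     fixture/.zgroup
#     fixture/0/.zgroup
#     fixture/0/0/.zarray
#     fixture/0/0/0
#     fixture/0/0/1
#     ...
#     fixture/0/1/.zarray
#     ...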
def test_migrate_1to2():
    from zarr import meta_v1

    # N.B., version 1 did not support hierarchies, so we only have to be
    # concerned about migrating a single array at the root of the store

    # setup
    store = dict()
    meta = dict(shape=(100,), chunks=(10,), dtype=np.dtype('f4'),
                compression='zlib', compression_opts=1, fill_value=None,
                order='C')
    meta_json = meta_v1.encode_metadata(meta)
    store['meta'] = meta_json
    store['attrs'] = json.dumps(dict()).encode('ascii')

    # run migration
    migrate_1to2(store)

    # check results
    assert 'meta' not in store
    assert array_meta_key in store
    assert 'attrs' not in store
    assert attrs_key in store
    meta_migrated = decode_array_metadata(store[array_meta_key])
    assert 2 == meta_migrated['zarr_format']

    # preserved fields
    for f in 'shape', 'chunks', 'dtype', 'fill_value', 'order':
        assert meta[f] == meta_migrated[f]

    # migration should have added a filters field, set to None because
    # version 1 had no filters
    assert meta_migrated['filters'] is None

    # check compression and compression_opts migrated to compressor
    assert 'compression' not in meta_migrated
    assert 'compression_opts' not in meta_migrated
    assert meta_migrated['compressor'] == Zlib(1).get_config()

    # check dict compression_opts
    store = dict()
    meta['compression'] = 'blosc'
    meta['compression_opts'] = dict(cname='lz4', clevel=5, shuffle=1)
    meta_json = meta_v1.encode_metadata(meta)
    store['meta'] = meta_json
    store['attrs'] = json.dumps(dict()).encode('ascii')
    migrate_1to2(store)
    meta_migrated = decode_array_metadata(store[array_meta_key])
    assert 'compression' not in meta_migrated
    assert 'compression_opts' not in meta_migrated
    assert (meta_migrated['compressor'] ==
            Blosc(cname='lz4', clevel=5, shuffle=1).get_config())

    # check 'none' compression is migrated to None (null in JSON)
    store = dict()
    meta['compression'] = 'none'
    meta_json = meta_v1.encode_metadata(meta)
    store['meta'] = meta_json
    store['attrs'] = json.dumps(dict()).encode('ascii')
    migrate_1to2(store)
    meta_migrated = decode_array_metadata(store[array_meta_key])
    assert 'compression' not in meta_migrated
    assert 'compression_opts' not in meta_migrated
    assert meta_migrated['compressor'] is None
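
# In outline, the migration exercised above rewrites the store like this
# (a sketch of the key/field mapping, not the exact encoder output):
#
#     v1 key 'meta'   -> v2 key '.zarray'
#     v1 key 'attrs'  -> v2 key '.zattrs'
#     compression='zlib', compression_opts=1
#                     -> compressor={'id': 'zlib', 'level': 1}
#     (no filters in v1)      -> filters=None
#     (no zarr_format in v1)  -> zarr_format=2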
from zarr.util import normalize_shape, normalize_chunks, normalize_order, \
    normalize_storage_path, buffer_size
from zarr.meta import encode_array_metadata, encode_group_metadata
from zarr.compat import PY2, binary_type
from numcodecs.registry import codec_registry
from zarr.errors import err_contains_group, err_contains_array, \
    err_path_not_found, err_bad_compressor, err_fspath_exists_notdir, \
    err_read_only


array_meta_key = '.zarray'
group_meta_key = '.zgroup'
attrs_key = '.zattrs'

try:
    from zarr.codecs import Blosc
    default_compressor = Blosc()
except ImportError:  # pragma: no cover
    from zarr.codecs import Zlib
    default_compressor = Zlib()


def _path_to_prefix(path):
    # assume path already normalized
    if path:
        prefix = path + '/'
    else:
        prefix = ''
    return prefix


def contains_array(store, path=None):
    """Return True if the store contains an array at the given logical path."""
    path = normalize_storage_path(path)
    prefix = _path_to_prefix(path)
    key = prefix + array_meta_key
    return key in store
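
# A minimal sketch of how these helpers compose, assuming ``init_array``
# (defined elsewhere in this module) writes the '.zarray' key under the
# normalized path prefix:
#
#     >>> store = dict()
#     >>> init_array(store, shape=100, chunks=10, path='foo/bar')
#     >>> contains_array(store, 'foo/bar')
#     True
#     >>> contains_array(store, 'foo')
#     False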
def create_array(self, read_only=False, **kwargs):
    store = dict()
    compressor = Blosc(cname='zstd', clevel=1, shuffle=1)
    kwargs.setdefault('compressor', compressor)
    init_array(store, **kwargs)
    return Array(store, read_only=read_only)
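
# Example usage within a test (``shape`` and ``chunks`` pass straight through
# to ``init_array`` via ``**kwargs``):
#
#     z = self.create_array(shape=1000, chunks=100)
#     assert isinstance(z, Array)
#     assert 'blosc' == z.compressor.codec_id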