def test_read_zarr(self):
    import numcodecs
    from z5py.dataset import Dataset
    dtypes = list(Dataset._dtype_dict.keys())
    zarr_compressors = {'blosc': numcodecs.Blosc(),
                        'zlib': numcodecs.Zlib(),
                        'raw': None,
                        'bzip2': numcodecs.BZ2()}
    # the conda-forge version of numcodecs for python 3.5 is not up-to-date
    # and is missing GZip; that's why we check explicitly here to not fail the test
    if hasattr(numcodecs, 'GZip'):
        zarr_compressors.update({'gzip': numcodecs.GZip()})

    zarr.open(self.path)
    for dtype in dtypes:
        for compression in zarr_compressors:
            data = np.random.randint(0, 127, size=self.shape).astype(dtype)
            # write the data with zarr
            key = 'test_%s_%s' % (dtype, compression)
            ar = zarr.open(os.path.join(self.path, key), mode='w',
                           shape=self.shape, chunks=self.chunks,
                           dtype=dtype, compressor=zarr_compressors[compression])
            ar[:] = data
            # read with z5py
            out = z5py.File(self.path)[key][:]
            self.assertEqual(data.shape, out.shape)
            self.assertTrue(np.allclose(data, out))
def test_read_zarr(self):
    import numcodecs
    from z5py.dataset import Dataset
    dtypes = list(Dataset._zarr_dtype_dict.values())
    compressions = Dataset.compressors_zarr
    zarr_compressors = {'blosc': numcodecs.Blosc(),
                        'zlib': numcodecs.Zlib(),
                        'raw': None,
                        'bzip2': numcodecs.BZ2()}

    for dtype in dtypes:
        for compression in compressions:
            data = np.random.randint(0, 127, size=self.shape).astype(dtype)
            # write the data with zarr
            key = 'test_%s_%s' % (dtype, compression)
            ar = zarr.open(os.path.join(self.path, key), mode='w',
                           shape=self.shape, chunks=self.chunks,
                           dtype=dtype, compressor=zarr_compressors[compression])
            ar[:] = data
            # read with z5py
            out = z5py.File(self.path)[key][:]
            self.assertEqual(data.shape, out.shape)
            self.assertTrue(np.allclose(data, out))
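# A minimal sketch of the converse round-trip (write with z5py, read back with
# zarr), assuming z5py.File(..., use_zarr_format=True) and create_dataset with a
# string `compression` argument as used in z5py; the function name, dataset key
# and the 'zlib' choice are illustrative, not part of the test suite above.
def example_write_z5py_read_zarr(path, shape=(100, 100), chunks=(10, 10)):
    import numpy as np
    import zarr
    import z5py

    data = np.random.randint(0, 127, size=shape).astype('int32')
    # write with z5py in zarr format
    f = z5py.File(path, use_zarr_format=True)
    ds = f.create_dataset('test_roundtrip', shape=shape, chunks=chunks,
                          dtype='int32', compression='zlib')
    ds[:] = data
    # read back with zarr and compare
    out = zarr.open(path)['test_roundtrip'][:]
    assert np.allclose(data, out)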
def make_test_data(bucket_name=None):
    """ Make zarr test data in an s3 bucket.

    The bucket `bucket_name` must already exist and access credentials
    must be stored in a way that can be accessed by s3fs.
    """
    import s3fs
    import zarr
    import numcodecs

    if bucket_name is None:
        bucket_name = TestS3.bucket_name

    # access the s3 filesystem
    fs = s3fs.S3FileSystem(anon=False)
    store = s3fs.S3Map(root=bucket_name, s3=fs)

    # test image data
    data = TestS3.data

    # write remote zarr data
    f = zarr.group(store)
    f.attrs['Q'] = 42

    # create a dataset in the root group
    ds = f.create_dataset('data', shape=data.shape, chunks=(256, 256, 3),
                          dtype=data.dtype, compressor=numcodecs.Zlib())
    ds[:] = data
    ds.attrs['x'] = 'y'

    # create a group and a dataset inside it
    g = f.create_group('group')
    ds = g.create_dataset('data', shape=data.shape, chunks=(256, 256, 3),
                          dtype=data.dtype, compressor=numcodecs.Zlib())
    ds[:] = data
    ds.attrs['x'] = 'y'
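# A minimal read-back check for the bucket written above, assuming the same
# s3fs credential setup; purely illustrative, the function is not part of the
# test suite.
def check_test_data(bucket_name=None):
    import s3fs
    import zarr
    import numpy as np

    if bucket_name is None:
        bucket_name = TestS3.bucket_name

    # open the remote zarr group
    fs = s3fs.S3FileSystem(anon=False)
    store = s3fs.S3Map(root=bucket_name, s3=fs)
    f = zarr.group(store)

    # verify the attributes and datasets written by make_test_data
    assert f.attrs['Q'] == 42
    assert f['data'].attrs['x'] == 'y'
    assert np.allclose(f['data'][:], TestS3.data)
    assert np.allclose(f['group/data'][:], TestS3.data)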
def _translator(self, name: str, h5obj: Union[h5py.Dataset, h5py.Group]):
    """Produce Zarr metadata for all groups and datasets in the HDF5 file.
    """
    refs = {}
    if isinstance(h5obj, h5py.Dataset):
        lggr.debug(f'HDF5 dataset: {h5obj.name}')
        if h5obj.id.get_create_plist().get_layout() == h5py.h5d.COMPACT:
            raise RuntimeError(
                f'Compact HDF5 datasets not yet supported: <{h5obj.name} '
                f'{h5obj.shape} {h5obj.dtype} {h5obj.nbytes} bytes>')

        #
        # check for unsupported HDF encoding/filters
        #
        if h5obj.scaleoffset:
            raise RuntimeError(
                f'{h5obj.name} uses HDF5 scaleoffset filter - not supported by reference-maker'
            )
        if h5obj.compression in ('szip', 'lzf'):
            raise RuntimeError(
                f'{h5obj.name} uses szip or lzf compression - not supported by reference-maker'
            )
        if h5obj.compression == 'gzip':
            compression = numcodecs.Zlib(level=h5obj.compression_opts)
        else:
            compression = None
        # Add a shuffle filter if the HDF5 dataset uses one
        filters = []
        if h5obj.shuffle:
            filters.append(numcodecs.Shuffle(elementsize=h5obj.dtype.itemsize))

        # Get storage info of this HDF5 dataset...
        cinfo = self._storage_info(h5obj)
        if h5py.h5ds.is_scale(h5obj.id) and not cinfo:
            return

        # Create a Zarr array equivalent to this HDF5 dataset...
        za = self._zroot.create_dataset(h5obj.name, shape=h5obj.shape,
                                        dtype=h5obj.dtype,
                                        chunks=h5obj.chunks or False,
                                        fill_value=h5obj.fillvalue,
                                        compression=compression,
                                        filters=filters,
                                        overwrite=True)
        lggr.debug(f'Created Zarr array: {za}')
        self._transfer_attrs(h5obj, za)

        adims = self._get_array_dims(h5obj)
        za.attrs['_ARRAY_DIMENSIONS'] = adims
        lggr.debug(f'_ARRAY_DIMENSIONS = {adims}')

        # Store chunk location metadata...
        if cinfo:
            for k, v in cinfo.items():
                if h5obj.fletcher32:
                    logging.info("Discarding fletcher32 checksum")
                    v['size'] -= 4
                self.store[za._chunk_key(k)] = [self._uri, v['offset'], v['size']]

    elif isinstance(h5obj, h5py.Group):
        lggr.debug(f'HDF5 group: {h5obj.name}')
        zgrp = self._zroot.create_group(h5obj.name)
        self._transfer_attrs(h5obj, zgrp)
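# A hedged sketch of consuming the chunk references produced by the translator
# above via fsspec's ReferenceFileSystem ("reference" protocol). It assumes the
# reference store has been serialized to the usual reference JSON layout and
# saved as 'refs.json', and that the chunks live on s3; the file name, remote
# protocol and options are illustrative assumptions, not part of the code above.
import fsspec
import zarr

fs = fsspec.filesystem('reference', fo='refs.json',
                       remote_protocol='s3', remote_options={'anon': True})
store = fs.get_mapper('')
root = zarr.open(store, mode='r')
print(root.tree())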
import zarr
import numcodecs
from skimage.data import astronaut

# choose chunks s.t. we do have overhanging edge-chunks
CHUNKS = (100, 100, 1)
STR_TO_COMPRESSOR = {'gzip': numcodecs.GZip(),
                     'blosc': numcodecs.Blosc(),
                     'zlib': numcodecs.Zlib()}


def generate_zarr_format(compressors=['gzip', 'blosc', 'zlib', None]):
    path = '../data/zarr.zr'
    im = astronaut()
    f = zarr.open(path)
    for compressor in compressors:
        name = compressor if compressor is not None else 'raw'
        compressor_impl = STR_TO_COMPRESSOR[compressor] if compressor is not None else None
        f.create_dataset(name, data=im, chunks=CHUNKS,
                         compressor=compressor_impl)


# this needs PR https://github.com/zarr-developers/zarr/pull/309
def generate_n5_format(compressors=['gzip', None]):
    path = '../data/zarr.n5'
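# A small verification sketch for the zarr data generated above, assuming the
# datasets were written to '../data/zarr.zr' by generate_zarr_format(); the
# function name is illustrative. It reads each dataset back and compares it
# to the astronaut image.
def check_zarr_format(compressors=['gzip', 'blosc', 'zlib', None]):
    import numpy as np
    path = '../data/zarr.zr'
    im = astronaut()
    f = zarr.open(path)
    for compressor in compressors:
        name = compressor if compressor is not None else 'raw'
        assert np.allclose(f[name][:], im)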