Example #1
    def test_read_zarr(self):
        import numcodecs
        from z5py.dataset import Dataset
        dtypes = list(Dataset._dtype_dict.keys())
        zarr_compressors = {'blosc': numcodecs.Blosc(),
                            'zlib': numcodecs.Zlib(),
                            'raw': None,
                            'bzip2': numcodecs.BZ2()}

        # the conda-forge version of numcodecs is not up-to-date
        # for python 3.5 and GZip is missing,
        # so we check explicitly here to not fail the test
        if hasattr(numcodecs, 'GZip'):
            zarr_compressors.update({'gzip': numcodecs.GZip()})

        # open/create the root zarr group so the container exists
        zarr.open(self.path)
        for dtype in dtypes:
            for compression in zarr_compressors:
                data = np.random.randint(0, 127, size=self.shape).astype(dtype)
                # write the data with zarr
                key = 'test_%s_%s' % (dtype, compression)
                ar = zarr.open(os.path.join(self.path, key), mode='w',
                               shape=self.shape, chunks=self.chunks,
                               dtype=dtype, compressor=zarr_compressors[compression])
                ar[:] = data
                # read with z5py
                out = z5py.File(self.path)[key][:]
                self.assertEqual(data.shape, out.shape)
                self.assertTrue(np.allclose(data, out))
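For context, the round trip this test exercises can be reproduced outside the test harness. The following is a minimal sketch; the path 'roundtrip.zr' and the dataset layout are placeholder assumptions, and it relies on z5py inferring the zarr format from the '.zr' extension.

import numpy as np
import zarr
import z5py

path = 'roundtrip.zr'            # hypothetical container path
zarr.open(path)                  # create the root group
data = np.arange(100, dtype='uint8').reshape(10, 10)

# write with zarr ...
ar = zarr.open(path + '/ds', mode='w', shape=data.shape,
               chunks=(5, 5), dtype=data.dtype)
ar[:] = data

# ... and read back with z5py
out = z5py.File(path)['ds'][:]
assert np.allclose(data, out)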
Example #2
    def test_read_zarr(self):
        import numcodecs
        from z5py.dataset import Dataset
        dtypes = list(Dataset._zarr_dtype_dict.values())
        compressions = Dataset.compressors_zarr
        zarr_compressors = {
            'blosc': numcodecs.Blosc(),
            'zlib': numcodecs.Zlib(),
            'raw': None,
            'bzip2': numcodecs.BZ2()
        }

        for dtype in dtypes:
            for compression in compressions:
                data = np.random.randint(0, 127, size=self.shape).astype(dtype)
                # write the data with zarr
                key = 'test_%s_%s' % (dtype, compression)
                ar = zarr.open(os.path.join(self.path, key),
                               mode='w',
                               shape=self.shape,
                               chunks=self.chunks,
                               dtype=dtype,
                               compressor=zarr_compressors[compression])
                ar[:] = data
                # read with z5py
                out = z5py.File(self.path)[key][:]
                self.assertEqual(data.shape, out.shape)
                self.assertTrue(np.allclose(data, out))
Example #3
    def make_test_data(bucket_name=None):
        """ Make zarr test data in an s3 bucket.

        The bucket `bucket_name` must already exist and
        access credentials must be stored in a way that can be accessed by
        s3fs.
        """
        import s3fs
        import zarr
        import numcodecs

        if bucket_name is None:
            bucket_name = TestS3.bucket_name

        # access the s3 filesystem
        fs = s3fs.S3FileSystem(anon=False)
        store = s3fs.S3Map(root=bucket_name, s3=fs)

        # test data image
        data = TestS3.data

        # write remote zarr data
        f = zarr.group(store)
        f.attrs['Q'] = 42

        # create dataset
        ds = f.create_dataset('data', shape=data.shape, chunks=(256, 256, 3), dtype=data.dtype,
                              compressor=numcodecs.Zlib())
        ds[:] = data
        ds.attrs['x'] = 'y'

        # create a group and a dataset inside it
        g = f.create_group('group')
        ds = g.create_dataset('data', shape=data.shape, chunks=(256, 256, 3), dtype=data.dtype,
                              compressor=numcodecs.Zlib())
        ds[:] = data
        ds.attrs['x'] = 'y'
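Once written, the same store can be read back directly with zarr over s3fs. A minimal sketch, assuming the same bucket and credentials as above:

import s3fs
import zarr

bucket_name = 'my-test-bucket'   # placeholder: the bucket written above

fs = s3fs.S3FileSystem(anon=False)
store = s3fs.S3Map(root=bucket_name, s3=fs)

f = zarr.open_group(store, mode='r')
print(f.attrs['Q'])                 # 42
print(f['data'].shape)              # shape of the test image
print(f['group/data'].attrs['x'])   # 'y'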
Example #4
    def _translator(self, name: str, h5obj: Union[h5py.Dataset, h5py.Group]):
        """Produce Zarr metadata for all groups and datasets in the HDF5 file.
        """
        refs = {}
        if isinstance(h5obj, h5py.Dataset):
            lggr.debug(f'HDF5 dataset: {h5obj.name}')
            if h5obj.id.get_create_plist().get_layout() == h5py.h5d.COMPACT:
                raise RuntimeError(
                    f'Compact HDF5 datasets not yet supported: <{h5obj.name} '
                    f'{h5obj.shape} {h5obj.dtype} {h5obj.nbytes} bytes>')

            #
            # check for unsupported HDF encoding/filters
            #
            if h5obj.scaleoffset:
                raise RuntimeError(
                    f'{h5obj.name} uses HDF5 scaleoffset filter - not supported by reference-maker'
                )
            if h5obj.compression in ('szip', 'lzf'):
                raise RuntimeError(
                    f'{h5obj.name} uses szip or lzf compression - not supported by reference-maker'
                )
            if h5obj.compression == 'gzip':
                compression = numcodecs.Zlib(level=h5obj.compression_opts)
            else:
                compression = None

            # Add filter for shuffle
            filters = []
            if h5obj.shuffle:
                filters.append(
                    numcodecs.Shuffle(elementsize=h5obj.dtype.itemsize))

            # Get storage info of this HDF5 dataset...
            cinfo = self._storage_info(h5obj)
            if h5py.h5ds.is_scale(h5obj.id) and not cinfo:
                return

            # Create a Zarr array equivalent to this HDF5 dataset...
            za = self._zroot.create_dataset(h5obj.name,
                                            shape=h5obj.shape,
                                            dtype=h5obj.dtype,
                                            chunks=h5obj.chunks or False,
                                            fill_value=h5obj.fillvalue,
                                            compression=compression,
                                            filters=filters,
                                            overwrite=True)
            lggr.debug(f'Created Zarr array: {za}')
            self._transfer_attrs(h5obj, za)

            adims = self._get_array_dims(h5obj)
            za.attrs['_ARRAY_DIMENSIONS'] = adims
            lggr.debug(f'_ARRAY_DIMENSIONS = {adims}')

            # Store chunk location metadata...
            if cinfo:
                for k, v in cinfo.items():
                    if h5obj.fletcher32:
                        logging.info("Discarding fletcher32 checksum")
                        v['size'] -= 4
                    self.store[za._chunk_key(k)] = [
                        self._uri, v['offset'], v['size']
                    ]

        elif isinstance(h5obj, h5py.Group):
            lggr.debug(f'HDF5 group: {h5obj.name}')
            zgrp = self._zroot.create_group(h5obj.name)
            self._transfer_attrs(h5obj, zgrp)
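The chunk references stored above (source URI, byte offset, size) are designed to be consumed by fsspec's ReferenceFileSystem, which exposes them as an ordinary mapping that zarr can open lazily, without copying the HDF5 data. A minimal sketch, assuming the reference set was serialized to a hypothetical 'references.json' and the source file lives on S3:

import fsspec
import zarr

fs = fsspec.filesystem(
    'reference',
    fo='references.json',            # assumed serialization of self.store
    remote_protocol='s3',
    remote_options={'anon': True},
)
z = zarr.open(fs.get_mapper(''), mode='r')
print(z.tree())                      # mirrors the HDF5 hierarchy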
Example #5
import zarr
import numcodecs
from skimage.data import astronaut

# choose chunks such that we get overhanging edge-chunks
# (astronaut() is 512 x 512 x 3, which (100, 100, 1) does not divide evenly)
CHUNKS = (100, 100, 1)
STR_TO_COMPRESSOR = {
    'gzip': numcodecs.GZip(),
    'blosc': numcodecs.Blosc(),
    'zlib': numcodecs.Zlib()
}


def generate_zarr_format(compressors=['gzip', 'blosc', 'zlib', None]):
    path = '../data/zarr.zr'
    im = astronaut()

    f = zarr.open(path)
    for compressor in compressors:
        name = compressor if compressor is not None else 'raw'
        compressor_impl = (STR_TO_COMPRESSOR[compressor]
                           if compressor is not None else None)
        f.create_dataset(name,
                         data=im,
                         chunks=CHUNKS,
                         compressor=compressor_impl)


# this needs PR https://github.com/zarr-developers/zarr/pull/309
def generate_n5_format(compressors=['gzip', None]):
    path = '../data/zarr.n5'