import numcodecs
import zarr


def __init__(self, file, signal, expg, **kwargs):
    super().__init__(file, signal, expg, **kwargs)
    self.Dataset = zarr.Array
    # Codec kwargs for object-dtype datasets: JSON for unicode strings,
    # variable-length integer arrays for ragged data.
    self.unicode_kwds = {"dtype": object, "object_codec": numcodecs.JSON()}
    self.ragged_kwds = {
        "dtype": object,
        "object_codec": numcodecs.VLenArray(int),
        "exact": True,
    }
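# A minimal sketch (not from the original source) of how kwargs like the ones
# above are consumed: zarr object-dtype datasets require an object_codec,
# JSON() for unicode strings and VLenArray(int) for ragged rows. The names
# `group`, `strings`, and `ragged` are hypothetical.
import numpy as np

group = zarr.group()  # in-memory group
strings = group.create_dataset(
    "strings", shape=(2,), dtype=object, object_codec=numcodecs.JSON()
)
strings[0] = "alpha"
strings[1] = "beta"
ragged = group.create_dataset(
    "ragged", shape=(2,), dtype=object, object_codec=numcodecs.VLenArray(int)
)
ragged[0] = np.array([1, 2, 3])  # rows may have different lengths
ragged[1] = np.array([4])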
def store(self, synced_collection, data):
    dataset = synced_collection.group.require_dataset(
        "test",
        overwrite=True,
        shape=1,
        dtype="object",
        object_codec=numcodecs.JSON(),
    )
    dataset[0] = data
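# Hedged round-trip sketch for the store() fixture above: a JSON-encoded
# object written into the single-element dataset reads back as a plain
# Python value. The zarr group is created inline here rather than taken
# from a synced collection, which is an assumption for illustration.
group = zarr.group()
dataset = group.require_dataset(
    "test", overwrite=True, shape=1, dtype="object",
    object_codec=numcodecs.JSON(),
)
dataset[0] = {"a": 1, "b": [1, 2, 3]}
assert dataset[0] == {"a": 1, "b": [1, 2, 3]}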
def __init__(self, group=None, name=None, codec=None, *args, **kwargs):
    if not ZARR:
        raise RuntimeError(
            "The Zarr package must be installed to use the ZarrCollection."
        )
    super().__init__(**kwargs)
    self._group = group
    self._name = name
    self._object_codec = numcodecs.JSON() if codec is None else codec
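# Hedged construction sketch: wire the collection to an in-memory zarr
# group, falling back to the default JSON codec. It assumes ZarrCollection
# is in scope and that the base class requires no further arguments;
# neither is confirmed by the snippet above.
collection = ZarrCollection(group=zarr.group(), name="config")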
from collections.abc import Mapping
import warnings

import numpy as np
from scipy.sparse import issparse


def _write_key_value_to_zarr(f, key, value, **kwargs):
    if isinstance(value, Mapping):
        # Recurse into mappings, flattening them into '/'-separated keys.
        for k, v in value.items():
            if not isinstance(k, str):
                warnings.warn(
                    'dict key {} transformed to str upon writing to zarr, '
                    'using string keys is recommended'.format(k)
                )
            _write_key_value_to_zarr(f, key + '/' + str(k), v, **kwargs)
        return

    def preprocess_writing(value):
        if value is None:
            return value
        elif issparse(value):
            return value
        elif isinstance(value, dict):
            # old hack for storing dicts, is never reached
            # in the current implementation, can be removed in the future
            value = np.array([str(value)])
        else:
            # make sure value is an array
            value = np.array(value)
            # wrap 0-d arrays (scalars) as one-element arrays
            if value.ndim == 0:
                value = np.array([value])
        # make sure string format is chosen correctly:
        # unicode arrays are stored as fixed-length byte strings
        if value.dtype.kind == 'U':
            value = value.astype(np.string_)
        return value

    value = preprocess_writing(value)

    # for some reason, we need the following for writing string arrays
    if key in f.keys() and value is not None:
        del f[key]

    # ignore arrays with empty dtypes
    if value is None or not value.dtype.descr:
        return
    try:
        if key in set(f.keys()):
            import zarr
            is_valid_group = (
                isinstance(f[key], zarr.hierarchy.Group)
                and f[key].shape == value.shape
                and f[key].dtype == value.dtype
            )
            if not is_valid_group and not issparse(value):
                # overwrite an existing array of matching shape in place
                f[key][()] = value
                return
            else:
                del f[key]
        # f.create_dataset(key, data=value, **kwargs)
        if key != 'X' and 'chunks' in kwargs:  # TODO: make this more explicit
            del kwargs['chunks']
        import numcodecs
        # TODO: only set object_codec for objects
        ds = f.create_dataset(key, shape=value.shape, dtype=value.dtype,
                              object_codec=numcodecs.JSON(), **kwargs)
        _write_in_zarr_chunks(ds, key, value)
    except TypeError:
        # try writing as byte strings
        try:
            if value.dtype.names is None:
                # plain (non-structured) dtype
                if key in set(f.keys()):
                    if (f[key].shape == value.shape
                            and f[key].dtype == value.dtype):
                        f[key][()] = value.astype('S')
                        return
                    else:
                        del f[key]
                # f.create_dataset(key, data=value.astype('S'), **kwargs)
                ds = f.create_dataset(key,
                                      shape=value.astype('S').shape,
                                      dtype=value.astype('S').dtype,
                                      **kwargs)
                _write_in_zarr_chunks(ds, key, value.astype('S'))
            else:
                # structured dtype: widen each unicode field to byte strings
                new_dtype = [(dt[0], 'S{}'.format(int(dt[1][2:]) * 4))
                             for dt in value.dtype.descr]
                if key in set(f.keys()):
                    if (f[key].shape == value.shape
                            and f[key].dtype == value.dtype):
                        f[key][()] = value.astype(new_dtype)
                        return
                    else:
                        del f[key]
                # f.create_dataset(key, data=value.astype(new_dtype), **kwargs)
                ds = f.create_dataset(key,
                                      shape=value.astype(new_dtype).shape,
                                      dtype=value.astype(new_dtype).dtype,
                                      **kwargs)
                _write_in_zarr_chunks(ds, key, value.astype(new_dtype))
        except Exception as e:
            warnings.warn('Could not save field with key = "{}" '
                          'to zarr file: {}'.format(key, e))
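# Usage sketch, assuming a zarr group `f` and the chunked writer
# _write_in_zarr_chunks referenced (but not defined) above. Nested mappings
# are flattened into '/'-separated keys, so this call produces the arrays
# f['meta/name'] and f['meta/ids'].
f = zarr.group()
_write_key_value_to_zarr(f, 'meta', {'name': 'sample1', 'ids': [1, 2, 3]})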