# module-level imports assumed by the snippets below
import operator
from functools import reduce

import zarr  # needed by the final default_chunks variant (zarr.util.guess_chunks)


def create_dataset(self, h5g, data=None, **kwargs):

    # set defaults
    kwargs.setdefault('name', 'data')
    for k, v in self.defaults.items():
        kwargs.setdefault(k, v)

    # handle data
    if data is not None:
        data = _util.ensure_array_like(data)

        # by default, simple chunking across rows
        rowsize = data.dtype.itemsize * reduce(operator.mul, data.shape[1:], 1)
        # 1Mb chunks
        chunklen = max(1, (2**20) // rowsize)
        chunks = (chunklen,) + data.shape[1:]
        kwargs.setdefault('chunks', chunks)

        # by default, can resize dim 0
        maxshape = (None,) + data.shape[1:]
        kwargs.setdefault('maxshape', maxshape)

        # set data
        kwargs['data'] = data

    # create dataset
    h5d = h5g.create_dataset(**kwargs)
    return h5d
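# Worked example of the chunking arithmetic in create_dataset above; purely
# illustrative, since the class owning the method and its self.defaults are
# not shown in these snippets. For a 2-D int8 array with 100 columns, one row
# occupies 100 bytes, so a ~1Mb chunk holds 2**20 // 100 == 10485 rows while
# the trailing dimension is kept whole.
import numpy as np

_example = np.zeros((1000, 100), dtype='i1')
_rowsize = _example.dtype.itemsize * reduce(operator.mul, _example.shape[1:], 1)
assert _rowsize == 100
assert (max(1, (2**20) // _rowsize),) + _example.shape[1:] == (10485, 100)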
def get_nbytes(data):
    if hasattr(data, 'nbytes'):
        return data.nbytes
    elif is_array_like(data):
        return reduce(operator.mul, data.shape) * data.dtype.itemsize
    else:
        return None
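# A small sanity check for get_nbytes. Note that is_array_like is assumed to
# be defined alongside it (it is not included in these snippets), so only the
# .nbytes branch is exercised here.
assert get_nbytes(np.zeros((10, 3), dtype='f8')) == 240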
def default_chunks(data):
    # by default, simple chunking across rows
    rowsize = data.dtype.itemsize * reduce(operator.mul, data.shape[1:], 1)
    # 1Mb chunks
    chunklen = max(1, (2**20) // rowsize)
    chunks = (chunklen,) + data.shape[1:]
    return chunks
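# Example of the simple default_chunks heuristic: the same 1Mb-per-chunk rule
# used inline in create_dataset above, now factored into a standalone helper.
assert default_chunks(np.zeros((1000, 100), dtype='i1')) == (10485, 100)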
def default_chunks(data, expectedlen):
    # here we will only ever chunk first dimension
    rowsize = data.dtype.itemsize
    if data.ndim > 1:
        # pretend array is 1D
        rowsize *= reduce(operator.mul, data.shape[1:])
    if expectedlen is None:
        # default to 4M chunks of first dimension
        chunklen = 2**22 // rowsize
    else:
        # use zarr heuristics
        chunklen, = zarr.util.guess_chunks((expectedlen,), rowsize)
    if data.ndim > 1:
        chunks = (chunklen,) + data.shape[1:]
    else:
        chunks = chunklen,
    return chunks
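# Example of the revised default_chunks. With expectedlen=None the chunk
# length comes from the fixed ~4Mb target; otherwise zarr.util.guess_chunks
# (a zarr 2.x utility) scales the chunk length to the expected number of
# rows. The heuristic's exact choice depends on the zarr version, so it is
# only printed here rather than asserted.
_data = np.zeros((1000, 100), dtype='i1')
assert default_chunks(_data, None) == (2**22 // 100, 100)
print(default_chunks(_data, expectedlen=10_000_000))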