Code example #1
File: storage_hdf5.py  Project: nvictus/scikit-allel
    def create_dataset(self, h5g, data=None, **kwargs):

        # set defaults
        kwargs.setdefault('name', 'data')
        for k, v in self.defaults.items():
            kwargs.setdefault(k, v)

        # handle data
        if data is not None:
            data = _util.ensure_array_like(data)

            # by default, simple chunking across rows
            rowsize = data.dtype.itemsize * reduce(operator.mul,
                                                   data.shape[1:], 1)
            # 1Mb chunks
            chunklen = max(1, (2**20) // rowsize)
            chunks = (chunklen,) + data.shape[1:]
            kwargs.setdefault('chunks', chunks)

            # by default, can resize dim 0
            maxshape = (None,) + data.shape[1:]
            kwargs.setdefault('maxshape', maxshape)

            # set data
            kwargs['data'] = data

        # create dataset
        h5d = h5g.create_dataset(**kwargs)

        return h5d
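
For reference, a minimal standalone sketch of the chunking arithmetic used above, assuming h5py and NumPy; the array shape, dtype, and file name are illustrative and not taken from scikit-allel:

import operator
from functools import reduce

import h5py
import numpy as np

# hypothetical genotype-like array: 1 byte per element, 100 samples, ploidy 2
data = np.zeros((10000, 100, 2), dtype='i1')

rowsize = data.dtype.itemsize * reduce(operator.mul, data.shape[1:], 1)  # 1 * 100 * 2 = 200 bytes
chunklen = max(1, (2**20) // rowsize)   # 1048576 // 200 = 5242 rows per ~1Mb chunk
chunks = (chunklen,) + data.shape[1:]   # (5242, 100, 2)
maxshape = (None,) + data.shape[1:]     # resizable along the first dimension only

with h5py.File('example.h5', 'w') as f:
    h5d = f.create_dataset('data', data=data, chunks=chunks, maxshape=maxshape)
    print(h5d.chunks)  # (5242, 100, 2)

The ~1Mb target keeps individual chunk reads and writes a sensible size, while the maxshape default leaves the first dimension free to grow later.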
Code example #2
from functools import reduce
import operator

def get_nbytes(data):
    # prefer an explicit nbytes attribute (e.g. numpy arrays)
    if hasattr(data, 'nbytes'):
        return data.nbytes
    # otherwise compute from shape and itemsize; is_array_like is a
    # helper defined elsewhere in the same module
    elif is_array_like(data):
        return reduce(operator.mul, data.shape) * data.dtype.itemsize
    else:
        return None
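
As a quick sanity check (illustrative, not from the project), the fallback product-of-shape arithmetic agrees with NumPy's own nbytes for an ordinary array:

import operator
from functools import reduce

import numpy as np

a = np.zeros((1000, 3), dtype='i4')
print(a.nbytes)                                           # 12000
print(reduce(operator.mul, a.shape) * a.dtype.itemsize)   # 1000 * 3 * 4 = 12000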
Code example #3
from functools import reduce
import operator

def default_chunks(data):
    # by default, simple chunking across rows
    rowsize = data.dtype.itemsize * reduce(operator.mul,
                                           data.shape[1:], 1)
    # 1Mb chunks
    chunklen = max(1, (2 ** 20) // rowsize)
    chunks = (chunklen,) + data.shape[1:]
    return chunks
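
An illustrative call, assuming the default_chunks function above is in scope; the array shape and dtype are made up for the example:

import numpy as np

data = np.zeros((1_000_000, 3), dtype='f8')   # rowsize = 8 * 3 = 24 bytes
print(default_chunks(data))                   # (43690, 3), since 2**20 // 24 == 43690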
Code example #4
from functools import reduce
import operator

import zarr

def default_chunks(data, expectedlen):
    # here we will only ever chunk the first dimension
    rowsize = data.dtype.itemsize
    if data.ndim > 1:
        # pretend array is 1D
        rowsize *= reduce(operator.mul, data.shape[1:])
    if expectedlen is None:
        # default to 4M chunks of first dimension
        chunklen = 2**22 // rowsize
    else:
        # use zarr heuristics
        chunklen, = zarr.util.guess_chunks((expectedlen, ), rowsize)
    if data.ndim > 1:
        chunks = (chunklen, ) + data.shape[1:]
    else:
        chunks = chunklen,
    return chunks
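
An illustrative call of this variant, assuming zarr v2 (where zarr.util.guess_chunks is available) and the function above in scope; the shapes and expected length below are made up:

import numpy as np

data = np.zeros((1000, 100, 2), dtype='i1')   # rowsize = 1 * 100 * 2 = 200 bytes

# no expected length: ~4Mb chunks of the first dimension
print(default_chunks(data, None))             # (20971, 100, 2), since 2**22 // 200 == 20971

# with an expected final length, the first-dimension chunk length is
# delegated to zarr.util.guess_chunks
print(default_chunks(data, 10_000_000))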