Exemple #1
0
def concat_chunks_into_dset(matrices, group, dset_name,
                            rows_in_chunk=SNPS_PER_CHUNK):
    """Concatenate matrices along their first axis into a new dataset.

    The dataset is created in ``group`` with the shape and dtype of the
    first matrix and an unlimited first dimension. It is then resized as
    needed while every matrix is written in iteration order, so the
    matrices are consumed one at a time.

    Args:
        matrices: Iterable of objects exposing ``shape`` and ``dtype``.
            The trailing dimensions of the first matrix are used for the
            whole dataset; later matrices are not validated here.
        group: Object providing an h5py-style ``create_dataset`` method.
        dset_name: Name given to the created dataset.
        rows_in_chunk: Length of the first dimension of the dataset's
            chunk shape.

    Returns:
        The dataset returned by ``group.create_dataset``, holding all the
        rows written.

    Raises:
        ValueError: If there is no first matrix to take the layout from.
    """
    matrices = iter(matrices)
    fst_mat = _first_item(matrices)
    # The missing-input signal is the first item, not the iterator (which
    # can never be None after iter()).
    # NOTE(review): assumes _first_item returns None for an exhausted
    # iterator -- confirm against its definition.
    if fst_mat is None:
        raise ValueError('There were no matrices to concatenate')

    size = fst_mat.shape
    trailing_dims = size[1:]

    kwargs = DEF_DSET_PARAMS.copy()
    kwargs['dtype'] = fst_mat.dtype
    # Only the first axis is allowed to grow.
    kwargs['maxshape'] = (None,) + trailing_dims
    kwargs['chunks'] = (rows_in_chunk,) + trailing_dims
    dset = group.create_dataset(dset_name, size, **kwargs)

    current_snp_index = 0
    # The first matrix was already pulled from the iterator, so put it
    # back in front of the remaining ones.
    for mat in chain([fst_mat], matrices):
        stop = current_snp_index + mat.shape[0]
        if dset.shape[0] < stop:
            dset.resize((stop,) + trailing_dims)
        dset[current_snp_index:stop] = mat
        current_snp_index = stop

    return dset
Exemple #2
0
    def _create_matrix(self, path, *args, **kwargs):
        """Create a new dataset at ``path`` in ``self._h5file``.

        ``path`` is split into a group part and a dataset name. The group
        is created when looking it up raises ``KeyError``. Defaults are
        then filled in for any keyword the caller did not supply: every
        entry of ``DEF_DSET_PARAMS``, a ``fillvalue`` looked up in
        ``MISSING_VALUES`` by dtype, and a ``maxshape`` that leaves every
        dimension unlimited.

        Args:
            path: Posix-style path of the dataset, ending in its name.
            *args: Forwarded to ``create_dataset`` after the dataset
                name, so positionally they are ``(shape, dtype, ...)``.
            **kwargs: Forwarded to ``create_dataset``; explicit values
                take precedence over the defaults described above.

        Returns:
            The object returned by the group's ``create_dataset``.

        Raises:
            ValueError: If ``path`` has no dataset name or something
                already exists at ``path``.
            KeyError: If the dtype is not in ``MISSING_VALUES``, or if
                ``maxshape`` has to be derived and no shape was given.
        """
        hdf5 = self._h5file
        group_name, dset_name = posixpath.split(path)
        if not dset_name:
            msg = 'The path should include a dset name: ' + path
            raise ValueError(msg)

        try:
            hdf5[path]
        except KeyError:
            pass
        else:
            msg = 'The dataset already exists: ' + path
            raise ValueError(msg)

        try:
            group = hdf5[group_name]
        except KeyError:
            group = hdf5.create_group(group_name)

        for key, value in DEF_DSET_PARAMS.items():
            kwargs.setdefault(key, value)

        # args are passed right after the dataset name, so args[0] is the
        # shape and args[1] the dtype.
        if 'fillvalue' not in kwargs:
            if 'dtype' in kwargs:
                dtype = kwargs['dtype']
            elif len(args) > 1:
                dtype = args[1]
            else:
                dtype = None
            if dtype is not None:
                kwargs['fillvalue'] = MISSING_VALUES[dtype]

        if 'maxshape' not in kwargs:
            if 'shape' in kwargs or not args:
                # Still a KeyError when no shape was given at all.
                shape = kwargs['shape']
            else:
                shape = args[0]
            kwargs['maxshape'] = (None,) * len(shape)

        return group.create_dataset(dset_name, *args, **kwargs)
    def _create_matrix(self, path, *args, **kwargs):
        """Create a new dataset at ``path`` in ``self._h5file``.

        ``path`` is split into a group part and a dataset name. The group
        is created when looking it up raises ``KeyError``. Defaults are
        then filled in for any keyword the caller did not supply: every
        entry of ``DEF_DSET_PARAMS``, a ``fillvalue`` looked up in
        ``MISSING_VALUES`` by dtype, and a ``maxshape`` that leaves every
        dimension unlimited.

        Args:
            path: Posix-style path of the dataset, ending in its name.
            *args: Forwarded to ``create_dataset`` after the dataset
                name, so positionally they are ``(shape, dtype, ...)``.
            **kwargs: Forwarded to ``create_dataset``; explicit values
                take precedence over the defaults described above.

        Returns:
            The object returned by the group's ``create_dataset``.

        Raises:
            ValueError: If ``path`` has no dataset name or something
                already exists at ``path``.
            KeyError: If the dtype is not in ``MISSING_VALUES``, or if
                ``maxshape`` has to be derived and no shape was given.
        """
        hdf5 = self._h5file
        group_name, dset_name = posixpath.split(path)
        if not dset_name:
            msg = 'The path should include a dset name: ' + path
            raise ValueError(msg)

        try:
            hdf5[path]
        except KeyError:
            pass
        else:
            msg = 'The dataset already exists: ' + path
            raise ValueError(msg)

        try:
            group = hdf5[group_name]
        except KeyError:
            group = hdf5.create_group(group_name)

        for key, value in DEF_DSET_PARAMS.items():
            kwargs.setdefault(key, value)

        # args are passed right after the dataset name, so args[0] is the
        # shape and args[1] the dtype.
        if 'fillvalue' not in kwargs:
            if 'dtype' in kwargs:
                dtype = kwargs['dtype']
            elif len(args) > 1:
                dtype = args[1]
            else:
                dtype = None
            if dtype is not None:
                kwargs['fillvalue'] = MISSING_VALUES[dtype]

        if 'maxshape' not in kwargs:
            if 'shape' in kwargs or not args:
                # Still a KeyError when no shape was given at all.
                shape = kwargs['shape']
            else:
                shape = args[0]
            kwargs['maxshape'] = (None,) * len(shape)

        return group.create_dataset(dset_name, *args, **kwargs)