Esempio n. 1
0
    def sr_loop(self,
                kpti_kptj=numpy.zeros((2, 3)),
                max_memory=2000,
                compact=True,
                blksize=None):
        '''Iterate over the short-range part of the 3-index tensor.

        Generator.  Each item is a tuple ``(LpqR, LpqI, sign)`` holding the
        real part, the imaginary part and a sign (``1`` or ``-1``) for one
        block of auxiliary rows read from the ``'j3c'`` (sign ``1``) or
        ``'j3c-'`` (sign ``-1``) data of ``self._cderi``.

        Args:
            kpti_kptj : pair ``(kpti, kptj)`` selecting the data to load.
            max_memory : budget used to derive ``blksize`` when it is not
                given (logged as MB).
            compact : when False and ``kpti == kptj``, every block is expanded
                with ``lib.unpack_tril`` and reshaped to ``(-1, nao**2)``.
            blksize : number of auxiliary rows per block; derived from
                ``max_memory`` and ``self.blockdim`` when None.
        '''
        if self._cderi is None:
            self.build()
        cell = self.cell
        kpti, kptj = kpti_kptj
        unpack = is_zero(kpti - kptj) and not compact
        is_real = is_zero(kpti_kptj)
        nao = cell.nao_nr()

        if blksize is None:
            # Divisor 8 for real data, 16 for complex data (presumably the
            # element size in bytes of float64 / complex128).
            nbytes = 8 if is_real else 16
            nrows = max_memory * 1e6 / nbytes / (nao**2 * 2)
            # Halve the estimate: a second block is held for prefetch.
            blksize = max(16, min(int(nrows / 2), self.blockdim))
            logger.debug3(self, 'max_memory %d MB, blksize %d', max_memory,
                          blksize)

        def make_loader(dataset):
            '''Return a function reading one row slice of ``dataset``.'''

            def read_block(aux_slice):
                row0, row1 = aux_slice
                block = numpy.asarray(dataset[row0:row1])
                if is_real:
                    real = block
                    if unpack:
                        real = lib.unpack_tril(real).reshape(-1, nao**2)
                    imag = numpy.zeros_like(real)
                else:
                    real = numpy.asarray(block.real, order='C')
                    imag = numpy.asarray(block.imag, order='C')
                    block = None  # drop the complex copy early
                    if unpack:
                        real = lib.unpack_tril(real).reshape(-1, nao**2)
                        imag = lib.unpack_tril(
                            imag, lib.ANTIHERMI).reshape(-1, nao**2)
                return real, imag

            return read_block

        with _load3c(self._cderi, 'j3c', kpti_kptj, 'j3c-kptij') as j3c:
            read_block = make_loader(j3c)
            aux_slices = lib.prange(0, j3c.shape[0], blksize)
            for real, imag in lib.map_with_prefetch(read_block, aux_slices):
                yield real, imag, 1
                # Release the block while the consumer keeps working.
                real = imag = None

        if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum':
            # Truncated Coulomb operator is not positive definite. Load the
            # CDERI tensor of negative part.
            with _load3c(self._cderi,
                         'j3c-',
                         kpti_kptj,
                         'j3c-kptij',
                         ignore_key_error=True) as j3c:
                read_block = make_loader(j3c)
                aux_slices = lib.prange(0, j3c.shape[0], blksize)
                for real, imag in lib.map_with_prefetch(
                        read_block, aux_slices):
                    yield real, imag, -1
                    real = imag = None
Esempio n. 2
0
    def loop(self, blksize=None):
        '''Yield the ``'j3c'`` tensor of ``self._cderi`` in row blocks.

        Generator.  ``self.build()`` is called first when ``self._cderi`` is
        None.

        Args:
            blksize : number of auxiliary rows per block; defaults to
                ``self.blockdim``.
        '''
        if self._cderi is None:
            self.build()
        if blksize is None:
            blksize = self.blockdim

        with addons.load(self._cderi, 'j3c') as feri:
            if isinstance(feri, numpy.ndarray):
                # In-memory tensor: slice it directly, no prefetching.
                nrows = feri.shape[0]
                for row0, row1 in self.prange(0, nrows, blksize):
                    yield numpy.asarray(feri[row0:row1], order='C')

            else:
                blocked = isinstance(feri, h5py.Group)
                if blocked:
                    # starting from pyscf-1.7, DF tensor may be stored in
                    # block format
                    nrows = feri['0'].shape[0]

                    def read_rows(aux_slice):
                        row0, row1 = aux_slice
                        return _load_from_h5g(feri, row0, row1)
                else:
                    nrows = feri.shape[0]

                    def read_rows(aux_slice):
                        row0, row1 = aux_slice
                        return numpy.asarray(feri[row0:row1])

                row_slices = self.prange(0, nrows, blksize)
                for block in lib.map_with_prefetch(read_rows, row_slices):
                    yield block
                    # Drop the reference so the block can be released.
                    block = None
Esempio n. 3
0
def cholesky_eri(mol, erifile, auxbasis='weigend+etb', dataname='j3c', tmpdir=None,
                 int3c='int3c2e', aosym='s2ij', int2c='int2c2e', comp=1,
                 max_memory=MAX_MEMORY, auxmol=None, verbose=logger.NOTE):
    '''3-index density-fitting tensor.

    The tensor is first generated in blocks into a temporary swap file by
    ``cholesky_eri_b`` and then copied, row block by row block, into a single
    dataset ``dataname`` of ``erifile``.

    Args:
        mol : molecule object; forwarded to ``cholesky_eri_b``.
        erifile : destination handed to ``_create_h5file``.
        auxbasis : auxiliary basis, used only when ``auxmol`` is None.
        dataname : name of the dataset created in ``erifile``.
        tmpdir : directory of the swap file; ``lib.param.TMPDIR`` when None.
        int3c, int2c : integral names forwarded to ``cholesky_eri_b``.
        aosym : ``'s1'`` or ``'s2ij'``.
        comp : number of integral components; only ``1`` is accepted.
        max_memory : budget used to size the copy blocks (presumably MB).
        auxmol : pre-built auxiliary molecule; built from ``auxbasis`` when
            None.
        verbose : verbosity level or logger passed to ``logger.new_logger``.

    Returns:
        ``erifile`` as passed in.
    '''
    assert(aosym in ('s1', 's2ij'))
    assert(comp == 1)
    log = logger.new_logger(mol, verbose)
    # time.clock() was removed in Python 3.8; prefer time.process_time() and
    # only fall back to time.clock() on interpreters that lack it.
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time0 = (cpu_clock(), time.time())

    if auxmol is None:
        auxmol = make_auxmol(mol, auxbasis)

    if tmpdir is None:
        tmpdir = lib.param.TMPDIR
    swapfile = tempfile.NamedTemporaryFile(dir=tmpdir)
    cholesky_eri_b(mol, swapfile.name, auxbasis, dataname,
                   int3c, aosym, int2c, comp, max_memory, auxmol, verbose=log)
    fswap = h5py.File(swapfile.name, 'r')
    # Both HDF5 handles are closed in ``finally`` blocks so that a failure
    # during the copy does not leave them open.
    try:
        time1 = log.timer('generate (ij|L) 1 pass', *time0)

        # Cannot let naoaux = auxmol.nao_nr() if auxbasis has linear dependence
        nao = mol.nao_nr()
        if aosym == 's1':
            nao_pair = nao * nao
        else:
            nao_pair = nao * (nao+1) // 2

        feri = _create_h5file(erifile, dataname)
        try:
            if comp == 1:
                naoaux = fswap['%s/0'%dataname].shape[0]
                h5d_eri = feri.create_dataset(dataname, (naoaux,nao_pair), 'f8')
            else:
                naoaux = fswap['%s/0'%dataname].shape[1]
                h5d_eri = feri.create_dataset(dataname,
                                              (comp,naoaux,nao_pair), 'f8')

            # Rows copied per step: bounded below by 28 and above by naoaux.
            iolen = min(max(int(max_memory*.45e6/8/nao_pair), 28), naoaux)
            totstep = (naoaux+iolen-1)//iolen

            def load(row_slice):
                row0, row1 = row_slice
                return _load_from_h5g(fswap[dataname], row0, row1)

            ti0 = time1
            # Materialised as a list because it is indexed by step below.
            slices = list(lib.prange(0, naoaux, iolen))
            for istep, dat in enumerate(lib.map_with_prefetch(load, slices)):
                row0, row1 = slices[istep]
                nrow = row1 - row0
                if comp == 1:
                    h5d_eri[row0:row1] = dat
                else:
                    h5d_eri[:,row0:row1] = dat
                dat = None
                ti0 = log.timer('step 2 [%d/%d], [%d:%d], row = %d'%
                                (istep+1, totstep, row0, row1, nrow), *ti0)
        finally:
            feri.close()
    finally:
        fswap.close()

    log.timer('cholesky_eri', *time0)
    return erifile
Esempio n. 4
0
    def make_kpt(uniq_kptji_id, cholesky_j2c):
        '''Assemble and store the j3c blocks for one unique k-point difference.

        For every (kpti, kptj) pair whose difference is
        ``uniq_kpts[uniq_kptji_id]`` the intermediate data stored under
        ``fswap['j3c-junk/<idx>/<segment>']`` is loaded, corrected with
        Fourier-space contributions, contracted with the 2-centre factor
        ``j2c`` and written to ``feri['j3c/<ji>/<istep>']`` (and to
        ``feri['j3c-/<ji>/<istep>']`` when ``j2c_negative`` is not None).
        The consumed ``j3c-junk`` groups are deleted at the end.

        This is a closure: ``uniq_kpts``, ``uniq_inverse``, ``kptjs``,
        ``cell``, ``auxcell``, ``fused_cell``, ``mydf``, ``fuse``, ``Gv``,
        ``b``, ``gxyz``, ``Gvbase``, ``mesh``, ``ngrids``, ``nao``, ``naux``,
        ``nsegs``, ``fswap``, ``feri`` and ``log`` come from the enclosing
        scope, which is not visible here.

        Args:
            uniq_kptji_id : index into ``uniq_kpts``.
            cholesky_j2c : tuple ``(j2c, j2c_negative, j2ctag)``; ``j2ctag``
                equal to ``'CD'`` selects the triangular solve, anything else
                selects a plain matrix product with ``j2c``.
        '''
        kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
        log.debug1('kpt = %s', kpt)
        # Indices of all k-point pairs that map onto this unique difference.
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

        j2c, j2c_negative, j2ctag = cholesky_j2c

        # Fourier transform of the shells of fused_cell that follow the first
        # auxcell.nbas shells, weighted by the Coulomb kernel and split into
        # C-contiguous real and imaginary parts.
        shls_slice = (auxcell.nbas, fused_cell.nbas)
        Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
        wcoulG = mydf.weighted_coulG(kpt, False, mesh)
        Gaux *= wcoulG.reshape(-1, 1)
        kLR = Gaux.real.copy('C')
        kLI = Gaux.imag.copy('C')
        Gaux = None

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao * (nao + 1) // 2

            # vbar and the packed overlap matrices are only prepared for 3D
            # cells; load() below reads them under the same condition.
            if cell.dimension == 3:
                vbar = fuse(mydf.auxbar(fused_cell))
                ovlp = cell.pbc_intor('int1e_ovlp',
                                      hermi=1,
                                      kpts=adapted_kptjs)
                ovlp = [lib.pack_tril(s) for s in ovlp]
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        # Never budget less than 2000 (presumably MB), whatever is in use.
        max_memory = max(2000, mydf.max_memory - mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory * .38e6 / 16 / naux / (nkptj + 1)), 1),
                     nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        # Use the largest range actually produced as the buffer length.
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16,
                           int(max_memory * .1e6 / 16 / buflen / (nkptj + 1)))
        else:
            Gblksize = max(16,
                           int(max_memory * .2e6 / 16 / buflen / (nkptj + 1)))
        Gblksize = min(Gblksize, ngrids, 16384)

        def load(aux_slice):
            '''Read columns ``col0:col1`` of the junk data for every adapted
            pair and return ``(j3cR, j3cI)``, two lists with one entry per
            pair.  An entry of ``j3cI`` is None when ``kpt`` is zero and the
            pair's kptj is the gamma point.
            '''
            col0, col1 = aux_slice
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                # Stack the transposed column slices of all nsegs segments.
                v = numpy.vstack([
                    fswap['j3c-junk/%d/%d' % (idx, i)][0, col0:col1].T
                    for i in range(nsegs)
                ])
                # vbar is the interaction between the background charge
                # and the auxiliary basis.  0D, 1D, 2D do not have vbar.
                if is_zero(kpt) and cell.dimension == 3:
                    for i in numpy.where(vbar != 0)[0]:
                        v[i] -= vbar[i] * ovlp[k][col0:col1]
                j3cR.append(numpy.asarray(v.real, order='C'))
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order='C'))
                v = None
            return j3cR, j3cI

        # Scratch buffers reused for every G block and every k-point pair.
        pqkRbuf = numpy.empty(buflen * Gblksize)
        pqkIbuf = numpy.empty(buflen * Gblksize)
        # buf for ft_aopair
        buf = numpy.empty(nkptj * buflen * Gblksize, dtype=numpy.complex128)
        # Turn the per-range column counts into (start, stop) column offsets.
        cols = [sh_range[2] for sh_range in shranges]
        locs = numpy.append(0, numpy.cumsum(cols))
        tasks = zip(locs[:-1], locs[1:])
        for istep, (j3cR,
                    j3cI) in enumerate(lib.map_with_prefetch(load, tasks)):
            bstart, bend, ncol = shranges[istep]
            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', istep + 1,
                       len(shranges), bstart, bend, ncol)
            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)

            for p0, p1 in lib.prange(0, ngrids, Gblksize):
                dat = ft_ao._ft_aopair_kpts(cell,
                                            Gv[p0:p1],
                                            shls_slice,
                                            aosym,
                                            b,
                                            gxyz[p0:p1],
                                            Gvbase,
                                            kpt,
                                            adapted_kptjs,
                                            out=buf)
                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = dat[k].reshape(nG, ncol)
                    # Views on the scratch buffers holding the transposed
                    # real and imaginary parts of this AO-pair block.
                    pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T

                    # NOTE(review): presumably lib.dot(a, b, alpha, c, beta)
                    # updates c in place as alpha*a.b + beta*c -- confirm
                    # against lib.dot.  Only rows naux: of the loaded arrays
                    # are passed as the output operand.
                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                    # j3cI[k] is None in the real (gamma point) case, so the
                    # imaginary update is skipped there.
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)
                if j2ctag == 'CD':
                    # v is rebound to the solution here, so the j2c_negative
                    # product below would see the solved v in this branch.
                    v = scipy.linalg.solve_triangular(j2c,
                                                      v,
                                                      lower=True,
                                                      overwrite_b=True)
                    feri['j3c/%d/%d' % (ji, istep)] = v
                else:
                    feri['j3c/%d/%d' % (ji, istep)] = lib.dot(j2c, v)

                # low-dimension systems
                if j2c_negative is not None:
                    feri['j3c-/%d/%d' % (ji, istep)] = lib.dot(j2c_negative, v)
            j3cR = j3cI = None

        # The intermediate data of these pairs is no longer needed.
        for ji in adapted_ji_idx:
            del (fswap['j3c-junk/%d' % ji])
Esempio n. 5
0
def cholesky_eri_b(mol, erifile, auxbasis='weigend+etb', dataname='j3c',
                   int3c='int3c2e', aosym='s2ij', int2c='int2c2e', comp=1,
                   max_memory=MAX_MEMORY, auxmol=None, verbose=logger.NOTE):
    '''3-center 2-electron DF tensor. Similar to cholesky_eri while this
    function stores DF tensor in blocks.

    One dataset ``'<dataname>/<istep>'`` is written to ``erifile`` per shell
    range returned by ``_guess_shell_ranges``.

    Args:
        mol : molecule object providing ``_atm``, ``_bas``, ``_env`` and
            ``nbas``.
        erifile : destination handed to ``_create_h5file``.
        auxbasis : auxiliary basis, used only when ``auxmol`` is None.
        dataname : name of the group holding the blocks.
        int3c, int2c : names of the 3-centre and 2-centre integrals.
        aosym : ``'s1'`` or ``'s2ij'``.
        comp : number of integral components.
        max_memory : budget used to size the integral buffer (presumably MB).
        auxmol : pre-built auxiliary molecule; built from ``auxbasis`` when
            None.
        verbose : verbosity level or logger passed to ``logger.new_logger``.

    Returns:
        ``erifile`` as passed in.
    '''
    assert(aosym in ('s1', 's2ij'))
    log = logger.new_logger(mol, verbose)
    # time.clock() was removed in Python 3.8; prefer time.process_time() and
    # only fall back to time.clock() on interpreters that lack it.
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time0 = (cpu_clock(), time.time())

    if auxmol is None:
        auxmol = make_auxmol(mol, auxbasis)
    j2c = auxmol.intor(int2c, hermi=1)
    log.debug('size of aux basis %d', j2c.shape[0])
    time1 = log.timer('2c2e', *time0)
    try:
        low = scipy.linalg.cholesky(j2c, lower=True)
        tag = 'cd'
    except scipy.linalg.LinAlgError:
        # Cholesky failed: fall back to an eigen-decomposition and keep only
        # the eigenvectors whose eigenvalue exceeds LINEAR_DEP_THR.
        w, v = scipy.linalg.eigh(j2c)
        idx = w > LINEAR_DEP_THR
        low = (v[:,idx] / numpy.sqrt(w[idx]))
        v = None
        tag = 'eig'
    j2c = None
    time1 = log.timer('Cholesky 2c2e', *time1)

    int3c = gto.moleintor.ascint3(mol._add_suffix(int3c))
    atm, bas, env = gto.mole.conc_env(mol._atm, mol._bas, mol._env,
                                      auxmol._atm, auxmol._bas, auxmol._env)
    ao_loc = gto.moleintor.make_loc(bas, int3c)
    nao = ao_loc[mol.nbas]
    # Number of auxiliary functions in the concatenated basis.
    naoaux = ao_loc[-1] - nao
    if aosym == 's1':
        nao_pair = nao * nao
        buflen = min(max(int(max_memory*.24e6/8/naoaux/comp), 1), nao_pair)
        shranges = _guess_shell_ranges(mol, buflen, 's1')
    else:
        nao_pair = nao * (nao+1) // 2
        buflen = min(max(int(max_memory*.24e6/8/naoaux/comp), 1), nao_pair)
        shranges = _guess_shell_ranges(mol, buflen, 's2ij')
    log.debug('erifile %.8g MB, IO buf size %.8g MB',
              naoaux*nao_pair*8/1e6, comp*buflen*naoaux*8/1e6)
    log.debug1('shranges = %s', shranges)
    # TODO: Libcint-3.14 and newer version support to compute int3c2e without
    # the opt for the 3rd index.
    #if '3c2e' in int3c:
    #    cintopt = gto.moleintor.make_cintopt(atm, mol._bas, env, int3c)
    #else:
    #    cintopt = gto.moleintor.make_cintopt(atm, bas, env, int3c)
    cintopt = gto.moleintor.make_cintopt(atm, bas, env, int3c)
    # Output buffer handed to getints3c for every shell range.
    bufs1 = numpy.empty((comp*max([x[2] for x in shranges]),naoaux))

    def transform(b):
        '''Bring one integral block to (naoaux, npair) layout and contract it
        with ``low`` (triangular solve for 'cd', ``low.T`` product for 'eig').
        '''
        if b.ndim == 3 and b.flags.f_contiguous:
            b = lib.transpose(b.T, axes=(0,2,1)).reshape(naoaux,-1)
        else:
            b = b.reshape((-1,naoaux)).T
        if tag == 'cd':
            if b.flags.c_contiguous:
                b = lib.transpose(b).T
            return scipy.linalg.solve_triangular(low, b, lower=True,
                                                 overwrite_b=True, check_finite=False)
        else:
            return lib.dot(low.T, b)

    def process(sh_range):
        '''Compute and transform the integrals of one shell range.'''
        bstart, bend, nrow = sh_range
        shls_slice = (bstart, bend, 0, mol.nbas, mol.nbas, mol.nbas+auxmol.nbas)
        ints = gto.moleintor.getints3c(int3c, atm, bas, env, shls_slice, comp,
                                       aosym, ao_loc, cintopt, out=bufs1)
        if comp == 1:
            dat = transform(ints)
        else:
            dat = [transform(x) for x in ints]
        return dat

    feri = _create_h5file(erifile, dataname)
    # Close the output file even if integral generation or writing fails.
    try:
        for istep, dat in enumerate(lib.map_with_prefetch(process, shranges)):
            sh_range = shranges[istep]
            label = '%s/%d'%(dataname,istep)
            if comp == 1:
                feri[label] = dat
            else:
                shape = (len(dat),) + dat[0].shape
                fdat = feri.create_dataset(label, shape, dat[0].dtype.char)
                for i, b in enumerate(dat):
                    fdat[i] = b
            dat = None
            log.debug('int3c2e [%d/%d], AO [%d:%d], nrow = %d',
                      istep+1, len(shranges), *sh_range)
            time1 = log.timer('gen CD eri [%d/%d]' % (istep+1,len(shranges)), *time1)
        bufs1 = None
    finally:
        feri.close()
    return erifile
Esempio n. 6
0
def general(mol, mo_coeffs, erifile, auxbasis='weigend+etb', dataname='eri_mo', tmpdir=None,
            int3c='int3c2e', aosym='s2ij', int2c='int2c2e', comp=1,
            max_memory=MAX_MEMORY, verbose=0, compact=True):
    ''' Transform ij of (ij|L) to MOs.

    The AO tensor is first generated in blocks into a temporary swap file by
    ``cholesky_eri_b``; each row block is then transformed with
    ``_ao2mo.nr_e2`` and written to the dataset ``dataname`` of ``erifile``.

    Args:
        mol : molecule object; forwarded to ``cholesky_eri_b``.
        mo_coeffs : sequence whose first two entries are the orbital
            coefficient matrices for indices i and j.
        erifile : destination handed to ``_create_h5file``.
        auxbasis : auxiliary basis forwarded to ``cholesky_eri_b``.
        dataname : name of the dataset created in ``erifile``.
        tmpdir : directory of the swap file; ``lib.param.TMPDIR`` when None.
        int3c, int2c : integral names forwarded to ``cholesky_eri_b``.
        aosym : ``'s1'`` or ``'s2ij'``.
        comp : number of integral components.
        max_memory : budget used to size the row blocks (presumably MB).
        verbose : verbosity level or logger passed to ``logger.new_logger``.
        compact : passed to ``_conc_mos`` as ``compact and aosym != 's1'``.

    Returns:
        ``erifile`` as passed in.
    '''
    assert(aosym in ('s1', 's2ij'))
    # time.clock() was removed in Python 3.8; prefer time.process_time() and
    # only fall back to time.clock() on interpreters that lack it.
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time0 = (cpu_clock(), time.time())
    log = logger.new_logger(mol, verbose)

    if tmpdir is None:
        tmpdir = lib.param.TMPDIR
    swapfile = tempfile.NamedTemporaryFile(dir=tmpdir)
    cholesky_eri_b(mol, swapfile.name, auxbasis, dataname,
                   int3c, aosym, int2c, comp, max_memory, verbose=log)
    fswap = h5py.File(swapfile.name, 'r')
    # Both HDF5 handles are closed in ``finally`` blocks so that a failure
    # during the transformation does not leave them open.
    try:
        time1 = log.timer('AO->MO eri transformation 1 pass', *time0)

        nao = mo_coeffs[0].shape[0]
        if aosym == 's1':
            nao_pair = nao * nao
            aosym_as_nr_e2 = 's1'
        else:
            nao_pair = nao * (nao+1) // 2
            aosym_as_nr_e2 = 's2kl'

        ijmosym, nij_pair, moij, ijshape = \
                ao2mo.incore._conc_mos(mo_coeffs[0], mo_coeffs[1],
                                       compact and aosym != 's1')

        # shape[-2] is the auxiliary dimension with or without a leading
        # component axis.
        naoaux = fswap['%s/0'%dataname].shape[-2]
        feri = _create_h5file(erifile, dataname)
        try:
            if comp == 1:
                h5d_eri = feri.create_dataset(dataname, (naoaux,nij_pair), 'f8')
            else:
                h5d_eri = feri.create_dataset(dataname,
                                              (comp,naoaux,nij_pair), 'f8')

            def load(row_slice):
                row0, row1 = row_slice
                return _load_from_h5g(fswap[dataname], row0, row1)

            # Rows handled per step: bounded below by 28 and above by naoaux.
            iolen = min(max(int(max_memory*.45e6/8/(nao_pair+nij_pair)), 28),
                        naoaux)
            totstep = (naoaux+iolen-1)//iolen
            ti0 = time1
            # Materialised as a list because it is indexed by step below.
            slices = list(lib.prange(0, naoaux, iolen))
            for istep, dat in enumerate(lib.map_with_prefetch(load, slices)):
                row0, row1 = slices[istep]
                nrow = row1 - row0
                if comp == 1:
                    dat = _ao2mo.nr_e2(dat, moij, ijshape, aosym_as_nr_e2, ijmosym)
                    h5d_eri[row0:row1] = dat
                else:
                    dat = _ao2mo.nr_e2(dat.reshape(comp*nrow, nao_pair),
                                       moij, ijshape, aosym_as_nr_e2, ijmosym)
                    h5d_eri[:,row0:row1] = dat.reshape(comp, nrow, nij_pair)
                dat = None
                log.debug('step 2 [%d/%d], [%d:%d], row = %d',
                          istep+1, totstep, row0, row1, nrow)
                ti0 = log.timer('step 2 [%d/%d], [%d:%d], row = %d'%
                                (istep+1, totstep, row0, row1, nrow), *ti0)
        finally:
            feri.close()
    finally:
        fswap.close()

    log.timer('AO->MO CD eri transformation 2 pass', *time1)
    log.timer('AO->MO CD eri transformation', *time0)
    return erifile
Esempio n. 7
0
def _aux_e2(cell,
            auxcell,
            erifile,
            intor='int3c2e',
            aosym='s2ij',
            comp=None,
            kptij_lst=None,
            dataname='eri_mo',
            shls_slice=None,
            max_memory=2000,
            verbose=0):
    r'''3-center AO integrals (ij|L) with double lattice sum:
    \sum_{lm} (i[l]j[m]|L[0]), where L is the auxiliary basis.
    Three-index integral tensor (kptij_idx, nao_pair, naux) or four-index
    integral tensor (kptij_idx, comp, nao_pair, naux) are stored on disk.

    **This function should be only used by df and mdf initialization function
    _make_j3c**

    The integrals are written as datasets ``'<dataname>/<k>/<istep>'``, one
    per k-point pair ``k`` and auxiliary shell range ``istep``; the k-point
    list itself is stored under ``'<dataname>-kptij'``.  Existing entries
    with these names are deleted first.

    Args:
        erifile : an ``h5py.Group`` to write into (left open), or a file
            name; an existing HDF5 file is opened in append mode, anything
            else is created.  A file opened here is always closed again,
            also when an error occurs.
        kptij_lst : (*,2,3) array
            A list of (kpti, kptj)
        shls_slice : 6 shell bounds (i0, i1, j0, j1, aux0, aux1); defaults
            to all shells of ``cell`` and ``auxcell``.
        max_memory : budget used to size the integral buffers (presumably
            MB).

    Returns:
        ``erifile`` as passed in.
    '''
    intor, comp = gto.moleintor._get_intor_and_comp(cell._add_suffix(intor),
                                                    comp)

    # Only a file opened by this function is closed by this function.
    owns_file = not isinstance(erifile, h5py.Group)
    if not owns_file:
        feri = erifile
    elif h5py.is_hdf5(erifile):
        feri = h5py.File(erifile, 'a')
    else:
        feri = h5py.File(erifile, 'w')

    try:
        if dataname in feri:
            del (feri[dataname])
        if dataname + '-kptij' in feri:
            del (feri[dataname + '-kptij'])

        if kptij_lst is None:
            kptij_lst = numpy.zeros((1, 2, 3))
        feri[dataname + '-kptij'] = kptij_lst

        if shls_slice is None:
            shls_slice = (0, cell.nbas, 0, cell.nbas, 0, auxcell.nbas)

        ao_loc = cell.ao_loc_nr()
        aux_loc = auxcell.ao_loc_nr(auxcell.cart
                                    or 'ssc' in intor)[:shls_slice[5] + 1]
        ni = ao_loc[shls_slice[1]] - ao_loc[shls_slice[0]]
        nj = ao_loc[shls_slice[3]] - ao_loc[shls_slice[2]]
        nkptij = len(kptij_lst)

        # Number of packed (lower-triangular) pairs vs. full pairs.
        nii = (ao_loc[shls_slice[1]] * (ao_loc[shls_slice[1]] + 1) // 2 -
               ao_loc[shls_slice[0]] * (ao_loc[shls_slice[0]] + 1) // 2)
        nij = ni * nj

        kpti = kptij_lst[:, 0]
        kptj = kptij_lst[:, 1]
        # Pairs with kpti == kptj (within KPT_DIFF_TOL).
        aosym_ks2 = abs(kpti - kptj).sum(axis=1) < KPT_DIFF_TOL
        j_only = numpy.all(aosym_ks2)
        #aosym_ks2 &= (aosym[:2] == 's2' and shls_slice[:2] == shls_slice[2:4])
        aosym_ks2 &= aosym[:2] == 's2'

        if j_only and aosym[:2] == 's2':
            assert (shls_slice[2] == 0)
            nao_pair = nii
        else:
            nao_pair = nij

        if gamma_point(kptij_lst):
            dtype = numpy.double
        else:
            dtype = numpy.complex128

        buflen = max(8,
                     int(max_memory * .47e6 / 16 / (nkptij * ni * nj * comp)))
        # Number of functions in each auxiliary shell of the requested range.
        auxdims = aux_loc[shls_slice[4] + 1:shls_slice[5] +
                          1] - aux_loc[shls_slice[4]:shls_slice[5]]
        auxranges = balance_segs(auxdims, buflen)
        buflen = max([x[2] for x in auxranges])
        buf = numpy.empty(nkptij * comp * ni * nj * buflen, dtype=dtype)
        # Two buffers used alternately, so the block being written to disk is
        # not overwritten by the block being computed ahead of it.
        bufs = [buf, numpy.empty_like(buf)]
        int3c = wrap_int3c(cell, auxcell, intor, aosym, comp, kptij_lst)

        def process(aux_range):
            '''Compute the integrals of one auxiliary shell range.'''
            sh0, sh1, nrow = aux_range
            sub_slice = (shls_slice[0], shls_slice[1], shls_slice[2],
                         shls_slice[3], shls_slice[4] + sh0,
                         shls_slice[4] + sh1)
            mat = numpy.ndarray((nkptij, comp, nao_pair, nrow),
                                dtype=dtype,
                                buffer=bufs[0])
            # Swap the buffers for the next call.
            bufs[:] = bufs[1], bufs[0]
            int3c(sub_slice, mat)
            return mat

        kpt_ji = kptj - kpti
        uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
        # sorted_ij_idx: Sort and group the kptij_lst according to the
        # ordering in df._make_j3c to reduce the data fragment in the hdf5
        # file.  When datasets are written to hdf5, they are saved
        # sequentially. If the integral data are saved as the order of
        # kptij_lst, removing the datasets in df._make_j3c will lead to disk
        # space fragment that can not be reused.
        sorted_ij_idx = numpy.hstack(
            [numpy.where(uniq_inverse == k)[0]
             for k in range(len(uniq_kpts))])
        # Flat indices of the lower triangle of an (ni, ni) matrix.
        tril_idx = numpy.tril_indices(ni)
        tril_idx = tril_idx[0] * ni + tril_idx[1]

        for istep, mat in enumerate(lib.map_with_prefetch(process, auxranges)):
            for k in sorted_ij_idx:
                v = mat[k]
                if gamma_point(kptij_lst[k]):
                    v = v.real
                # Full pairs were computed but this k-point pair is
                # symmetric: keep only the lower-triangular entries.
                if aosym_ks2[k] and nao_pair == ni**2:
                    v = v[:, tril_idx]
                feri['%s/%d/%d' % (dataname, k, istep)] = v
            mat = None
    finally:
        if owns_file:
            feri.close()
    return erifile