Esempio n. 1
0
def _ao2mo_ovov(mp, orbs, feri, max_memory=2000, verbose=None):
    '''Two-pass, out-of-core AO->MO transformation writing the datasets
    'ovov', 'ovOV' and 'OVOV' into ``feri``.

    Pass 1 evaluates the AO integrals block by block (over pairs of AO shell
    ranges), contracts the first and the last AO index with the occupied
    orbitals and stores the half-transformed blocks in a temporary HDF5 file.
    Pass 2 reloads those blocks for a batch of occupied orbitals, reassembles
    the full (nao, nao) matrices, transforms the two remaining AO indices with
    the virtual orbitals through ``_ao2mo.nr_e2`` and writes the result.

    Args:
        mp : object providing ``mp.mol``; also handed to ``logger.new_logger``.
        orbs : sequence of four 2D coefficient arrays.  They are unpacked
            below as orboa, orbva, orbob, orbvb (judging by the local names:
            occupied/virtual orbitals of the alpha and the beta spin).
        feri : object with a ``create_dataset`` method (presumably an h5py
            File or Group) that receives the three output datasets.

    Kwargs:
        max_memory : number
            Memory budget in MB, used to choose the AO block size (dmax) and
            the occupied-orbital batch size (occblk).
        verbose : int or logger object
            Forwarded to ``logger.new_logger``.

    Returns:
        None.  The results are only written to ``feri``.
    '''
    # time.clock() was removed in Python 3.8.  Use the same clock pair that
    # half_e1 passes to log.timer.
    time0 = (logger.process_clock(), logger.perf_counter())
    log = logger.new_logger(mp, verbose)
    orboa = numpy.asarray(orbs[0], order='F')
    orbva = numpy.asarray(orbs[1], order='F')
    orbob = numpy.asarray(orbs[2], order='F')
    orbvb = numpy.asarray(orbs[3], order='F')
    nao, nocca = orboa.shape
    noccb = orbob.shape[1]
    nvira = orbva.shape[1]
    nvirb = orbvb.shape[1]

    mol = mp.mol
    int2e = mol._add_suffix('int2e')
    ao2mopt = _ao2mo.AO2MOpt(mol, int2e, 'CVHFnr_schwarz_cond',
                             'CVHFsetnr_direct_scf')
    nbas = mol.nbas
    assert (nvira <= nao)
    assert (nvirb <= nao)

    ao_loc = mol.ao_loc_nr()
    # Target number of AOs per shell range: at least 4, at most nao/3, and
    # otherwise bounded by the memory budget.
    dmax = max(
        4, min(nao / 3, numpy.sqrt(max_memory * .95e6 / 8 / (nao + nocca)**2)))
    sh_ranges = ao2mo.outcore.balance_partition(ao_loc, dmax)
    # From here on dmax is the largest block size actually produced.
    dmax = max(x[2] for x in sh_ranges)
    eribuf = numpy.empty((nao, dmax, dmax, nao))
    ftmp = lib.H5TmpFile()
    disk = (nocca**2 * (nao * (nao + dmax) / 2 + nvira**2) + noccb**2 *
            (nao * (nao + dmax) / 2 + nvirb**2) + nocca * noccb *
            (nao**2 + nvira * nvirb))
    log.debug('max_memory %s MB (dmax = %s) required disk space %g MB',
              max_memory, dmax, disk * 8 / 1e6)

    fint = gto.moleintor.getints4c
    # AO index ranges (i0, i1, j0, j1) of every block saved in pass 1, in the
    # order of the dataset names 'aa/<k>', 'bb/<k>' and 'ab/<k>'.
    aa_blk_slices = []
    ab_blk_slices = []
    count_ab = 0
    count_aa = 0
    time1 = time0
    # ---- pass 1: half transformation; blocks are written in the background
    with lib.call_in_background(ftmp.__setitem__) as save:
        for ish0, ish1, ni in sh_ranges:
            for jsh0, jsh1, nj in sh_ranges:
                i0, i1 = ao_loc[ish0], ao_loc[ish1]
                j0, j1 = ao_loc[jsh0], ao_loc[jsh1]

                eri = fint(int2e,
                           mol._atm,
                           mol._bas,
                           mol._env,
                           shls_slice=(0, nbas, ish0, ish1, jsh0, jsh1, 0,
                                       nbas),
                           aosym='s1',
                           ao_loc=ao_loc,
                           cintopt=ao2mopt._cintopt,
                           out=eribuf)
                # Contract the first AO index with orboa, then the last AO
                # index with orbob (mixed-spin block).
                tmp_i = lib.ddot(orboa.T,
                                 eri.reshape(nao, (i1 - i0) * (j1 - j0) * nao))
                tmp_li = lib.ddot(
                    orbob.T,
                    tmp_i.reshape(nocca * (i1 - i0) * (j1 - j0), nao).T)
                tmp_li = tmp_li.reshape(noccb, nocca, (i1 - i0), (j1 - j0))
                save('ab/%d' % count_ab, tmp_li.transpose(1, 0, 2, 3))
                ab_blk_slices.append((i0, i1, j0, j1))
                count_ab += 1

                # The same-spin blocks are stored only for ish0 >= jsh0;
                # load_aa below fills in the mirrored block.
                if ish0 >= jsh0:
                    tmp_li = lib.ddot(
                        orboa.T,
                        tmp_i.reshape(nocca * (i1 - i0) * (j1 - j0), nao).T)
                    tmp_li = tmp_li.reshape(nocca, nocca, (i1 - i0), (j1 - j0))
                    save('aa/%d' % count_aa, tmp_li.transpose(1, 0, 2, 3))

                    tmp_i = lib.ddot(
                        orbob.T, eri.reshape(nao, (i1 - i0) * (j1 - j0) * nao))
                    tmp_li = lib.ddot(
                        orbob.T,
                        tmp_i.reshape(noccb * (i1 - i0) * (j1 - j0), nao).T)
                    tmp_li = tmp_li.reshape(noccb, noccb, (i1 - i0), (j1 - j0))
                    save('bb/%d' % count_aa, tmp_li.transpose(1, 0, 2, 3))
                    aa_blk_slices.append((i0, i1, j0, j1))
                    count_aa += 1

                time1 = log.timer_debug1(
                    'partial ao2mo [%d:%d,%d:%d]' % (ish0, ish1, jsh0, jsh1),
                    *time1)
    time1 = time0 = log.timer('mp2 ao2mo_ovov pass1', *time0)
    # Drop the references to the pass-1 work arrays.
    eri = eribuf = tmp_i = tmp_li = None

    fovov = feri.create_dataset('ovov', (nocca * nvira, nocca * nvira),
                                'f8',
                                chunks=(nvira, nvira))
    fovOV = feri.create_dataset('ovOV', (nocca * nvira, noccb * nvirb),
                                'f8',
                                chunks=(nvira, nvirb))
    fOVOV = feri.create_dataset('OVOV', (noccb * nvirb, noccb * nvirb),
                                'f8',
                                chunks=(nvirb, nvirb))
    # Number of occupied orbitals processed per pass-2 batch.
    occblk = int(
        min(max(nocca, noccb),
            max(4, 250 / nocca, max_memory * .9e6 / 8 / (nao**2 * nocca) / 5)))

    def load_aa(h5g, nocc, i0, eri):
        # Fill eri[:n] with the same-spin blocks of occupied orbitals
        # [i0, i0+occblk).  Does nothing when i0 is past the last orbital,
        # which is what the final prefetch call of each loop relies on.
        if i0 < nocc:
            i1 = min(i0 + occblk, nocc)
            for k, (p0, p1, q0, q1) in enumerate(aa_blk_slices):
                eri[:i1 - i0, :, p0:p1, q0:q1] = h5g[str(k)][i0:i1]
                if p0 != q0:
                    # Mirror block: read with the two occupied axes
                    # exchanged and swap the two AO axes as well.
                    dat = numpy.asarray(h5g[str(k)][:, i0:i1])
                    eri[:i1 - i0, :, q0:q1, p0:p1] = dat.transpose(1, 0, 3, 2)

    def load_ab(h5g, nocca, i0, eri):
        # Mixed-spin blocks were stored for every pair of shell ranges, so
        # no mirroring is needed here.
        if i0 < nocca:
            i1 = min(i0 + occblk, nocca)
            for k, (p0, p1, q0, q1) in enumerate(ab_blk_slices):
                eri[:i1 - i0, :, p0:p1, q0:q1] = h5g[str(k)][i0:i1]

    def save(h5dat, nvir, i0, i1, dat):
        # Write the nvir rows that belong to each occupied orbital i.
        for i in range(i0, i1):
            h5dat[i * nvir:(i + 1) * nvir] = dat[i - i0].reshape(nvir, -1)

    # ---- pass 2: buf is the batch being transformed while buf_prefecth is
    # filled in the background with the next batch.
    with lib.call_in_background(save) as bsave:
        with lib.call_in_background(load_aa) as prefetch:
            buf_prefecth = numpy.empty((occblk, nocca, nao, nao))
            buf = numpy.empty_like(buf_prefecth)
            load_aa(ftmp['aa'], nocca, 0, buf_prefecth)
            for i0, i1 in lib.prange(0, nocca, occblk):
                buf, buf_prefecth = buf_prefecth, buf
                prefetch(ftmp['aa'], nocca, i1, buf_prefecth)
                eri = buf[:i1 - i0].reshape((i1 - i0) * nocca, nao, nao)
                dat = _ao2mo.nr_e2(eri, orbva, (0, nvira, 0, nvira), 's1',
                                   's1')
                bsave(
                    fovov, nvira, i0, i1,
                    dat.reshape(i1 - i0, nocca, nvira,
                                nvira).transpose(0, 2, 1, 3))
                time1 = log.timer_debug1(
                    'pass2 ao2mo for aa [%d:%d]' % (i0, i1), *time1)

            buf_prefecth = numpy.empty((occblk, noccb, nao, nao))
            buf = numpy.empty_like(buf_prefecth)
            load_aa(ftmp['bb'], noccb, 0, buf_prefecth)
            for i0, i1 in lib.prange(0, noccb, occblk):
                buf, buf_prefecth = buf_prefecth, buf
                prefetch(ftmp['bb'], noccb, i1, buf_prefecth)
                eri = buf[:i1 - i0].reshape((i1 - i0) * noccb, nao, nao)
                dat = _ao2mo.nr_e2(eri, orbvb, (0, nvirb, 0, nvirb), 's1',
                                   's1')
                bsave(
                    fOVOV, nvirb, i0, i1,
                    dat.reshape(i1 - i0, noccb, nvirb,
                                nvirb).transpose(0, 2, 1, 3))
                time1 = log.timer_debug1(
                    'pass2 ao2mo for bb [%d:%d]' % (i0, i1), *time1)

        # Both virtual sets side by side, so that nr_e2 can pick columns
        # [0, nvira) for one index and [nvira, nvira+nvirb) for the other.
        orbvab = numpy.asarray(numpy.hstack((orbva, orbvb)), order='F')
        with lib.call_in_background(load_ab) as prefetch:
            # Reuses the (occblk, noccb, nao, nao) buffers of the 'bb' loop.
            load_ab(ftmp['ab'], nocca, 0, buf_prefecth)
            for i0, i1 in lib.prange(0, nocca, occblk):
                buf, buf_prefecth = buf_prefecth, buf
                prefetch(ftmp['ab'], nocca, i1, buf_prefecth)
                eri = buf[:i1 - i0].reshape((i1 - i0) * noccb, nao, nao)
                dat = _ao2mo.nr_e2(eri, orbvab,
                                   (0, nvira, nvira, nvira + nvirb), 's1',
                                   's1')
                bsave(
                    fovOV, nvira, i0, i1,
                    dat.reshape(i1 - i0, noccb, nvira,
                                nvirb).transpose(0, 2, 1, 3))
                time1 = log.timer_debug1(
                    'pass2 ao2mo for ab [%d:%d]' % (i0, i1), *time1)

    time0 = log.timer('mp2 ao2mo_ovov pass2', *time0)
Esempio n. 2
0
def _ao2mo_ovov(mp, orbo, orbv, feri, max_memory=2000, verbose=None):
    '''Two-pass, out-of-core AO->MO transformation writing the dataset 'ovov'
    into ``feri``.

    Pass 1 evaluates the AO integrals for every pair of AO shell ranges with
    the second range not beyond the first, contracts the first and the last
    AO index with ``orbo`` and stores each block in a temporary HDF5 file.
    Pass 2 reloads the blocks for a batch of occupied orbitals, rebuilds the
    full (nao, nao) matrices (mirroring the off-diagonal blocks), transforms
    the two remaining AO indices with ``orbv`` through ``_ao2mo.nr_e2`` and
    writes the rows of each occupied orbital to the output dataset.

    Args:
        mp : object providing ``mp.mol``; also handed to ``logger.new_logger``.
        orbo : 2D array, shape (nao, nocc)
            Coefficients used for the first and the last AO index.
        orbv : 2D array with nvir columns (nvir <= nao is asserted)
            Coefficients used for the two remaining AO indices.
        feri : object with a ``create_dataset`` method (presumably an h5py
            File or Group).

    Kwargs:
        max_memory : number
            Memory budget in MB, used to choose the AO block size (dmax) and
            the occupied-orbital batch size (occblk).
        verbose : int or logger object
            Forwarded to ``logger.new_logger``.

    Returns:
        The dataset created by ``feri.create_dataset('ovov', ...)``, of shape
        (nocc*nvir, nocc*nvir).
    '''
    # time.clock() was removed in Python 3.8.  Use the same clock pair that
    # half_e1 passes to log.timer.
    time0 = (logger.process_clock(), logger.perf_counter())
    log = logger.new_logger(mp, verbose)

    mol = mp.mol
    int2e = mol._add_suffix('int2e')
    ao2mopt = _ao2mo.AO2MOpt(mol, int2e, 'CVHFnr_schwarz_cond',
                             'CVHFsetnr_direct_scf')
    nao, nocc = orbo.shape
    nvir = orbv.shape[1]
    nbas = mol.nbas
    assert (nvir <= nao)

    ao_loc = mol.ao_loc_nr()
    # Target number of AOs per shell range: at least 4, at most nao/3, and
    # otherwise bounded by the memory budget.
    dmax = max(
        4, min(nao / 3, numpy.sqrt(max_memory * .95e6 / 8 / (nao + nocc)**2)))
    sh_ranges = ao2mo.outcore.balance_partition(ao_loc, dmax)
    # From here on dmax is the largest block size actually produced.
    dmax = max(x[2] for x in sh_ranges)
    eribuf = numpy.empty((nao, dmax, dmax, nao))
    ftmp = lib.H5TmpFile()
    log.debug('max_memory %s MB (dmax = %s) required disk space %g MB',
              max_memory, dmax,
              nocc**2 * (nao * (nao + dmax) / 2 + nvir**2) * 8 / 1e6)

    # Preallocated work space.  buf_li and buf1 alternate so that the block
    # handed to the background writer is not overwritten by the next step.
    buf_i = numpy.empty((nocc * dmax**2 * nao))
    buf_li = numpy.empty((nocc**2 * dmax**2))
    buf1 = numpy.empty_like(buf_li)

    fint = gto.moleintor.getints4c
    # AO index ranges (i0, i1, j0, j1) of the saved blocks; entry k belongs
    # to the dataset named str(k).
    jk_blk_slices = []
    count = 0
    time1 = time0
    # ---- pass 1: half transformation; blocks are written in the background
    with lib.call_in_background(ftmp.__setitem__) as save:
        for ip, (ish0, ish1, ni) in enumerate(sh_ranges):
            for jsh0, jsh1, nj in sh_ranges[:ip + 1]:
                i0, i1 = ao_loc[ish0], ao_loc[ish1]
                j0, j1 = ao_loc[jsh0], ao_loc[jsh1]
                jk_blk_slices.append((i0, i1, j0, j1))

                eri = fint(int2e,
                           mol._atm,
                           mol._bas,
                           mol._env,
                           shls_slice=(0, nbas, ish0, ish1, jsh0, jsh1, 0,
                                       nbas),
                           aosym='s1',
                           ao_loc=ao_loc,
                           cintopt=ao2mopt._cintopt,
                           out=eribuf)
                # Views on the preallocated buffers sized for this block.
                tmp_i = numpy.ndarray((nocc, (i1 - i0) * (j1 - j0) * nao),
                                      buffer=buf_i)
                tmp_li = numpy.ndarray((nocc, nocc * (i1 - i0) * (j1 - j0)),
                                       buffer=buf_li)
                # Contract the first AO index, then the last AO index.
                lib.ddot(orbo.T,
                         eri.reshape(nao, (i1 - i0) * (j1 - j0) * nao),
                         c=tmp_i)
                lib.ddot(orbo.T,
                         tmp_i.reshape(nocc * (i1 - i0) * (j1 - j0), nao).T,
                         c=tmp_li)
                tmp_li = tmp_li.reshape(nocc, nocc, (i1 - i0), (j1 - j0))
                save(str(count), tmp_li.transpose(1, 0, 2, 3))
                buf_li, buf1 = buf1, buf_li
                count += 1
                time1 = log.timer_debug1(
                    'partial ao2mo [%d:%d,%d:%d]' % (ish0, ish1, jsh0, jsh1),
                    *time1)
    time1 = time0 = log.timer('mp2 ao2mo_ovov pass1', *time0)
    # Drop the references to the pass-1 work arrays.
    eri = eribuf = tmp_i = tmp_li = buf_i = buf_li = buf1 = None

    h5dat = feri.create_dataset('ovov', (nocc * nvir, nocc * nvir),
                                'f8',
                                chunks=(nvir, nvir))
    # Number of occupied orbitals processed per pass-2 batch.
    occblk = int(
        min(nocc,
            max(4, 250 / nocc, max_memory * .9e6 / 8 / (nao**2 * nocc) / 5)))

    def load(i0, eri):
        # Fill eri[:n] with the blocks of occupied orbitals [i0, i0+occblk).
        # Does nothing when i0 is past the last orbital, which is what the
        # final prefetch call of the loop relies on.
        if i0 < nocc:
            i1 = min(i0 + occblk, nocc)
            for k, (p0, p1, q0, q1) in enumerate(jk_blk_slices):
                eri[:i1 - i0, :, p0:p1, q0:q1] = ftmp[str(k)][i0:i1]
                if p0 != q0:
                    # Mirror block: read with the two occupied axes
                    # exchanged and swap the two AO axes as well.
                    dat = numpy.asarray(ftmp[str(k)][:, i0:i1])
                    eri[:i1 - i0, :, q0:q1, p0:p1] = dat.transpose(1, 0, 3, 2)

    def save(i0, i1, dat):
        # Write the nvir rows that belong to each occupied orbital i.
        for i in range(i0, i1):
            h5dat[i * nvir:(i + 1) * nvir] = dat[i - i0].reshape(
                nvir, nocc * nvir)

    orbv = numpy.asarray(orbv, order='F')
    # Two pairs of buffers: (buf, buf_prefecth) for reading ahead and
    # (bufw, bufw1) for the output of nr_e2 while the previous result is
    # still being written in the background.
    buf_prefecth = numpy.empty((occblk, nocc, nao, nao))
    buf = numpy.empty_like(buf_prefecth)
    bufw = numpy.empty((occblk * nocc, nvir**2))
    bufw1 = numpy.empty_like(bufw)
    # ---- pass 2
    with lib.call_in_background(load) as prefetch:
        with lib.call_in_background(save) as bsave:
            load(0, buf_prefecth)
            for i0, i1 in lib.prange(0, nocc, occblk):
                buf, buf_prefecth = buf_prefecth, buf
                prefetch(i1, buf_prefecth)
                eri = buf[:i1 - i0].reshape((i1 - i0) * nocc, nao, nao)

                dat = _ao2mo.nr_e2(eri,
                                   orbv, (0, nvir, 0, nvir),
                                   's1',
                                   's1',
                                   out=bufw)
                bsave(
                    i0, i1,
                    dat.reshape(i1 - i0, nocc, nvir,
                                nvir).transpose(0, 2, 1, 3))
                bufw, bufw1 = bufw1, bufw
                time1 = log.timer_debug1('pass2 ao2mo [%d:%d]' % (i0, i1),
                                         *time1)

    time0 = log.timer('mp2 ao2mo_ovov pass2', *time0)
    return h5dat
Esempio n. 3
0
def half_e1(mol, mo_coeffs, swapfile,
            intor='int2e', aosym='s4', comp=1,
            max_memory=MAX_MEMORY, ioblk_size=IOBLK_SIZE, verbose=logger.WARN,
            compact=True, ao2mopt=None):
    r'''Half transform arbitrary spherical AO integrals to MO integrals
    for the given two sets of orbitals

    Args:
        mol : :class:`Mole` object
            AO integrals will be generated in terms of mol._atm, mol._bas, mol._env
        mo_coeffs : sequence of ndarrays
            Only mo_coeffs[0] and mo_coeffs[1] are used.  They are the two
            sets of orbital coefficients applied to the i, j indices of
            (ij|kl); the k, l indices stay in the AO basis.
        swapfile : str or h5py File or h5py Group object
            To store the transformed integrals, in HDF5 format.  The transformed
            integrals are saved in blocks.  An h5py Group is used directly;
            anything else is passed to lib.H5TmpFile.

    Kwargs:
        intor : str
            Name of the 2-electron integral.  Ref to :func:`getints_by_shell`
            for the complete list of available 2-electron integral names
        aosym : int or str
            Permutation symmetry for the AO integrals

            | 4 or '4' or 's4': 4-fold symmetry (default)
            | '2ij' or 's2ij' : symmetry between i, j in (ij|kl)
            | '2kl' or 's2kl' : symmetry between k, l in (ij|kl)
            | 1 or '1' or 's1': no symmetry
            | 'a4ij' : 4-fold symmetry with anti-symmetry between i, j in (ij|kl) (TODO)
            | 'a4kl' : 4-fold symmetry with anti-symmetry between k, l in (ij|kl) (TODO)
            | 'a2ij' : anti-symmetry between i, j in (ij|kl) (TODO)
            | 'a2kl' : anti-symmetry between k, l in (ij|kl) (TODO)

        comp : int
            Components of the integrals, e.g. int2e_ip_sph has 3 components.
            One HDF5 group named str(icomp) is created per component.
        max_memory : float or int
            The maximum size of cache to use (in MB), large cache may **not**
            improve performance.
        ioblk_size : float or int
            The block size for IO, large block size may **not** improve performance
        verbose : int
            Print level
        compact : bool
            Permutation symmetry between the two orbital sets is requested
            from incore._conc_mos only when compact is True and aosym is
            's4' or 's2ij'.  If it's False, the function will abandon that
            permutation symmetry, and store the "plain" half-transformed
            integrals
        ao2mopt : :class:`AO2MOpt` object
            Precomputed data to improve performance.  When None, one is
            created here.

    Returns:
        The ``swapfile`` argument, as it was passed in.

    Raises:
        NotImplementedError : if any array in mo_coeffs has dtype complex128.

    '''
    if any(c.dtype == numpy.complex128 for c in mo_coeffs):
        raise NotImplementedError('Integral transformation for complex orbitals')

    intor = mol._add_suffix(intor)
    time0 = (logger.process_clock(), logger.perf_counter())
    log = logger.new_logger(mol, verbose)

    nao = mo_coeffs[0].shape[0]
    aosym = _stand_sym_code(aosym)
    # Number of AO pairs kept for the first two indices: packed lower
    # triangle when the AO integrals are symmetric in (i, j), else full.
    if aosym in ('s4', 's2ij'):
        nao_pair = nao * (nao+1) // 2
    else:
        nao_pair = nao * nao

    ijmosym, nij_pair, moij, ijshape = \
            incore._conc_mos(mo_coeffs[0], mo_coeffs[1],
                             compact and aosym in ('s4', 's2ij'))

    e1buflen, mem_words, iobuf_words, ioblk_words = \
            guess_e1bufsize(max_memory, ioblk_size, nij_pair, nao_pair, comp)
    # Convert the IO block size from 8-byte words back to MB.
    ioblk_size = ioblk_words * 8/1e6
# The buffer to hold AO integrals in C code, see line (@)
    aobuflen = max(int((mem_words - 2*comp*e1buflen*nij_pair) // (nao_pair*comp)),
                   IOBUF_ROW_MIN)
    ao_loc = mol.ao_loc_nr('_cart' in intor)
    shranges = guess_shell_ranges(mol, (aosym in ('s4', 's2kl')), e1buflen,
                                  aobuflen, ao_loc)
    if ao2mopt is None:
        # The Schwarz-condition prescreening is set up only for the plain
        # int2e integrals.
        if intor == 'int2e_cart' or intor == 'int2e_sph':
            ao2mopt = _ao2mo.AO2MOpt(mol, intor, 'CVHFnr_schwarz_cond',
                                     'CVHFsetnr_direct_scf')
        else:
            ao2mopt = _ao2mo.AO2MOpt(mol, intor)

    if isinstance(swapfile, h5py.Group):
        fswap = swapfile
    else:
        fswap = lib.H5TmpFile(swapfile)
    for icomp in range(comp):
        fswap.create_group(str(icomp)) # for h5py old version

    log.debug('step1: tmpfile %s  %.8g MB', fswap.filename, nij_pair*nao_pair*8/1e6)
    log.debug('step1: (ij,kl) = (%d,%d), mem cache %.8g MB, iobuf %.8g MB',
              nij_pair, nao_pair, mem_words*8/1e6, iobuf_words*8/1e6)
    nstep = len(shranges)
    # From here on e1buflen is the largest buffer length actually needed by
    # any of the shell ranges.
    e1buflen = max([x[2] for x in shranges])

    e2buflen, chunks = guess_e2bufsize(ioblk_size, nij_pair, e1buflen)
    def save(istep, iobuf):
        # Store the block of step istep for every component under the
        # dataset name '<icomp>/<istep>'.
        for icomp in range(comp):
            _transpose_to_h5g(fswap, '%d/%d'%(icomp,istep), iobuf[icomp],
                              e2buflen, None)

    # transform e1
    ti0 = log.timer('Initializing ao2mo.outcore.half_e1', *time0)
    with lib.call_in_background(save) as async_write:
        # buf1 receives the AO integrals.  buf2 and buf_write alternate so
        # that the block given to the background writer is not overwritten
        # while the next one is being computed.
        buf1 = numpy.empty((comp*e1buflen,nao_pair))
        buf2 = numpy.empty((comp*e1buflen,nij_pair))
        buf_write = numpy.empty_like(buf2)
        fill = _ao2mo.nr_e1fill
        f_e1 = _ao2mo.nr_e1
        for istep,sh_range in enumerate(shranges):
            log.debug1('step 1 [%d/%d], AO [%d:%d], len(buf) = %d',
                       istep+1, nstep, *(sh_range[:3]))
            buflen = sh_range[2]
            iobuf = numpy.ndarray((comp,buflen,nij_pair), buffer=buf2)
            nmic = len(sh_range[3])
            p1 = 0
            # sh_range[3] lists the smaller shell ranges processed one at a
            # time; their results are placed side by side in iobuf.
            for imic, aoshs in enumerate(sh_range[3]):
                log.debug2('      fill iobuf micro [%d/%d], AO [%d:%d], len(aobuf) = %d',
                           imic+1, nmic, *aoshs)
                buf = fill(intor, aoshs, mol._atm, mol._bas, mol._env,
                           aosym, comp, ao2mopt, out=buf1).reshape(-1,nao_pair)
                buf = f_e1(buf, moij, ijshape, aosym, ijmosym)
                p0, p1 = p1, p1 + aoshs[2]
                iobuf[:,p0:p1] = buf.reshape(comp,aoshs[2],nij_pair)
            ti0 = log.timer_debug1('gen AO/transform MO [%d/%d]'%(istep+1,nstep), *ti0)

            async_write(istep, iobuf)
            buf2, buf_write = buf_write, buf2

    fswap = None
    return swapfile
Esempio n. 4
0
def _make_ao_ints(mol, mo_coeff, nocc, dtype):
    '''Build three ctf tensors holding integrals whose first two indices are
    AO indices and whose last two indices are MO indices.

    The pairs of AO shell ranges are split between the processes by
    ``static_partition``.  For each of its pairs a process evaluates the AO
    integrals, transforms the last two indices with ``mo_coeff`` through
    ``_ao2mo.nr_e2`` and writes the occupied-occupied, occupied-virtual and
    virtual-virtual parts into the corresponding tensor.

    ``size``, ``comm`` and ``static_partition`` are taken from the enclosing
    module (presumably the MPI communicator size, the communicator and a
    helper that distributes a task list over ranks -- confirm there).

    Args:
        mol : Mole-like object providing the integral environment.
        mo_coeff : 2D array, shape (nao, nmo).
        nocc : int
            Number of leading columns of ``mo_coeff`` treated as occupied;
            the remaining nmo - nocc columns are treated as virtual.
        dtype : data type given to the ctf tensors.

    Returns:
        (ppoo, ppov, ppvv) : ctf tensors of shape (nao, nao, nocc, nocc),
        (nao, nao, nocc, nvir) and (nao, nao, nvir, nvir).
    '''
    NS = ctf.SYM.NS
    SY = ctf.SYM.SY

    ao_loc = mol.ao_loc_nr()
    mo = np.asarray(mo_coeff, order='F')
    nao, nmo = mo.shape
    nvir = nmo - nocc

    ppoo = ctf.tensor((nao, nao, nocc, nocc),
                      sym=[SY, NS, NS, NS],
                      dtype=dtype)
    ppov = ctf.tensor((nao, nao, nocc, nvir),
                      sym=[SY, NS, NS, NS],
                      dtype=dtype)
    ppvv = ctf.tensor((nao, nao, nvir, nvir),
                      sym=[SY, NS, SY, NS],
                      dtype=dtype)
    intor = mol._add_suffix('int2e')
    ao2mopt = _ao2mo.AO2MOpt(mol, intor, 'CVHFnr_schwarz_cond',
                             'CVHFsetnr_direct_scf')
    blksize = int(max(4, min(nao / 3, nao / size**.5, 2000e6 / 8 / nao**3)))
    sh_ranges = ao2mo.outcore.balance_partition(ao_loc, blksize)
    # Every pair of shell ranges with the second range not beyond the first.
    tasks = [(ish0, ish1, jsh0, jsh1)
             for k, (ish0, ish1, _) in enumerate(sh_ranges)
             for jsh0, jsh1, _ in sh_ranges[:k + 1]]

    # Flat index of every (p, q) AO pair, and flat index of every
    # lower-triangular (a, b) virtual pair.
    sqidx = np.arange(nao**2).reshape(nao, nao)
    vsqidx = np.arange(nvir**2).reshape(nvir, nvir)
    vtrilidx = vsqidx[np.tril_indices(nvir)]

    subtasks = list(static_partition(tasks))
    # All processes run the same number of iterations; a process that has
    # run out of tasks issues empty writes instead (presumably because the
    # ctf write is a collective operation -- confirm against ctf).
    ntasks = max(comm.allgather(len(subtasks)))
    for itask in range(ntasks):
        if itask >= len(subtasks):
            ppoo.write([], [])
            ppov.write([], [])
            ppvv.write([], [])
            continue

        shls_slice = subtasks[itask]
        ish0, ish1, jsh0, jsh1 = shls_slice
        i0, i1 = ao_loc[ish0], ao_loc[ish1]
        j0, j1 = ao_loc[jsh0], ao_loc[jsh1]
        di = i1 - i0
        dj = j1 - j0
        if i0 != j0:
            # Off-diagonal block: all di*dj AO pairs are kept.
            eri = gto.moleintor.getints4c(intor,
                                          mol._atm,
                                          mol._bas,
                                          mol._env,
                                          shls_slice=shls_slice,
                                          aosym='s2kl',
                                          ao_loc=ao_loc,
                                          cintopt=ao2mopt._cintopt)
            idx = sqidx[i0:i1, j0:j1].ravel()

            eri = _ao2mo.nr_e2(eri.reshape(di * dj, -1), mo, (0, nmo, 0, nmo),
                               's2kl', 's1')
        else:
            # Diagonal block: only the lower-triangular AO pairs are kept.
            eri = gto.moleintor.getints4c(intor,
                                          mol._atm,
                                          mol._bas,
                                          mol._env,
                                          shls_slice=shls_slice,
                                          aosym='s4',
                                          ao_loc=ao_loc,
                                          cintopt=ao2mopt._cintopt)
            eri = _ao2mo.nr_e2(eri, mo, (0, nmo, 0, nmo), 's4', 's1')
            idx = sqidx[i0:i1, j0:j1][np.tril_indices(i1 - i0)]

        # Flat positions in each tensor: AO-pair index times the size of
        # the trailing MO-pair block, plus the offset inside that block.
        ooidx = idx[:, None] * nocc**2 + np.arange(nocc**2)
        ovidx = idx[:, None] * (nocc * nvir) + np.arange(nocc * nvir)
        vvidx = idx[:, None] * nvir**2 + vtrilidx
        eri = eri.reshape(-1, nmo, nmo)
        ppoo.write(ooidx.ravel(), eri[:, :nocc, :nocc].ravel())
        ppov.write(ovidx.ravel(), eri[:, :nocc, nocc:].ravel())
        ppvv.write(vvidx.ravel(),
                   pyscflib.pack_tril(eri[:, nocc:, nocc:]).ravel())
        idx = eri = None
    return ppoo, ppov, ppvv
Esempio n. 5
0
def trans_e1_outcore(mol,
                     mo,
                     ncore,
                     ncas,
                     erifile,
                     max_memory=None,
                     level=1,
                     verbose=logger.WARN):
    '''Out-of-core integral transformation writing the datasets 'papa' and
    'ppaa' to the HDF5 file ``erifile``.

    The AO integrals are generated in batches of shell pairs.  Each batch is
    half transformed with ``_ao2mo.nr_e1`` using all MOs for one index and
    the MOs [ncore, ncore+ncas) for the other.  The part needed for 'ppaa' is
    kept in a temporary HDF5 file and the part needed for 'papa' is
    accumulated in memory.  After the loop both are transformed further and
    written to ``erifile``.  With ``level == 1`` the arrays ``j_pc`` and
    ``k_pc`` are accumulated as well.

    Args:
        mol : Mole-like object providing the integral environment and stdout.
        mo : 2D array, shape (nao, nmo).
        ncore : int
            Number of leading MOs treated as core.
        ncas : int
            Number of MOs following the core ones treated as active.
        erifile : str
            Name of the HDF5 file to create (opened with mode 'w').

    Kwargs:
        max_memory : number
            Memory budget in MB.  NOTE: although the default is None, the
            arithmetic below requires a number, so callers have to pass one.
        level : int
            With level == 1, j_pc and k_pc are computed; otherwise they are
            returned as zeros.
        verbose : int or logger.Logger

    Returns:
        (j_pc, k_pc) : two arrays of shape (nmo, ncore).
    '''
    # time.clock() was removed in Python 3.8.  Use the clock pair of the
    # logger module that half_e1 also passes to log.timer.
    time0 = (logger.process_clock(), logger.perf_counter())
    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(mol.stdout, verbose)
    log.debug1('trans_e1_outcore level %d  max_memory %d', level, max_memory)
    nao, nmo = mo.shape
    nao_pair = nao * (nao + 1) // 2
    nocc = ncore + ncas

    _tmpfile1 = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
    # Open the scratch file writable explicitly: since h5py 3.0 the default
    # mode is read-only, which makes the dataset assignments below fail.
    faapp_buf = h5py.File(_tmpfile1.name, 'w')
    feri = h5py.File(erifile, 'w')

    # Both memory layouts are kept: mo_c for slicing rows, mo for the
    # C routines and the dot products.
    mo_c = numpy.asarray(mo, order='C')
    mo = numpy.asarray(mo, order='F')
    pashape = (0, nmo, ncore, nocc)
    papa_buf = numpy.zeros((nao, ncas, nmo * ncas))
    j_pc = numpy.zeros((nmo, ncore))
    k_pc = numpy.zeros((nmo, ncore))

    # Memory left for the batch buffers (at least 2000 MB), in 8-byte words.
    mem_words = int(max(2000, max_memory - papa_buf.nbytes / 1e6) * 1e6 / 8)
    aobuflen = mem_words // (nao_pair + nocc * nmo) + 1
    ao_loc = numpy.array(mol.ao_loc_nr(), dtype=numpy.int32)
    shranges = outcore.guess_shell_ranges(mol, True, aobuflen, None, ao_loc)
    intor = mol._add_suffix('int2e')
    ao2mopt = _ao2mo.AO2MOpt(mol, intor, 'CVHFnr_schwarz_cond',
                             'CVHFsetnr_direct_scf')
    nstep = len(shranges)
    paapp = 0
    maxbuflen = max([x[2] for x in shranges])
    log.debug('mem_words %.8g MB, maxbuflen = %d', mem_words * 8 / 1e6,
              maxbuflen)
    bufs1 = numpy.empty((maxbuflen, nao_pair))
    bufs2 = numpy.empty((maxbuflen, nmo * ncas))
    if level == 1:
        bufs3 = numpy.empty((maxbuflen, nao * ncore))
        log.debug('mem cache %.8g MB',
                  (bufs1.nbytes + bufs2.nbytes + bufs3.nbytes) / 1e6)
    else:
        log.debug('mem cache %.8g MB', (bufs1.nbytes + bufs2.nbytes) / 1e6)
    ti0 = log.timer('Initializing trans_e1_outcore', *time0)

    # fmmm, ftrans, fdrv for level 1
    fmmm = libmcscf.AO2MOmmm_ket_nr_s2
    ftrans = libmcscf.AO2MOtranse1_nr_s4
    fdrv = libmcscf.AO2MOnr_e2_drv
    for istep, sh_range in enumerate(shranges):
        log.debug('[%d/%d], AO [%d:%d], len(buf) = %d', istep + 1, nstep,
                  *sh_range)
        # AO integrals of this batch, one row per AO pair of the batch.
        buf = bufs1[:sh_range[2]]
        _ao2mo.nr_e1fill(intor, sh_range, mol._atm, mol._bas, mol._env, 's4',
                         1, ao2mopt, buf)
        if log.verbose >= logger.DEBUG1:
            ti1 = log.timer('AO integrals buffer', *ti0)
        bufpa = bufs2[:sh_range[2]]
        _ao2mo.nr_e1(buf, mo, pashape, 's4', 's1', out=bufpa)
        # jc_pp, kc_pp
        if level == 1:  # ppaa, papa and vhf, jcp, kcp
            if log.verbose >= logger.DEBUG1:
                ti1 = log.timer('buffer-pa', *ti1)
            buf1 = bufs3[:sh_range[2]]
            fdrv(ftrans, fmmm, buf1.ctypes.data_as(ctypes.c_void_p),
                 buf.ctypes.data_as(ctypes.c_void_p),
                 mo.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(sh_range[2]),
                 ctypes.c_int(nao), (ctypes.c_int * 4)(0, nao, 0, ncore),
                 ctypes.POINTER(ctypes.c_void_p)(), ctypes.c_int(0))
            # Walk through the shell pairs of the batch; p0 is the row
            # offset of the current shell pair inside buf1.
            p0 = 0
            for ij in range(sh_range[0], sh_range[1]):
                i, j = _ao2mo._extract_pair(ij)
                i0 = ao_loc[i]
                j0 = ao_loc[j]
                i1 = ao_loc[i + 1]
                j1 = ao_loc[j + 1]
                di = i1 - i0
                dj = j1 - j0
                if i == j:
                    # Diagonal shell pair: rows hold the packed lower
                    # triangle, which is expanded to a full (di, di) block.
                    dij = di * (di + 1) // 2
                    buf = numpy.empty((di, di, nao * ncore))
                    idx = numpy.tril_indices(di)
                    buf[idx] = buf1[p0:p0 + dij]
                    buf[idx[1], idx[0]] = buf1[p0:p0 + dij]
                    buf = buf.reshape(di, di, nao, ncore)
                    mo1 = mo_c[i0:i1]
                    tmp = numpy.einsum('uvpc,pc->uvc', buf, mo[:, :ncore])
                    tmp = lib.dot(mo1.T, tmp.reshape(di, -1))
                    j_pc += numpy.einsum('vp,pvc->pc', mo1,
                                         tmp.reshape(nmo, di, ncore))
                    tmp = numpy.einsum('uvpc,uc->vcp', buf, mo1[:, :ncore])
                    tmp = lib.dot(tmp.reshape(-1, nmo),
                                  mo).reshape(di, ncore, nmo)
                    k_pc += numpy.einsum('vp,vcp->pc', mo1, tmp)
                else:
                    # Off-diagonal shell pair: the (i, j) and (j, i)
                    # contributions are both accounted for here.
                    dij = di * dj
                    buf = buf1[p0:p0 + dij].reshape(di, dj, nao, ncore)
                    mo1 = mo_c[i0:i1]
                    mo2 = mo_c[j0:j1]
                    tmp = numpy.einsum('uvpc,pc->uvc', buf, mo[:, :ncore])
                    tmp = lib.dot(mo1.T, tmp.reshape(di, -1))
                    j_pc += numpy.einsum('vp,pvc->pc', mo2,
                                         tmp.reshape(nmo, dj, ncore)) * 2
                    tmp = numpy.einsum('uvpc,uc->vcp', buf, mo1[:, :ncore])
                    tmp = lib.dot(tmp.reshape(-1, nmo),
                                  mo).reshape(dj, ncore, nmo)
                    k_pc += numpy.einsum('vp,vcp->pc', mo2, tmp)
                    tmp = numpy.einsum('uvpc,vc->ucp', buf, mo2[:, :ncore])
                    tmp = lib.dot(tmp.reshape(-1, nmo),
                                  mo).reshape(di, ncore, nmo)
                    k_pc += numpy.einsum('up,ucp->pc', mo1, tmp)
                p0 += dij
            if log.verbose >= logger.DEBUG1:
                ti1 = log.timer('j_cp and k_cp', *ti1)

        if log.verbose >= logger.DEBUG1:
            ti1 = log.timer('half transformation of the buffer', *ti1)

        # ppaa, papa
        # Keep the active-active part of this batch in the scratch file,
        # stored transposed as (ncas**2, number of AO pairs in the batch).
        faapp_buf[str(istep)] = \
                bufpa.reshape(sh_range[2],nmo,ncas)[:,ncore:nocc].reshape(-1,ncas**2).T
        p0 = 0
        for ij in range(sh_range[0], sh_range[1]):
            i, j = _ao2mo._extract_pair(ij)
            i0 = ao_loc[i]
            j0 = ao_loc[j]
            i1 = ao_loc[i + 1]
            j1 = ao_loc[j + 1]
            di = i1 - i0
            dj = j1 - j0
            if i == j:
                dij = di * (di + 1) // 2
                buf1 = numpy.empty((di, di, nmo * ncas))
                idx = numpy.tril_indices(di)
                buf1[idx] = bufpa[p0:p0 + dij]
                buf1[idx[1], idx[0]] = bufpa[p0:p0 + dij]
            else:
                dij = di * dj
                buf1 = bufpa[p0:p0 + dij].reshape(di, dj, -1)
                mo1 = mo[j0:j1, ncore:nocc].copy()
                # Contribution with the two AO indices exchanged, added
                # into the rows i0..i1 of papa_buf.
                for i in range(di):
                    lib.dot(mo1.T, buf1[i], 1, papa_buf[i0 + i], 1)
            mo1 = mo[i0:i1, ncore:nocc].copy()
            buf1 = lib.dot(mo1.T, buf1.reshape(di, -1))
            papa_buf[j0:j1] += buf1.reshape(ncas, dj, -1).transpose(1, 0, 2)
            p0 += dij
        if log.verbose >= logger.DEBUG1:
            ti1 = log.timer('ppaa and papa buffer', *ti1)

        ti0 = log.timer('gen AO/transform MO [%d/%d]' % (istep + 1, nstep),
                        *ti0)
    # Drop the references to the batch buffers before the second pass.
    buf = buf1 = bufpa = None
    bufs1 = bufs2 = bufs3 = None
    time1 = log.timer('mc_ao2mo pass 1', *time0)

    log.debug1('Half transformation done. Current memory %d',
               lib.current_memory()[0])

    # papa: transform the remaining AO index of papa_buf, nblk MOs at a time.
    nblk = int(
        max(8,
            min(nmo,
                (max_memory * 1e6 / 8 - papa_buf.size) / (ncas**2 * nmo))))
    log.debug1('nblk for papa = %d', nblk)
    dset = feri.create_dataset('papa', (nmo, ncas, nmo, ncas), 'f8')
    for i0, i1 in prange(0, nmo, nblk):
        tmp = lib.dot(mo[:, i0:i1].T, papa_buf.reshape(nao, -1))
        dset[i0:i1] = tmp.reshape(i1 - i0, ncas, nmo, ncas)
    papa_buf = tmp = None
    time1 = log.timer('papa pass 2', *time1)

    # ppaa: gather the batches saved in the scratch file into one
    # (ncas**2, nao_pair) array, then transform its AO-pair index.
    tmp = numpy.empty((ncas**2, nao_pair))
    p0 = 0
    for istep, sh_range in enumerate(shranges):
        tmp[:, p0:p0 + sh_range[2]] = faapp_buf[str(istep)]
        p0 += sh_range[2]
    nblk = int(
        max(8, min(nmo,
                   (max_memory * 1e6 / 8 - tmp.size) / (ncas**2 * nmo) - 1)))
    log.debug1('nblk for ppaa = %d', nblk)
    dset = feri.create_dataset('ppaa', (nmo, nmo, ncas, ncas), 'f8')
    for i0, i1 in prange(0, nmo, nblk):
        tmp1 = _ao2mo.nr_e2(tmp,
                            mo, (i0, i1, 0, nmo),
                            's4',
                            's1',
                            ao_loc=ao_loc)
        tmp1 = tmp1.reshape(ncas, ncas, i1 - i0, nmo)
        for j in range(i1 - i0):
            dset[i0 + j] = tmp1[:, :, j].transpose(2, 0, 1)
    tmp = tmp1 = None
    time1 = log.timer('ppaa pass 2', *time1)

    faapp_buf.close()
    feri.close()
    _tmpfile1 = None
    time0 = log.timer('mc_ao2mo', *time0)
    return j_pc, k_pc
Esempio n. 6
0
def half_e1(mol, mo_coeffs, swapfile,
            intor='int2e_spinor', aosym='s4', comp=None,
            max_memory=MAX_MEMORY, ioblk_size=IOBLK_SIZE, verbose=logger.WARN,
            ao2mopt=None):
    '''Half transform spinor AO integrals with two sets of orbitals and write
    the result, block by block, to an HDF5 swap file.

    Args:
        mol : Mole object
            Its _atm, _bas and _env are handed to the integral driver.
        mo_coeffs : sequence of 2D arrays
            mo_coeffs[0] and mo_coeffs[1] are (nao, nmo) coefficient arrays.
        swapfile : str
            Name of the HDF5 file to create (opened with mode 'w').

    Kwargs:
        intor : str
            Name of the 2-electron integral.
        aosym : int or str
            Permutation symmetry code, normalized by outcore._stand_sym_code.
        comp : int
            Number of integral components.
            NOTE(review): the default None is used directly in arithmetic
            and in range() below, so callers presumably have to pass an
            integer explicitly -- confirm.
        max_memory : float or int
            Forwarded to guess_e1bufsize.
        ioblk_size : float or int
            Forwarded to guess_e1bufsize and guess_e2bufsize.
        verbose : int or Logger
            Passed to logger.new_logger.
        ao2mopt : AO2MOpt object
            Built from (mol, intor) when None.

    Returns:
        swapfile.  For every component icomp and every shell-range step istep
        the file holds a complex128 dataset 'icomp/istep' with shape
        (nij_pair, buflen), where nij_pair = nmoi * nmoj.
    '''
    # time.clock() was removed in Python 3.8; only fall back to it on
    # interpreters that do not provide time.process_time().
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time0 = (cpu_clock(), time.time())
    log = logger.new_logger(mol, verbose)

    ijsame = iden_coeffs(mo_coeffs[0], mo_coeffs[1])

    nmoi = mo_coeffs[0].shape[1]
    nmoj = mo_coeffs[1].shape[1]
    nao = mo_coeffs[0].shape[0]
    aosym = outcore._stand_sym_code(aosym)
    if aosym in ('s1', 's2kl', 'a2kl'):
        nao_pair = nao * nao
    else:
        nao_pair = _count_naopair(mol, nao)
    nij_pair = nmoi * nmoj

    if ijsame and aosym in ('s4', 's2ij', 'a2ij', 'a4ij', 'a4kl', 'a4'):
        # Both orbital sets are identical: pass a single copy to the kernel.
        log.debug('i-mo == j-mo')
        moij = numpy.asarray(mo_coeffs[0], order='F')
        ijshape = (0, nmoi, 0, nmoi)
    else:
        # Stack the two sets side by side; ijshape selects the column ranges.
        moij = numpy.asarray(numpy.hstack((mo_coeffs[0],mo_coeffs[1])), order='F')
        ijshape = (0, nmoi, nmoi, nmoi+nmoj)

    e1buflen, mem_words, iobuf_words, ioblk_words = \
            guess_e1bufsize(max_memory, ioblk_size, nij_pair, nao_pair, comp)
    # The buffer to hold AO integrals in C code
    aobuflen = int((mem_words - iobuf_words) // (nao*nao*comp))
    shranges = outcore.guess_shell_ranges(mol, (aosym not in ('s1', 's2ij', 'a2ij')),
                                          aobuflen, e1buflen, mol.ao_loc_2c(), False)
    if ao2mopt is None:
        ao2mopt = _ao2mo.AO2MOpt(mol, intor)

    log.debug('step1: tmpfile %.8g MB', nij_pair*nao_pair*16/1e6)
    log.debug('step1: (ij,kl) = (%d,%d), mem cache %.8g MB, iobuf %.8g MB',
              nij_pair, nao_pair, mem_words*16/1e6, iobuf_words*16/1e6)

    fswap = h5py.File(swapfile, 'w')
    try:
        for icomp in range(comp):
            fswap.create_group(str(icomp))  # for h5py old version

        tao = numpy.asarray(mol.tmap(), dtype=numpy.int32)

        # transform e1
        ti0 = log.timer('Initializing ao2mo.outcore.half_e1', *time0)
        nstep = len(shranges)
        for istep,sh_range in enumerate(shranges):
            log.debug('step 1 [%d/%d], AO [%d:%d], len(buf) = %d', \
                      istep+1, nstep, *(sh_range[:3]))
            buflen = sh_range[2]
            # numpy.complex (alias of the builtin complex) was removed in
            # NumPy 1.24; complex128 is the dtype it always resolved to.
            iobuf = numpy.empty((comp,buflen,nij_pair), dtype=numpy.complex128)
            nmic = len(sh_range[3])
            p0 = 0
            for imic, aoshs in enumerate(sh_range[3]):
                log.debug1('      fill iobuf micro [%d/%d], AO [%d:%d], len(aobuf) = %d', \
                           imic+1, nmic, *aoshs)
                buf = _ao2mo.r_e1(intor, moij, ijshape, aoshs,
                                  mol._atm, mol._bas, mol._env,
                                  tao, aosym, comp, ao2mopt)
                iobuf[:,p0:p0+aoshs[2]] = buf
                p0 += aoshs[2]
            ti2 = log.timer('gen AO/transform MO [%d/%d]'%(istep+1,nstep), *ti0)

            e2buflen, chunks = guess_e2bufsize(ioblk_size, nij_pair, buflen)
            for icomp in range(comp):
                dset = fswap.create_dataset('%d/%d'%(icomp,istep),
                                            (nij_pair,iobuf.shape[1]), 'c16',
                                            chunks=None)
                # Write the transposed block in slabs of at most e2buflen rows.
                for col0, col1 in prange(0, nij_pair, e2buflen):
                    dset[col0:col1] = lib.transpose(iobuf[icomp,:,col0:col1])
            ti0 = log.timer('transposing to disk', *ti2)
    finally:
        # Close the swap file even when the transformation fails midway.
        fswap.close()
    return swapfile
Esempio n. 7
0
def _contract_vvvv_t2(mycc, vvvv, t2T, task_locs, out=None, verbose=None):
    '''Ht2 = numpy.einsum('ijcd,acbd->ijab', t2, vvvv)
    where vvvv has to be real and has the 4-fold permutation symmetry

    Args:
        vvvv : None or integral object
            if vvvv is None, contract t2 to AO-integrals using AO-direct algorithm.
            Any other value raises NotImplementedError.
        t2T : ndarray or callable
            Double-precision amplitudes whose two leading axes have lengths
            (nvira, nvirb); the remaining axes are flattened.  A callable is
            invoked once to obtain the array.
        task_locs : sequence of int
            Shell boundaries per task: task i covers shells
            task_locs[i]:task_locs[i+1].
        out : buffer, optional
            Memory used to hold the result.
        verbose : int or Logger

    Returns:
        Ht2, an array of shape (nvira, nvirb, nocc2) backed by ``out`` when
        it is given.
    '''
    # time.clock() was removed in Python 3.8; only fall back to it on
    # interpreters that do not provide time.process_time().
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time0 = cpu_clock(), time.time()
    mol = mycc.mol
    log = logger.new_logger(mycc, verbose)

    if callable(t2T):
        t2T = t2T()
    assert (t2T.dtype == numpy.double)
    nvira, nvirb = t2T.shape[:2]
    nvir2 = nvira * nvirb
    t2T = t2T.reshape(nvira, nvirb, -1)
    nocc2 = t2T.shape[2]
    Ht2 = numpy.ndarray(t2T.shape, dtype=t2T.dtype, buffer=out)
    Ht2[:] = 0

    _dgemm = lib.numpy_helper._dgemm

    def contract_blk_(Ht2, t2T, eri, i0, i1, j0, j1):
        # Accumulate one (i-block, j-block) integral tile into Ht2[j0:j1].
        ic = i1 - i0
        jc = j1 - j0
        #:Ht2[j0:j1] += numpy.einsum('efx,efab->abx', t2T[i0:i1], eri)
        _dgemm('T', 'N', jc * nvirb, nocc2, ic * nvirb,
               eri.reshape(ic * nvirb, jc * nvirb), t2T.reshape(-1, nocc2),
               Ht2.reshape(nvir2, nocc2), 1, 1, 0, i0 * nvirb * nocc2,
               j0 * nvirb * nocc2)

    max_memory = max(MEMORYMIN, mycc.max_memory - lib.current_memory()[0])
    if vvvv is None:  # AO-direct CCSD
        ao_loc = mol.ao_loc_nr()
        intor = mol._add_suffix('int2e')
        ao2mopt = _ao2mo.AO2MOpt(mol, intor, 'CVHFnr_schwarz_cond',
                                 'CVHFsetnr_direct_scf')
        blksize = max(BLKMIN, numpy.sqrt(max_memory * .9e6 / 8 / nvirb**2 / 2))
        fint = gto.moleintor.getints4c
        fload = ccsd._ccsd.libcc.CCload_eri

        # Partition every task's shell range into blocks of about blksize AOs.
        ntasks = mpi.pool.size
        task_sh_locs = task_locs
        sh_ranges_tasks = []
        for task in range(ntasks):
            sh0 = task_sh_locs[task]
            sh1 = task_sh_locs[task + 1]
            sh_ranges = ao2mo.outcore.balance_partition(
                ao_loc, blksize, sh0, sh1)
            sh_ranges_tasks.append(sh_ranges)

        # Size the work buffers for the largest block over all tasks.
        blksize = max(
            max(x[2] for x in sh_ranges) if sh_ranges else 0
            for sh_ranges in sh_ranges_tasks)
        eribuf = numpy.empty((blksize, blksize, nvirb, nvirb))
        loadbuf = numpy.empty((blksize, blksize, nvirb, nvirb))

        # Shell blocks and AO offset of the segment owned by this rank.
        ao_sh_ranges = sh_ranges_tasks[rank]
        ao_sh0 = task_sh_locs[rank]
        ao_sh1 = task_sh_locs[rank + 1]
        ao_offset = ao_loc[ao_sh0]
        assert (nvira == ao_loc[ao_sh1] - ao_loc[ao_sh0])

        # _rotate_tensor_block yields (task_id, block) pairs; presumably it
        # cycles the t2T segments of all tasks through this process -- confirm.
        for task_id, t2T in _rotate_tensor_block(t2T):
            sh_ranges = sh_ranges_tasks[task_id]
            sh0 = task_sh_locs[task_id]
            cur_offset = ao_loc[sh0]

            for ish0, ish1, ni in sh_ranges:
                for jsh0, jsh1, nj in ao_sh_ranges:
                    eri = fint(intor,
                               mol._atm,
                               mol._bas,
                               mol._env,
                               shls_slice=(ish0, ish1, jsh0, jsh1),
                               aosym='s2kl',
                               ao_loc=ao_loc,
                               cintopt=ao2mopt._cintopt,
                               out=eribuf)
                    # AO indices relative to the start of each segment.
                    i0, i1 = ao_loc[ish0] - cur_offset, ao_loc[
                        ish1] - cur_offset
                    j0, j1 = ao_loc[jsh0] - ao_offset, ao_loc[jsh1] - ao_offset
                    tmp = numpy.ndarray((i1 - i0, nvirb, j1 - j0, nvirb),
                                        buffer=loadbuf)
                    fload(tmp.ctypes.data_as(ctypes.c_void_p),
                          eri.ctypes.data_as(ctypes.c_void_p),
                          (ctypes.c_int * 4)(i0, i1, j0, j1),
                          ctypes.c_int(nvirb))
                    contract_blk_(Ht2, t2T, tmp, i0, i1, j0, j1)
                    time0 = log.timer_debug1(
                        'AO-vvvv [%d:%d,%d:%d]' % (ish0, ish1, jsh0, jsh1),
                        *time0)
    else:
        raise NotImplementedError
    return Ht2
Esempio n. 8
0
    def add_wvvVV_(self, t1, t2, eris, t2new_tril, with_ovvv=True):
        '''Add the vvvv contraction of tau = t2 + t1*t1 to t2new_tril.

        Args:
            t1 : ndarray of shape (nocc, nvir)
            t2 : ndarray indexed as t2[i, j] with (nvir, nvir) blocks
            eris : integral object; eris.vvvv is read only when
                self.direct is false
            t2new_tril : ndarray, updated in place.  Its leading axis runs
                over the nocc*(nocc+1)//2 pairs (i, j<=i).
            with_ovvv : bool
                Only used by the AO-direct branch, where it enables the
                additional ovvv-type terms.

        Returns:
            t2new_tril
        '''
        # time.clock() was removed in Python 3.8; only fall back to it on
        # interpreters that do not provide time.process_time().
        cpu_clock = getattr(time, 'process_time', None) or time.clock
        time0 = cpu_clock(), time.time()
        nocc, nvir = t1.shape

        #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
        #: t2new += numpy.einsum('ijcd,acdb->ijab', tau, vvvv)
        def contract_rec_(t2new_tril, tau, eri, i0, i1, j0, j1):
            # Off-diagonal AO block: apply the tile and its transpose.
            nao = tau.shape[-1]
            ic = i1 - i0
            jc = j1 - j0
            #: t2tril[:,j0:j1] += numpy.einsum('xcd,cdab->xab', tau[:,i0:i1], eri)
            _dgemm('N', 'N', nocc*(nocc+1)//2, jc*nao, ic*nao,
                   tau.reshape(-1,nao*nao), eri.reshape(-1,jc*nao),
                   t2new_tril.reshape(-1,nao*nao), 1, 1, i0*nao, 0, j0*nao)

            #: t2tril[:,i0:i1] += numpy.einsum('xcd,abcd->xab', tau[:,j0:j1], eri)
            _dgemm('N', 'T', nocc*(nocc+1)//2, ic*nao, jc*nao,
                   tau.reshape(-1,nao*nao), eri.reshape(-1,jc*nao),
                   t2new_tril.reshape(-1,nao*nao), 1, 1, j0*nao, 0, i0*nao)

        def contract_tril_(t2new_tril, tau, eri, a0, a):
            # Rows a0..a of the lower triangle for a fixed leading index a.
            nvir = tau.shape[-1]
            #: t2new[i,:i+1, a] += numpy.einsum('xcd,cdb->xb', tau[:,a0:a+1], eri)
            _dgemm('N', 'N', nocc*(nocc+1)//2, nvir, (a+1-a0)*nvir,
                   tau.reshape(-1,nvir*nvir), eri.reshape(-1,nvir),
                   t2new_tril.reshape(-1,nvir*nvir), 1, 1, a0*nvir, 0, a*nvir)

            #: t2new[i,:i+1,a0:a] += numpy.einsum('xd,abd->xab', tau[:,a], eri[:a])
            if a > a0:
                _dgemm('N', 'T', nocc*(nocc+1)//2, (a-a0)*nvir, nvir,
                       tau.reshape(-1,nvir*nvir), eri.reshape(-1,nvir),
                       t2new_tril.reshape(-1,nvir*nvir), 1, 1, a*nvir, 0, a0*nvir)

        if self.direct:   # AO-direct CCSD
            mol = self.mol
            mo = _mo_without_core(self, self.mo_coeff)
            nao, nmo = mo.shape
            nao_pair = nao * (nao+1) // 2
            aos = numpy.asarray(mo[:,nocc:].T, order='F')
            nocc2 = nocc*(nocc+1)//2
            # outbuf first backs the MO-basis tau, then (after being zeroed)
            # accumulates the AO-basis result.
            outbuf = numpy.empty((nocc2,nao,nao))
            tau = numpy.ndarray((nocc2,nvir,nvir), buffer=outbuf)
            p0 = 0
            for i in range(nocc):
                tau[p0:p0+i+1] = numpy.einsum('a,jb->jab', t1[i], t1[:i+1])
                tau[p0:p0+i+1] += t2[i,:i+1]
                p0 += i + 1
            # Back-transform tau to the AO basis; this returns a new array,
            # so outbuf can be reused afterwards.
            tau = _ao2mo.nr_e2(tau.reshape(nocc2,nvir**2), aos, (0,nao,0,nao), 's1', 's1')
            tau = tau.reshape(nocc2,nao,nao)
            time0 = logger.timer_debug1(self, 'vvvv-tau', *time0)

            ao2mopt = _ao2mo.AO2MOpt(mol, 'cint2e_sph', 'CVHFnr_schwarz_cond',
                                     'CVHFsetnr_direct_scf')
            outbuf[:] = 0
            ao_loc = mol.ao_loc_nr()
            max_memory = max(0, self.max_memory - lib.current_memory()[0])
            dmax = max(4, int(numpy.sqrt(max_memory*.95e6/8/nao**2/2)))
            sh_ranges = ao2mo.outcore.balance_partition(ao_loc, dmax)
            dmax = max(x[2] for x in sh_ranges)
            eribuf = numpy.empty((dmax,dmax,nao,nao))
            loadbuf = numpy.empty((dmax,dmax,nao,nao))
            fint = gto.moleintor.getints2e

            for ip, (ish0, ish1, ni) in enumerate(sh_ranges):
                # Strictly-lower shell blocks (j < i).
                for jsh0, jsh1, nj in sh_ranges[:ip]:
                    eri = fint('cint2e_sph', mol._atm, mol._bas, mol._env,
                               shls_slice=(ish0,ish1,jsh0,jsh1), aosym='s2kl',
                               ao_loc=ao_loc, cintopt=ao2mopt._cintopt, out=eribuf)
                    i0, i1 = ao_loc[ish0], ao_loc[ish1]
                    j0, j1 = ao_loc[jsh0], ao_loc[jsh1]
                    tmp = numpy.ndarray((i1-i0,nao,j1-j0,nao), buffer=loadbuf)
                    _ccsd.libcc.CCload_eri(tmp.ctypes.data_as(ctypes.c_void_p),
                                           eri.ctypes.data_as(ctypes.c_void_p),
                                           (ctypes.c_int*4)(i0, i1, j0, j1),
                                           ctypes.c_int(nao))
                    contract_rec_(outbuf, tau, tmp, i0, i1, j0, j1)
                    time0 = logger.timer_debug1(self, 'AO-vvvv [%d:%d,%d:%d]' %
                                                (ish0,ish1,jsh0,jsh1), *time0)
                # Diagonal shell block, generated with 4-fold symmetry and
                # unpacked one leading AO index at a time.
                eri = fint('cint2e_sph', mol._atm, mol._bas, mol._env,
                           shls_slice=(ish0,ish1,ish0,ish1), aosym='s4',
                           ao_loc=ao_loc, cintopt=ao2mopt._cintopt, out=eribuf)
                i0, i1 = ao_loc[ish0], ao_loc[ish1]
                for i in range(i1-i0):
                    p0, p1 = i*(i+1)//2, (i+1)*(i+2)//2
                    tmp = lib.unpack_tril(eri[p0:p1], out=loadbuf)
                    contract_tril_(outbuf, tau, tmp, i0, i0+i)
                time0 = logger.timer_debug1(self, 'AO-vvvv [%d:%d,%d:%d]' %
                                            (ish0,ish1,ish0,ish1), *time0)
            eribuf = loadbuf = eri = tmp = None

            # Transform the AO-basis result to the virtual-virtual MO block;
            # tau is reused as scratch output from here on.
            tmp = _ao2mo.nr_e2(outbuf, mo, (nocc,nmo,nocc,nmo), 's1', 's1', out=tau)
            t2new_tril += tmp.reshape(nocc2,nvir,nvir)

            if with_ovvv:
                #: tmp = numpy.einsum('ijcd,ka,kdcb->ijba', tau, t1, eris.ovvv)
                #: t2new -= tmp + tmp.transpose(1,0,3,2)
                tmp = _ao2mo.nr_e2(outbuf, mo, (nocc,nmo,0,nocc), 's1', 's1', out=tau)
                t2new_tril -= lib.ddot(tmp.reshape(nocc2*nvir,nocc), t1).reshape(nocc2,nvir,nvir)
                tmp = _ao2mo.nr_e2(outbuf, mo, (0,nocc,nocc,nmo), 's1', 's1', out=tau)
                #: t2new_tril -= numpy.einsum('xkb,ka->xab', tmp.reshape(-1,nocc,nvir), t1)
                tmp = lib.transpose(tmp.reshape(nocc2,nocc,nvir), axes=(0,2,1), out=outbuf)
                tmp = lib.ddot(tmp.reshape(nocc2*nvir,nocc), t1, 1,
                               numpy.ndarray((nocc2*nvir,nvir), buffer=tau), 0)
                tmp = lib.transpose(tmp.reshape(nocc2,nvir,nvir), axes=(0,2,1), out=outbuf)
                t2new_tril -= tmp.reshape(nocc2,nvir,nvir)

        else:
            #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
            #: t2new += numpy.einsum('ijcd,acdb->ijab', tau, vvvv)
            tau = numpy.empty((nocc*(nocc+1)//2,nvir,nvir))
            p0 = 0
            for i in range(nocc):
                tau[p0:p0+i+1] = numpy.einsum('a,jb->jab', t1[i], t1[:i+1])
                tau[p0:p0+i+1] += t2[i,:i+1]
                p0 += i + 1
            time0 = logger.timer_debug1(self, 'vvvv-tau', *time0)
            p0 = 0
            # Two unpack buffers are alternated so that the next slab can be
            # read while the previous contraction is handed to async_do.
            outbuf = numpy.empty((nvir,nvir,nvir))
            outbuf1 = numpy.empty((nvir,nvir,nvir))
            handler = None
            for a in range(nvir):
                buf = lib.unpack_tril(eris.vvvv[p0:p0+a+1], out=outbuf)
                outbuf, outbuf1 = outbuf1, outbuf
                handler = async_do(handler, contract_tril_, t2new_tril, tau, buf, 0, a)
                p0 += a+1
                time0 = logger.timer_debug1(self, 'vvvv %d'%a, *time0)
            # With nvir == 0 the loop never runs and there is nothing to wait for.
            if handler is not None:
                handler.join()
        return t2new_tril
Esempio n. 9
0
def _make_eris(mp, mo_coeff=None, verbose=None):
    '''Build the (ia|jb) integrals out-of-core, distributed over MPI tasks.

    Pass 1 generates AO integrals in shell blocks, transforms two indices
    with the occupied orbitals and stores the blocks in a temporary HDF5
    file.  Pass 2 reloads them in batches of occupied orbitals and
    transforms the remaining two indices with the virtual orbitals.

    Args:
        mp : MP2 object providing mol, nocc, nmo and max_memory
        mo_coeff : forwarded to mp2._ChemistsERIs
        verbose : int or Logger

    Returns:
        The _ChemistsERIs object.  Its ``ovov`` attribute is a dataset of
        shape (nocc, nvir, nocc_seg, nvir) inside ``eris.feri``, where
        nocc_seg is the number of occupied orbitals assigned to this rank.
        The object is also stored as ``mp._eris``.
    '''
    log = logger.new_logger(mp, verbose)
    # time.clock() was removed in Python 3.8; only fall back to it on
    # interpreters that do not provide time.process_time().
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time0 = (cpu_clock(), time.time())

    log.debug('transform (ia|jb) outcore')
    mol = mp.mol
    nocc = mp.nocc
    nmo = mp.nmo
    nvir = nmo - nocc

    eris = mp2._ChemistsERIs(mp, mo_coeff)
    nao = eris.mo_coeff.shape[0]
    assert (nvir <= nao)
    orbo = eris.mo_coeff[:, :nocc]
    orbv = numpy.asarray(eris.mo_coeff[:, nocc:], order='F')
    eris.feri = lib.H5TmpFile()

    int2e = mol._add_suffix('int2e')
    ao2mopt = _ao2mo.AO2MOpt(mol, int2e, 'CVHFnr_schwarz_cond',
                             'CVHFsetnr_direct_scf')
    fint = gto.moleintor.getints4c

    # Occupied-orbital range assigned to every task, and to this rank.
    ntasks = mpi.pool.size
    olocs = [_task_location(nocc, task_id) for task_id in range(ntasks)]
    oloc0, oloc1 = olocs[rank]
    nocc_seg = oloc1 - oloc0
    log.debug2('olocs %s', olocs)

    # AO shell range assigned to this rank for the last integral index.
    ao_loc = mol.ao_loc_nr()
    task_sh_locs = lib.misc._balanced_partition(ao_loc, ntasks)
    log.debug2('task_sh_locs %s', task_sh_locs)
    ao_sh0 = task_sh_locs[rank]
    ao_sh1 = task_sh_locs[rank + 1]
    ao_loc0 = ao_loc[ao_sh0]
    ao_loc1 = ao_loc[ao_sh1]
    nao_seg = ao_loc1 - ao_loc0
    orbo_seg = orbo[ao_loc0:ao_loc1]

    mem_now = lib.current_memory()[0]
    max_memory = max(0, mp.max_memory - mem_now)
    dmax = numpy.sqrt(max_memory * .9e6 / 8 / ((nao + nocc) *
                                               (nao_seg + nocc)))
    # All ranks agree on the smallest block size, then share one partition.
    dmax = min(nao // 4 + 2, max(BLKMIN, min(comm.allgather(dmax))))
    sh_ranges = ao2mo.outcore.balance_partition(ao_loc, dmax)
    sh_ranges = comm.bcast(sh_ranges)
    dmax = max(x[2] for x in sh_ranges)
    eribuf = numpy.empty((nao, dmax, dmax, nao_seg))
    ftmp = lib.H5TmpFile()
    log.debug('max_memory %s MB (dmax = %s) required disk space %g MB',
              max_memory, dmax,
              nocc * nocc_seg * (nao * (nao + dmax) / 2 + nvir**2) * 8 / 1e6)

    def save(count, tmp_xo):
        # Exchange the half-transformed block between tasks and store the
        # two orderings under the keys '<count>b' and '<count>a'.
        di, dj = tmp_xo.shape[2:4]
        tmp_xo = [tmp_xo[p0:p1] for p0, p1 in olocs]
        tmp_xo = mpi.alltoall(tmp_xo, split_recvbuf=True)
        tmp_xo = sum(tmp_xo).reshape(nocc_seg, nocc, di, dj)
        ftmp[str(count) + 'b'] = tmp_xo

        tmp_ox = mpi.alltoall([tmp_xo[:, p0:p1] for p0, p1 in olocs],
                              split_recvbuf=True)
        tmp_ox = [
            tmp_ox[i].reshape(p1 - p0, nocc_seg, di, dj)
            for i, (p0, p1) in enumerate(olocs)
        ]
        ftmp[str(count) + 'a'] = numpy.vstack(tmp_ox)

    jk_blk_slices = []
    count = 0
    time1 = time0
    with lib.call_in_background(save) as bg_save:
        for ip, (ish0, ish1, ni) in enumerate(sh_ranges):
            # Only blocks with j <= i are generated; pass 2 fills in the
            # transposed counterpart.
            for jsh0, jsh1, nj in sh_ranges[:ip + 1]:
                i0, i1 = ao_loc[ish0], ao_loc[ish1]
                j0, j1 = ao_loc[jsh0], ao_loc[jsh1]
                jk_blk_slices.append((i0, i1, j0, j1))

                shls_slice = (0, mol.nbas, ish0, ish1, jsh0, jsh1, ao_sh0,
                              ao_sh1)
                eri = fint(int2e,
                           mol._atm,
                           mol._bas,
                           mol._env,
                           shls_slice=shls_slice,
                           aosym='s1',
                           ao_loc=ao_loc,
                           cintopt=ao2mopt._cintopt,
                           out=eribuf)
                tmp_xo = lib.einsum('pi,pqrs->iqrs', orbo, eri)
                tmp_xo = lib.einsum('iqrs,sl->ilqr', tmp_xo, orbo_seg)
                bg_save(count, tmp_xo)
                tmp_xo = None
                count += 1
                time1 = log.timer_debug1(
                    'partial ao2mo [%d:%d,%d:%d]' % (ish0, ish1, jsh0, jsh1),
                    *time1)
    eri = eribuf = None
    time1 = time0 = log.timer('mp2 ao2mo_ovov pass1', *time0)

    eris.ovov = eris.feri.create_dataset('ovov', (nocc, nvir, nocc_seg, nvir),
                                         'f8')
    occblk = int(
        min(nocc,
            max(BLKMIN, max_memory * .9e6 / 8 / (nao**2 * nocc_seg + 1) / 5)))

    def load(i0, eri):
        # Assemble the full (nao, nao) AO-pair block for occupied orbitals
        # i0:i0+occblk; a start index past nocc is a no-op.
        if i0 < nocc:
            i1 = min(i0 + occblk, nocc)
            for k, (p0, p1, q0, q1) in enumerate(jk_blk_slices):
                eri[:i1 - i0, :, p0:p1, q0:q1] = ftmp[str(k) + 'a'][i0:i1]
                if p0 != q0:
                    dat = numpy.asarray(ftmp[str(k) + 'b'][:, i0:i1])
                    eri[:i1 - i0, :, q0:q1, p0:p1] = dat.transpose(1, 0, 3, 2)

    def save(i0, i1, dat):
        eris.ovov[i0:i1] = dat

    # Double buffers: one pair for prefetched input, one pair for output
    # that is still being written in the background.
    buf_prefetch = numpy.empty((occblk, nocc_seg, nao, nao))
    buf = numpy.empty_like(buf_prefetch)
    bufw = numpy.empty((occblk * nocc_seg, nvir**2))
    bufw1 = numpy.empty_like(bufw)
    with lib.call_in_background(load) as prefetch:
        with lib.call_in_background(save) as bsave:
            load(0, buf_prefetch)
            for i0, i1 in lib.prange(0, nocc, occblk):
                buf, buf_prefetch = buf_prefetch, buf
                prefetch(i1, buf_prefetch)
                eri = buf[:i1 - i0].reshape((i1 - i0) * nocc_seg, nao, nao)

                dat = _ao2mo.nr_e2(eri,
                                   orbv, (0, nvir, 0, nvir),
                                   's1',
                                   's1',
                                   out=bufw)
                bsave(
                    i0, i1,
                    dat.reshape(i1 - i0, nocc_seg, nvir,
                                nvir).transpose(0, 2, 1, 3))
                bufw, bufw1 = bufw1, bufw
                time1 = log.timer_debug1('pass2 ao2mo [%d:%d]' % (i0, i1),
                                         *time1)

    time0 = log.timer('mp2 ao2mo_ovov pass2', *time0)
    mp._eris = eris
    return eris
Esempio n. 10
0
def half_e1(mol, mo_coeffs, swapfile,
            intor='cint2e_sph', aosym='s4', comp=1,
            max_memory=2000, ioblk_size=IOBLK_SIZE, verbose=logger.WARN, compact=True,
            ao2mopt=None):
    r'''Half transform arbitrary spherical AO integrals to MO integrals
    for the given two sets of orbitals

    Args:
        mol : :class:`Mole` object
            AO integrals will be generated in terms of mol._atm, mol._bas, mol._env
        mo_coeffs : list of ndarray
            mo_coeffs[0] and mo_coeffs[1] are the two sets of orbital
            coefficients used in the half transformation.
        swapfile : str or h5py File or h5py Group object
            To store the transformed integrals, in HDF5 format.  The transformed
            integrals are saved in blocks.  A file opened from a str is
            closed before returning; a File/Group object is left open.

    Kwargs:
        intor : str
            Name of the 2-electron integral.  Ref to :func:`getints_by_shell`
            for the complete list of available 2-electron integral names
        aosym : int or str
            Permutation symmetry for the AO integrals

            | 4 or '4' or 's4': 4-fold symmetry (default)
            | '2ij' or 's2ij' : symmetry between i, j in (ij|kl)
            | '2kl' or 's2kl' : symmetry between k, l in (ij|kl)
            | 1 or '1' or 's1': no symmetry
            | 'a4ij' : 4-fold symmetry with anti-symmetry between i, j in (ij|kl) (TODO)
            | 'a4kl' : 4-fold symmetry with anti-symmetry between k, l in (ij|kl) (TODO)
            | 'a2ij' : anti-symmetry between i, j in (ij|kl) (TODO)
            | 'a2kl' : anti-symmetry between k, l in (ij|kl) (TODO)

        comp : int
            Components of the integrals, e.g. cint2e_ip_sph has 3 components.
        max_memory : float or int
            The maximum size of cache to use (in MB), large cache may **not**
            improve performance.
        ioblk_size : float or int
            The block size for IO, large block size may **not** improve performance
        verbose : int or :class:`Logger` object
            Print level, or a logger to write to
        compact : bool
            When compact is True, depending on the two orbital sets, the
            half-transformed integrals keep the permutation symmetry of the
            MO pair.  If it's False, the function will abandon any
            permutation symmetry, and store the "plain" MO pairs
        ao2mopt : :class:`AO2MOpt` object
            Precomputed data to improve performance

    Returns:
        swapfile, the same object that was passed in.  The integrals are
        stored in it under the names 'icomp/istep'.

    '''
    # time.clock() was removed in Python 3.8; only fall back to it on
    # interpreters that do not provide time.process_time().
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time0 = (cpu_clock(), time.time())
    if isinstance(verbose, logger.Logger):
        log = verbose
    else:
        log = logger.Logger(mol.stdout, verbose)

    nao = mo_coeffs[0].shape[0]
    aosym = _stand_sym_code(aosym)
    if aosym in ('s4', 's2ij'):
        nao_pair = nao * (nao+1) // 2
    else:
        nao_pair = nao * nao

    ijmosym, nij_pair, moij, ijshape = \
            incore._conc_mos(mo_coeffs[0], mo_coeffs[1],
                             compact and aosym in ('s4', 's2ij'))

    e1buflen, mem_words, iobuf_words, ioblk_words = \
            guess_e1bufsize(max_memory, ioblk_size, nij_pair, nao_pair, comp)
    # The buffer to hold AO integrals in C code, see line (@)
    aobuflen = int((mem_words - iobuf_words) // (nao_pair*comp))
    shranges = guess_shell_ranges(mol, (aosym in ('s4', 's2kl')), e1buflen, aobuflen)
    if ao2mopt is None:
        if intor == 'cint2e_sph':
            ao2mopt = _ao2mo.AO2MOpt(mol, intor, 'CVHFnr_schwarz_cond',
                                     'CVHFsetnr_direct_scf')
        else:
            ao2mopt = _ao2mo.AO2MOpt(mol, intor)

    # Only a file opened here is owned (and therefore closed) by this function.
    opened_here = isinstance(swapfile, str)
    if opened_here:
        fswap = h5py.File(swapfile, 'w')
    else:
        fswap = swapfile
    try:
        for icomp in range(comp):
            fswap.create_group(str(icomp)) # for h5py old version

        # h5py.Group has no .filename attribute; reach the owning file
        # through .file so that Group objects can be logged as well.
        swap_name = getattr(fswap, 'filename', None)
        if swap_name is None:
            swap_name = fswap.file.filename
        log.debug('step1: tmpfile %s  %.8g MB', swap_name, nij_pair*nao_pair*8/1e6)
        log.debug('step1: (ij,kl) = (%d,%d), mem cache %.8g MB, iobuf %.8g MB',
                  nij_pair, nao_pair, mem_words*8/1e6, iobuf_words*8/1e6)

        # transform e1
        ti0 = log.timer('Initializing ao2mo.outcore.half_e1', *time0)
        nstep = len(shranges)
        maxbuflen = max([x[2] for x in shranges])
        # bufs1 receives AO integrals, bufs2 the half-transformed block;
        # both are allocated once for the largest shell range.
        bufs1 = numpy.empty((comp*maxbuflen,nao_pair))
        bufs2 = numpy.empty((comp*maxbuflen,nij_pair))
        for istep,sh_range in enumerate(shranges):
            log.debug1('step 1 [%d/%d], AO [%d:%d], len(buf) = %d', \
                       istep+1, nstep, *(sh_range[:3]))
            buflen = sh_range[2]
            iobuf = bufs2[:comp*buflen].reshape(comp,buflen,nij_pair)
            nmic = len(sh_range[3])
            p0 = 0
            for imic, aoshs in enumerate(sh_range[3]):
                log.debug2('      fill iobuf micro [%d/%d], AO [%d:%d], len(aobuf) = %d', \
                           imic+1, nmic, *aoshs)
                buf = bufs1[:comp*aoshs[2]] # (@)
                _ao2mo.nr_e1fill(intor, aoshs, mol._atm, mol._bas, mol._env,
                                 aosym, comp, ao2mopt, out=buf)
                buf = _ao2mo.nr_e1(buf, moij, ijshape, aosym, ijmosym)
                iobuf[:,p0:p0+aoshs[2]] = buf.reshape(comp,aoshs[2],-1)
                p0 += aoshs[2]
            ti2 = log.timer_debug1('gen AO/transform MO [%d/%d]'%(istep+1,nstep), *ti0)

            e2buflen, chunks = guess_e2bufsize(ioblk_size, nij_pair, buflen)
            for icomp in range(comp):
                _transpose_to_h5g(fswap, '%d/%d'%(icomp,istep), iobuf[icomp],
                                  e2buflen, None)
            ti0 = log.timer_debug1('transposing to disk', *ti2)
        bufs1 = bufs2 = None
    finally:
        # Release the file handle even when the transformation fails midway.
        if opened_here:
            fswap.close()
    return swapfile
Esempio n. 11
0
def _ao2mo_ovov(mp, orbo, orbv, feri, max_memory=2000, verbose=None):
    '''Transform AO integrals to (ov|ov) out-of-core in two passes.

    Pass 1 generates AO integrals in shell blocks, contracts the two outer
    indices with ``orbo`` and stores every block in a temporary HDF5 file.
    Pass 2 reloads them in batches of occupied orbitals, reorders the AO
    pairs and contracts them with ``orbv``.

    Args:
        mp : object providing ``mol``; also passed to logger.new_logger
        orbo : ndarray of shape (nao, nocc)
        orbv : ndarray whose second dimension is nvir
        feri : HDF5 file or group in which the dataset 'ovov' is created
        max_memory : float or int
            Memory budget used to size the blocks (the formulas treat it
            as MB of 8-byte words).
        verbose : int or Logger

    Returns:
        The 'ovov' dataset of shape (nocc*nvir, nocc*nvir).
    '''
    # time.clock() was removed in Python 3.8; only fall back to it on
    # interpreters that do not provide time.process_time().
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time0 = (cpu_clock(), time.time())
    log = logger.new_logger(mp, verbose)

    mol = mp.mol
    int2e = mol._add_suffix('int2e')
    ao2mopt = _ao2mo.AO2MOpt(mol, int2e, 'CVHFnr_schwarz_cond',
                             'CVHFsetnr_direct_scf')
    nao, nocc = orbo.shape
    nvir = orbv.shape[1]
    nbas = mol.nbas
    assert (nvir <= nao)

    ao_loc = mol.ao_loc_nr()
    dmax = max(
        4, min(nao / 3, numpy.sqrt(max_memory * .95e6 / 8 / (nao + nocc)**2)))
    sh_ranges = ao2mo.outcore.balance_partition(ao_loc, dmax)
    dmax = max(x[2] for x in sh_ranges)
    eribuf = numpy.empty((nao, dmax, dmax, nao))
    ftmp = lib.H5TmpFile()
    log.debug('max_memory %s MB (dmax = %s) required disk space %g MB',
              max_memory, dmax,
              nocc**2 * (nao * (nao + dmax) / 2 + nvir**2) * 8 / 1e6)

    # buf_li / buf1 alternate so that a block still being written in the
    # background is not overwritten by the next contraction.
    buf_i = numpy.empty((nocc * dmax**2 * nao))
    buf_li = numpy.empty((nocc**2 * dmax**2))
    buf1 = numpy.empty_like(buf_li)

    fint = gto.moleintor.getints4c
    jk_blk_slices = []
    count = 0
    time1 = time0
    with lib.call_in_background(ftmp.__setitem__) as save:
        for ip, (ish0, ish1, ni) in enumerate(sh_ranges):
            # Only blocks with j <= i are generated; pass 2 rebuilds the
            # transposed counterpart.
            for jsh0, jsh1, nj in sh_ranges[:ip + 1]:
                i0, i1 = ao_loc[ish0], ao_loc[ish1]
                j0, j1 = ao_loc[jsh0], ao_loc[jsh1]
                jk_blk_slices.append((i0, i1, j0, j1))

                eri = fint(int2e,
                           mol._atm,
                           mol._bas,
                           mol._env,
                           shls_slice=(0, nbas, ish0, ish1, jsh0, jsh1, 0,
                                       nbas),
                           aosym='s1',
                           ao_loc=ao_loc,
                           cintopt=ao2mopt._cintopt,
                           out=eribuf)
                tmp_i = numpy.ndarray((nocc, (i1 - i0) * (j1 - j0) * nao),
                                      buffer=buf_i)
                tmp_li = numpy.ndarray((nocc, nocc * (i1 - i0) * (j1 - j0)),
                                       buffer=buf_li)
                # Contract the first AO index, then the last one, with orbo.
                lib.ddot(orbo.T,
                         eri.reshape(nao, (i1 - i0) * (j1 - j0) * nao),
                         c=tmp_i)
                lib.ddot(orbo.T,
                         tmp_i.reshape(nocc * (i1 - i0) * (j1 - j0), nao).T,
                         c=tmp_li)
                tmp_li = tmp_li.reshape(nocc, nocc, (i1 - i0), (j1 - j0))
                save(str(count), tmp_li.transpose(1, 0, 2, 3))
                buf_li, buf1 = buf1, buf_li
                count += 1
                time1 = log.timer_debug1(
                    'partial ao2mo [%d:%d,%d:%d]' % (ish0, ish1, jsh0, jsh1),
                    *time1)
    time1 = time0 = log.timer('mp2 ao2mo_ovov pass1', *time0)
    eri = eribuf = tmp_i = tmp_li = buf_i = buf_li = buf1 = None

    chunks = (nvir, nvir)
    h5dat = feri.create_dataset('ovov', (nocc * nvir, nocc * nvir),
                                'f8',
                                chunks=chunks)
    # jk_where is the sorting indices for the stacked (oO|pP) integrals in pass 2
    jk_where = []
    aoao_idx = numpy.arange(nao * nao).reshape(nao, nao)
    for i0, i1, j0, j1 in jk_blk_slices:
        # idx of pP in <oO|pP>
        jk_where.append(aoao_idx[i0:i1, j0:j1].ravel())
        if i0 != j0:
            # idx of pP in (<oO|pP>).transpose(1,0,3,2)
            jk_where.append(aoao_idx[j0:j1, i0:i1].ravel())
    jk_where = numpy.argsort(numpy.hstack(jk_where)).astype(numpy.int32)
    orbv = numpy.asarray(orbv, order='F')

    occblk = int(
        min(nocc,
            max(4, 250 / nocc, max_memory * .9e6 / 8 / (nao**2 * nocc) / 3)))

    def load(i0, eri):
        # Stack all stored blocks for occupied orbitals i0:i0+occblk in the
        # same order that was used to build jk_where; a start index past
        # nocc is a no-op.
        if i0 >= nocc:
            return
        i1 = min(i0 + occblk, nocc)
        eri = eri[:(i1 - i0) * nocc]
        p1 = 0
        for k, jk_slice in enumerate(jk_blk_slices):
            dat = numpy.asarray(ftmp[str(k)][i0:i1]).reshape((i1 - i0) * nocc,
                                                             -1)
            p0, p1 = p1, p1 + dat.shape[1]
            eri[:, p0:p1] = dat
            if jk_slice[0] != jk_slice[2]:
                dat = numpy.asarray(ftmp[str(k)][:, i0:i1])
                dat = dat.transpose(1, 0, 3, 2).reshape((i1 - i0) * nocc, -1)
                p0, p1 = p1, p1 + dat.shape[1]
                eri[:, p0:p1] = dat

    def save(i0, i1, dat):
        for i in range(i0, i1):
            h5dat[i * nvir:(i + 1) * nvir] = dat[i - i0].reshape(
                nvir, nocc * nvir)

    buf_prefetch = numpy.empty((occblk * nocc, nao**2))
    buf = numpy.empty_like(buf_prefetch)
    buf1 = numpy.empty_like(buf_prefetch)
    with lib.call_in_background(load) as prefetch:
        with lib.call_in_background(save) as bsave:
            load(0, buf_prefetch)
            for i0, i1 in lib.prange(0, nocc, occblk):
                buf, buf_prefetch = buf_prefetch, buf
                eri = buf[:(i1 - i0) * nocc]
                prefetch(i1, buf_prefetch)

                idx = numpy.arange(eri.shape[0], dtype=numpy.int32)
                dat = lib.take_2d(eri, idx, jk_where, out=buf1)
                # NOTE(review): the result is written back into ``eri``
                # (i.e. ``buf``) and handed to the background save; on the
                # next iteration the same memory becomes the prefetch
                # target.  Whether the pending save is guaranteed to have
                # finished by then depends on lib.call_in_background --
                # confirm there is no race.
                dat = _ao2mo.nr_e2(dat,
                                   orbv, (0, nvir, 0, nvir),
                                   's1',
                                   's1',
                                   out=eri)
                bsave(
                    i0, i1,
                    dat.reshape(i1 - i0, nocc, nvir,
                                nvir).transpose(0, 2, 1, 3))
                time1 = log.timer_debug1('pass2 ao2mo [%d:%d]' % (i0, i1),
                                         *time1)

    time0 = log.timer('mp2 ao2mo_ovov pass2', *time0)
    return h5dat