Beispiel #1
0
def _dm1_mo2ao(dm1, ket, bra):
    nao, nket = ket.shape
    nbra = bra.shape[1]
    nset = len(dm1)
    dm1 = lib.ddot(ket, dm1.transpose(1,0,2).reshape(nket,nset*nbra))
    dm1 = dm1.reshape(nao,nset,nbra).transpose(1,0,2).reshape(nset*nao,nbra)
    return lib.ddot(dm1, bra.T).reshape(nset,nao,nao)
Beispiel #2
0
def zdotNC(aR, aI, bR, bI, alpha=1, cR=None, cI=None, beta=0):
    '''c = a*b.conj()'''
    cR = lib.ddot(aR, bR, alpha, cR, beta)
    cR = lib.ddot(aI, bI, alpha, cR, 1)
    cI = lib.ddot(aR, bI, -alpha, cI, beta)
    cI = lib.ddot(aI, bR, alpha, cI, 1)
    return cR, cI
Beispiel #3
0
def zdotNC(aR, aI, bR, bI, alpha=1, cR=None, cI=None, beta=0):
    '''c = a*b.conj()'''
    cR = lib.ddot(aR, bR, alpha, cR, beta)
    cR = lib.ddot(aI, bI, alpha, cR, 1   )
    cI = lib.ddot(aR, bI,-alpha, cI, beta)
    cI = lib.ddot(aI, bR, alpha, cI, 1   )
    return cR, cI
Beispiel #4
0
def zdotCN(aR, aI, bR, bI, alpha=1, cR=None, cI=None, beta=0):
    """c = a.conj()*b"""
    cR = lib.ddot(aR, bR, alpha, cR, beta)
    cR = lib.ddot(aI, bI, alpha, cR, 1)
    cI = lib.ddot(aR, bI, alpha, cI, beta)
    cI = lib.ddot(aI, bR, -alpha, cI, 1)
    return cR, cI
Beispiel #5
0
def make_rdm1(ci, nmo, nocc):
    nvir = nmo - nocc
    c0 = ci[0]
    c1 = ci[1:nocc*nvir+1].reshape(nocc,nvir)
    c2 = ci[nocc*nvir+1:].reshape(nocc,nocc,nvir,nvir)
    dov = c0*c1 * 2
    dov += numpy.einsum('jb,ijab->ia', c1, c2) * 4
    dov -= numpy.einsum('jb,ijba->ia', c1, c2) * 2
    doo  = numpy.einsum('ia,ka->ik', c1, c1) * -2
    #:doo -= numpy.einsum('ijab,ikab->jk', c2, c2) * 4
    #:doo += numpy.einsum('ijab,kiab->jk', c2, c2) * 2
    theta = c2*2 - c2.transpose(0,1,3,2)
    lib.ddot(c2.reshape(nocc,-1), theta.reshape(nocc,-1).T, -2, doo, 1)
    dvv  = numpy.einsum('ia,ic->ca', c1, c1) * 2
    #:dvv += numpy.einsum('ijab,ijac->cb', c2, c2) * 4
    #:dvv -= numpy.einsum('ijab,jiac->cb', c2, c2) * 2
    lib.ddot(c2.reshape(-1,nvir).T, theta.reshape(-1,nvir), 2, dvv, 1)

    rdm1 = numpy.empty((nmo,nmo))
    rdm1[:nocc,nocc:] = dov
    rdm1[nocc:,:nocc] = dov.T
    rdm1[:nocc,:nocc] = doo
    rdm1[nocc:,nocc:] = dvv

    for i in range(nocc):
        rdm1[i,i] += 2
    return rdm1
Beispiel #6
0
 def contract_k(pLqR, pLqI):
     # K ~ 'iLj,lLk*,li->kj' + 'lLk*,iLj,li->kj'
     #:pLq = (LpqR + LpqI.reshape(-1,nao,nao)*1j).transpose(1,0,2)
     #:tmp = numpy.dot(dm, pLq.reshape(nao,-1))
     #:vk += numpy.dot(pLq.reshape(-1,nao).conj().T, tmp.reshape(-1,nao))
     nrow = pLqR.shape[1]
     tmpR = numpy.ndarray((nao, nrow * nao), buffer=buf2R)
     if k_real:
         for i in range(nset):
             lib.ddot(dmsR[i], pLqR.reshape(nao, -1), 1, tmpR)
             lib.ddot(pLqR.reshape(-1, nao).T, tmpR.reshape(-1, nao), 1, vkR[i], 1)
     else:
         tmpI = numpy.ndarray((nao, nrow * nao), buffer=buf2I)
         for i in range(nset):
             zdotNN(dmsR[i], dmsI[i], pLqR.reshape(nao, -1), pLqI.reshape(nao, -1), 1, tmpR, tmpI, 0)
             zdotCN(
                 pLqR.reshape(-1, nao).T,
                 pLqI.reshape(-1, nao).T,
                 tmpR.reshape(-1, nao),
                 tmpI.reshape(-1, nao),
                 1,
                 vkR[i],
                 vkI[i],
                 1,
             )
Beispiel #7
0
def make_rdm1(ci, nmo, nocc):
    nvir = nmo - nocc
    c0 = ci[0]
    c1 = ci[1:nocc * nvir + 1].reshape(nocc, nvir)
    c2 = ci[nocc * nvir + 1:].reshape(nocc, nocc, nvir, nvir)
    dov = c0 * c1 * 2
    dov += numpy.einsum('jb,ijab->ia', c1, c2) * 4
    dov -= numpy.einsum('jb,ijba->ia', c1, c2) * 2
    doo = numpy.einsum('ia,ka->ik', c1, c1) * -2
    #:doo -= numpy.einsum('ijab,ikab->jk', c2, c2) * 4
    #:doo += numpy.einsum('ijab,kiab->jk', c2, c2) * 2
    theta = c2 * 2 - c2.transpose(0, 1, 3, 2)
    lib.ddot(c2.reshape(nocc, -1), theta.reshape(nocc, -1).T, -2, doo, 1)
    dvv = numpy.einsum('ia,ic->ca', c1, c1) * 2
    #:dvv += numpy.einsum('ijab,ijac->cb', c2, c2) * 4
    #:dvv -= numpy.einsum('ijab,jiac->cb', c2, c2) * 2
    lib.ddot(c2.reshape(-1, nvir).T, theta.reshape(-1, nvir), 2, dvv, 1)

    rdm1 = numpy.empty((nmo, nmo))
    rdm1[:nocc, nocc:] = dov
    rdm1[nocc:, :nocc] = dov.T
    rdm1[:nocc, :nocc] = doo
    rdm1[nocc:, nocc:] = dvv

    for i in range(nocc):
        rdm1[i, i] += 2
    return rdm1
Beispiel #8
0
def transform_integrals_df(myadc):
    cput0 = (time.clock(), time.time())
    log = logger.Logger(myadc.stdout, myadc.verbose)

    mo_coeff = np.asarray(myadc.mo_coeff, order='F')
    nocc = myadc._nocc
    nao, nmo = mo_coeff.shape
    nvir = myadc._nmo - myadc._nocc
    nvir_pair = nvir*(nvir+1)//2
    with_df = myadc.with_df
    naux = with_df.get_naoaux()
    eris = lambda:None
    eris.vvvv = None
    eris.ovvv = None
    
    Loo = np.empty((naux,nocc,nocc))
    Lvo = np.empty((naux,nvir,nocc))
    eris.Lvv = np.empty((naux,nvir,nvir))
    eris.Lov = np.empty((naux,nocc,nvir))

    ijslice = (0, nmo, 0, nmo)
    Lpq = None
    p1 = 0

    for eri1 in with_df.loop():
        Lpq = ao2mo._ao2mo.nr_e2(eri1, mo_coeff, ijslice, aosym='s2', out=Lpq).reshape(-1,nmo,nmo)
        p0, p1 = p1, p1 + Lpq.shape[0]
        Loo[p0:p1] = Lpq[:,:nocc,:nocc]
        #Lov[p0:p1] = Lpq[:,:nocc,nocc:]
        eris.Lov[p0:p1] = Lpq[:,:nocc,nocc:]
        Lvo[p0:p1] = Lpq[:,nocc:,:nocc]
        eris.Lvv[p0:p1] = Lpq[:,nocc:,nocc:]

    Loo = Loo.reshape(naux,nocc*nocc)
    eris.Lov = eris.Lov.reshape(naux,nocc*nvir)
    Lvo = Lvo.reshape(naux,nocc*nvir)
    eris.Lvv = eris.Lvv.reshape(naux,nvir*nvir)

    eris.feri1 = lib.H5TmpFile()
    eris.oooo = eris.feri1.create_dataset('oooo', (nocc,nocc,nocc,nocc), 'f8')
    eris.oovv = eris.feri1.create_dataset('oovv', (nocc,nocc,nvir,nvir), 'f8', chunks=(nocc,nocc,1,nvir))
    eris.ovoo = eris.feri1.create_dataset('ovoo', (nocc,nvir,nocc,nocc), 'f8', chunks=(nocc,1,nocc,nocc))
    eris.ovvo = eris.feri1.create_dataset('ovvo', (nocc,nvir,nvir,nocc), 'f8', chunks=(nocc,1,nvir,nocc))

    eris.oooo[:] = lib.ddot(Loo.T, Loo).reshape(nocc,nocc,nocc,nocc)
    eris.ovoo[:] = lib.ddot(eris.Lov.T, Loo).reshape(nocc,nvir,nocc,nocc)
    eris.oovv[:] = lib.ddot(Loo.T, eris.Lvv).reshape(nocc,nocc,nvir,nvir)
    eris.ovvo[:] = lib.ddot(eris.Lov.T, Lvo).reshape(nocc,nvir,nvir,nocc)
    log.timer('DF-ADC integral transformation', *cput0)
    return eris
Beispiel #9
0
def get_eri(mydf):
    eriR = 0
    kptijkl = numpy.zeros((4,3))
    q = numpy.zeros(3)
    coulG = mydf.weighted_coulG(q, False, mydf.gs)
    for pqkR, pqkI, p0, p1 \
            in mydf.pw_loop(mydf.gs, kptijkl[:2], q, aosym='s2'):
        vG = coulG[p0:p1]
        pqkRv = pqkR * vG
        pqkIv = pqkI * vG
        eriR += lib.ddot(pqkRv, pqkR.T)
        eriR += lib.ddot(pqkIv, pqkI.T)
        pqkR = pqkI = None
    return eriR
Beispiel #10
0
def get_eri(mydf):
    eriR = 0
    kptijkl = numpy.zeros((4,3))
    q = numpy.zeros(3)
    coulG = mydf.weighted_coulG(q, False, mydf.mesh)
    for pqkR, pqkI, p0, p1 \
            in mydf.pw_loop(mydf.mesh, kptijkl[:2], q, aosym='s2'):
        vG = coulG[p0:p1]
        pqkRv = pqkR * vG
        pqkIv = pqkI * vG
        # rho(G) v(G) rho(-G) = rho(G) v(G) [rho(G)]^*
        eriR += lib.ddot(pqkRv, pqkR.T)
        eriR += lib.ddot(pqkIv, pqkI.T)
        pqkR = pqkI = None
    return eriR
Beispiel #11
0
def _contract_compact(mydf, mos, coulG, max_memory):
    cell = mydf.cell
    moiT, mokT = mos
    nmoi, ngrids = moiT.shape
    nmok = mokT.shape[0]
    wcoulG = coulG * (cell.vol/ngrids)

    def fill_orbital_pair(moT, i0, i1, buf):
        npair = i1*(i1+1)//2 - i0*(i0+1)//2
        out = numpy.ndarray((npair,ngrids), dtype=buf.dtype, buffer=buf)
        ij = 0
        for i in range(i0, i1):
            numpy.einsum('p,jp->jp', moT[i], moT[:i+1], out=out[ij:ij+i+1])
            ij += i + 1
        return out

    eri = numpy.empty((nmoi*(nmoi+1)//2,nmok*(nmok+1)//2))
    blksize = int(min(max(nmoi*(nmoi+1)//2, nmok*(nmok+1)//2),
                      (max_memory*1e6/8 - eri.size)/2/ngrids+1))
    buf = numpy.empty((blksize,ngrids))
    for p0, p1 in lib.prange_tril(0, nmoi, blksize):
        mo_pairs_G = tools.fft(fill_orbital_pair(moiT, p0, p1, buf), mydf.mesh)
        mo_pairs_G*= wcoulG
        v = tools.ifft(mo_pairs_G, mydf.mesh)
        vR = numpy.asarray(v.real, order='C')
        for q0, q1 in lib.prange_tril(0, nmok, blksize):
            mo_pairs = numpy.asarray(fill_orbital_pair(mokT, q0, q1, buf), order='C')
            eri[p0*(p0+1)//2:p1*(p1+1)//2,
                q0*(q0+1)//2:q1*(q1+1)//2] = lib.ddot(vR, mo_pairs.T)
        v = None
    return eri
Beispiel #12
0
def get_eri_laplacian(mydf):
    eriR = 0
    kptijkl = numpy.zeros((4,3))
    q = numpy.zeros(3)
    coulG = mydf.weighted_coulG(q, False, mydf.mesh)
    Gv, Gvbase, kws = mydf.cell.get_Gv_weights(mydf.mesh)
    G2 = numpy.einsum('gx,gx->g', Gv, Gv)
    for pqkR, pqkI, p0, p1 \
            in mydf.pw_loop(mydf.mesh, kptijkl[:2], q, aosym='s1'):
        vG = coulG[p0:p1]
        G2_vG = -G2[p0:p1] * vG  # = \nabla^2 * f12(G)
        pqkRv = (pqkR * G2_vG)
        pqkIv = (pqkI * G2_vG)
        eriR += lib.ddot(pqkRv, pqkR.T)
        eriR += lib.ddot(pqkIv, pqkI.T)
        pqkR = pqkI = None
    return eriR.reshape(nao,nao,nao,nao)
Beispiel #13
0
def get_eri_ip1(mydf):
    eriR = 0
    kptijkl = numpy.zeros((4,3))
    q = numpy.zeros(3)
    coulG = mydf.weighted_coulG(q, False, mydf.mesh)
    Gv, Gvbase, kws = mydf.cell.get_Gv_weights(mydf.mesh)
    for pqkR, pqkI, p0, p1 \
            in mydf.pw_loop(mydf.mesh, kptijkl[:2], q, aosym='s1'):
        vG = coulG[p0:p1]
        G_vG = Gv[p0:p1].T * vG  # = -i\nabla * f12(G)
        pqkRv = (pqkR * G_vG[:,None,:]).reshape(-1,p1-p0)
        pqkIv = (pqkI * G_vG[:,None,:]).reshape(-1,p1-p0)
        # Imaginary part of rho(G) [-i\nabla*f12(G)] [rho(G)]^*
        eriR += lib.ddot(pqkIv, pqkR.reshape(-1,p1-p0).T)
        eriR -= lib.ddot(pqkRv, pqkI.reshape(-1,p1-p0).T)
        pqkR = pqkI = None
    return eriR.reshape(3,nao,nao,nao,nao)
def get_eri_ip1(mydf):
    eriR = 0
    kptijkl = numpy.zeros((4, 3))
    q = numpy.zeros(3)
    coulG = mydf.weighted_coulG(q, False, mydf.mesh)
    Gv, Gvbase, kws = mydf.cell.get_Gv_weights(mydf.mesh)
    for pqkR, pqkI, p0, p1 \
            in mydf.pw_loop(mydf.mesh, kptijkl[:2], q, aosym='s1'):
        vG = coulG[p0:p1]
        G_vG = Gv[p0:p1].T * vG  # = -i\nabla * f12(G)
        pqkRv = (pqkR * G_vG[:, None, :]).reshape(-1, p1 - p0)
        pqkIv = (pqkI * G_vG[:, None, :]).reshape(-1, p1 - p0)
        # Imaginary part of rho(G) [-i\nabla*f12(G)] [rho(G)]^*
        eriR += lib.ddot(pqkIv, pqkR.reshape(-1, p1 - p0).T)
        eriR -= lib.ddot(pqkRv, pqkI.reshape(-1, p1 - p0).T)
        pqkR = pqkI = None
    return eriR.reshape(3, nao, nao, nao, nao)
def get_eri_laplacian(mydf):
    eriR = 0
    kptijkl = numpy.zeros((4, 3))
    q = numpy.zeros(3)
    coulG = mydf.weighted_coulG(q, False, mydf.mesh)
    Gv, Gvbase, kws = mydf.cell.get_Gv_weights(mydf.mesh)
    G2 = numpy.einsum('gx,gx->g', Gv, Gv)
    for pqkR, pqkI, p0, p1 \
            in mydf.pw_loop(mydf.mesh, kptijkl[:2], q, aosym='s1'):
        vG = coulG[p0:p1]
        G2_vG = -G2[p0:p1] * vG  # = \nabla^2 * f12(G)
        pqkRv = (pqkR * G2_vG)
        pqkIv = (pqkI * G2_vG)
        eriR += lib.ddot(pqkRv, pqkR.T)
        eriR += lib.ddot(pqkIv, pqkI.T)
        pqkR = pqkI = None
    return eriR.reshape(nao, nao, nao, nao)
Beispiel #16
0
def get_eri_3c2e(mydf):
    from pyscf.gto.ft_ao import ft_ao
    eriR = 0
    kptijkl = numpy.zeros((4,3))
    q = numpy.zeros(3)
    coulG = mydf.weighted_coulG(q, False, mydf.mesh)
    Gv, Gvbase, kws = cell.get_Gv_weights(mydf.mesh)
    ao = ft_ao(cell, Gv)

    for pqkR, pqkI, p0, p1 \
            in mydf.pw_loop(mydf.mesh, kptijkl[:2], q, aosym='s2'):
        vG = coulG[p0:p1]
        pqkRv = pqkR * vG
        pqkIv = pqkI * vG
        # rho(G) v(G) rho(-G) = rho(G) v(G) [rho(G)]^*
        eriR += lib.ddot(pqkRv, ao[p0:p1].real)
        eriR += lib.ddot(pqkIv, ao[p0:p1].imag)
        pqkR = pqkI = None
    return eriR
Beispiel #17
0
 def contract_k(pLqR, pLqI):
     # K ~ 'iLj,lLk*,li->kj' + 'lLk*,iLj,li->kj'
     #:pLq = (LpqR + LpqI.reshape(-1,nao,nao)*1j).transpose(1,0,2)
     #:tmp = numpy.dot(dm, pLq.reshape(nao,-1))
     #:vk += numpy.dot(pLq.reshape(-1,nao).conj().T, tmp.reshape(-1,nao))
     nrow = pLqR.shape[1]
     tmpR = numpy.ndarray((nao,nrow*nao), buffer=buf2R)
     if k_real:
         for i in range(nset):
             lib.ddot(dmsR[i], pLqR.reshape(nao,-1), 1, tmpR)
             lib.ddot(pLqR.reshape(-1,nao).T, tmpR.reshape(-1,nao), 1, vkR[i], 1)
     else:
         tmpI = numpy.ndarray((nao,nrow*nao), buffer=buf2I)
         for i in range(nset):
             zdotNN(dmsR[i], dmsI[i], pLqR.reshape(nao,-1),
                    pLqI.reshape(nao,-1), 1, tmpR, tmpI, 0)
             zdotCN(pLqR.reshape(-1,nao).T, pLqI.reshape(-1,nao).T,
                    tmpR.reshape(-1,nao), tmpI.reshape(-1,nao),
                    1, vkR[i], vkI[i], 1)
def get_eri_3c2e(mydf):
    from pyscf.gto.ft_ao import ft_ao
    eriR = 0
    kptijkl = numpy.zeros((4, 3))
    q = numpy.zeros(3)
    coulG = mydf.weighted_coulG(q, False, mydf.mesh)
    Gv, Gvbase, kws = cell.get_Gv_weights(mydf.mesh)
    ao = ft_ao(cell, Gv)

    for pqkR, pqkI, p0, p1 \
            in mydf.pw_loop(mydf.mesh, kptijkl[:2], q, aosym='s2'):
        vG = coulG[p0:p1]
        pqkRv = pqkR * vG
        pqkIv = pqkI * vG
        # rho(G) v(G) rho(-G) = rho(G) v(G) [rho(G)]^*
        eriR += lib.ddot(pqkRv, ao[p0:p1].real)
        eriR += lib.ddot(pqkIv, ao[p0:p1].imag)
        pqkR = pqkI = None
    return eriR
Beispiel #19
0
 def contract_k(pLqR, pLqI, pjqR, pjqI):
     # K ~ 'iLj,lLk*,li->kj' + 'lLk*,iLj,li->kj'
     #:Lpq = LpqR + LpqI*1j
     #:j3c = j3cR + j3cI*1j
     #:for i in range(nset):
     #:    tmp = numpy.dot(dms[i], j3c.reshape(nao,-1))
     #:    vk1 = numpy.dot(Lpq.reshape(-1,nao).conj().T, tmp.reshape(-1,nao))
     #:    tmp = numpy.dot(dms[i], Lpq.reshape(nao,-1))
     #:    vk1+= numpy.dot(j3c.reshape(-1,nao).conj().T, tmp.reshape(-1,nao))
     #:    vkR[i] += vk1.real
     #:    vkI[i] += vk1.imag
     nrow = pLqR.shape[1]
     tmpR = numpy.ndarray((nao,nrow*nao), buffer=buf3R)
     if k_real:
         for i in range(nset):
             tmpR = lib.ddot(dmsR[i], pjqR.reshape(nao,-1), 1, tmpR)
             vk1R = lib.ddot(pLqR.reshape(-1,nao).T, tmpR.reshape(-1,nao))
             vkR[i] += vk1R
             if hermi:
                 vkR[i] += vk1R.T
             else:
                 tmpR = lib.ddot(dmsR[i], pLqR.reshape(nao,-1), 1, tmpR)
                 lib.ddot(pjqR.reshape(-1,nao).T, tmpR.reshape(-1,nao),
                          1, vkR[i], 1)
     else:
         tmpI = numpy.ndarray((nao,nrow*nao), buffer=buf3I)
         for i in range(nset):
             tmpR, tmpI = zdotNN(dmsR[i], dmsI[i], pjqR.reshape(nao,-1),
                                 pjqI.reshape(nao,-1), 1, tmpR, tmpI, 0)
             vk1R, vk1I = zdotCN(pLqR.reshape(-1,nao).T, pLqI.reshape(-1,nao).T,
                                 tmpR.reshape(-1,nao), tmpI.reshape(-1,nao))
             vkR[i] += vk1R
             vkI[i] += vk1I
             if hermi:
                 vkR[i] += vk1R.T
                 vkI[i] -= vk1I.T
             else:
                 tmpR, tmpI = zdotNN(dmsR[i], dmsI[i], pLqR.reshape(nao,-1),
                                     pLqI.reshape(nao,-1), 1, tmpR, tmpI, 0)
                 zdotCN(pjqR.reshape(-1,nao).T, pjqI.reshape(-1,nao).T,
                        tmpR.reshape(-1,nao), tmpI.reshape(-1,nao),
                        1, vkR[i], vkI[i], 1)
Beispiel #20
0
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    t1 = (time.clock(), time.time())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, auxcell)

    # The ideal way to hold the temporary integrals is to store them in the
    # cderi_file and overwrite them inplace in the second pass.  The current
    # HDF5 library does not have an efficient way to manage free space in
    # overwriting.  It often leads to the cderi_file ~2 times larger than the
    # necessary size.  For now, dumping the DF integral intermediates to a
    # separated temporary file can avoid this issue.  The DF intermediates may
    # be terribly huge. The temporary file should be placed in the same disk
    # as cderi_file.
    swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file))
    fswap = lib.H5TmpFile(swapfile.name)
    # Unlink swapfile to avoid trash
    swapfile = None

    outcore._aux_e2(cell, fused_cell, fswap, 'int3c2e', aosym='s2',
                    kptij_lst=kptij_lst, dataname='j3c-junk', max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    mesh = mydf.mesh
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]

    kptis = kptij_lst[:,0]
    kptjs = kptij_lst[:,1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts)

# An alternative method to evalute j2c. This method might have larger numerical error?
#    chgcell = make_modchg_basis(auxcell, mydf.eta)
#    for k, kpt in enumerate(uniq_kpts):
#        aoaux = ft_ao.ft_ao(chgcell, Gv, None, b, gxyz, Gvbase, kpt).T
#        coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, mesh))
#        LkR = aoaux.real * coulG
#        LkI = aoaux.imag * coulG
#        j2caux = numpy.zeros_like(j2c[k])
#        j2caux[naux:,naux:] = j2c[k][naux:,naux:]
#        if is_zero(kpt):  # kpti == kptj
#            j2caux[naux:,naux:] -= lib.ddot(LkR, LkR.T)
#            j2caux[naux:,naux:] -= lib.ddot(LkI, LkI.T)
#            j2c[k] = j2c[k][:naux,:naux] - fuse(fuse(j2caux.T).T)
#            vbar = fuse(mydf.auxbar(fused_cell))
#            s = (vbar != 0).astype(numpy.double)
#            j2c[k] -= numpy.einsum('i,j->ij', vbar, s)
#            j2c[k] -= numpy.einsum('i,j->ij', s, vbar)
#        else:
#            j2cR, j2cI = zdotCN(LkR, LkI, LkR.T, LkI.T)
#            j2caux[naux:,naux:] -= j2cR + j2cI * 1j
#            j2c[k] = j2c[k][:naux,:naux] - fuse(fuse(j2caux.T).T)
#        fswap['j2c/%d'%k] = fuse(fuse(j2c[k]).T).T
#        aoaux = LkR = LkI = coulG = None

    if cell.dimension == 1 or cell.dimension == 2:
        plain_ints = _gaussian_int(fused_cell)

    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    blksize = max(2048, int(max_memory*.5e6/16/fused_cell.nao_nr()))
    log.debug2('max_memory %s (MB)  blocksize %s', max_memory, blksize)
    for k, kpt in enumerate(uniq_kpts):
        coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, mesh))
        for p0, p1 in lib.prange(0, ngrids, blksize):
            aoaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], None, b, gxyz[p0:p1], Gvbase, kpt)
            if (cell.dimension == 1 or cell.dimension == 2) and is_zero(kpt):
                G0idx, SI_on_z = pbcgto.cell._SI_for_uniform_model_charge(cell, Gv[p0:p1])
                aoaux[G0idx] -= numpy.einsum('g,i->gi', SI_on_z, plain_ints)

            aoaux = aoaux.T
            LkR = aoaux.real * coulG[p0:p1]
            LkI = aoaux.imag * coulG[p0:p1]
            aoaux = None

            if is_zero(kpt):  # kpti == kptj
                j2c[k][naux:] -= lib.ddot(LkR[naux:], LkR.T)
                j2c[k][naux:] -= lib.ddot(LkI[naux:], LkI.T)
                j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T
            else:
                j2cR, j2cI = zdotCN(LkR[naux:], LkI[naux:], LkR.T, LkI.T)
                j2c[k][naux:] -= j2cR + j2cI * 1j
                j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T.conj()
            LkR = LkI = None
        fswap['j2c/%d'%k] = fuse(fuse(j2c[k]).T).T
    j2c = coulG = None

    feri = h5py.File(cderi_file, 'w')
    feri['j3c-kptij'] = kptij_lst
    nsegs = len(fswap['j3c-junk/0'])
    def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
        kpt = uniq_kpts[uniq_kptji_id]
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

        shls_slice = (auxcell.nbas, fused_cell.nbas)
        Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
        if (cell.dimension == 1 or cell.dimension == 2) and is_zero(kpt):
            G0idx, SI_on_z = pbcgto.cell._SI_for_uniform_model_charge(cell, Gv)
            s = plain_ints[-Gaux.shape[1]:]  # Only compensated Gaussians
            Gaux[G0idx] -= numpy.einsum('g,i->gi', SI_on_z, s)

        wcoulG = mydf.weighted_coulG(kpt, False, mesh)
        Gaux *= wcoulG.reshape(-1,1)
        kLR = Gaux.real.copy('C')
        kLI = Gaux.imag.copy('C')
        Gaux = None
        j2c = numpy.asarray(fswap['j2c/%d'%uniq_kptji_id])
        try:
            j2c = scipy.linalg.cholesky(j2c, lower=True)
            j2ctag = 'CD'
        except scipy.linalg.LinAlgError as e:
            #msg =('===================================\n'
            #      'J-metric not positive definite.\n'
            #      'It is likely that mesh is not enough.\n'
            #      '===================================')
            #log.error(msg)
            #raise scipy.linalg.LinAlgError('\n'.join([e.message, msg]))
            w, v = scipy.linalg.eigh(j2c)
            log.debug('DF metric linear dependency for kpt %s', uniq_kptji_id)
            log.debug('cond = %.4g, drop %d bfns',
                      w[-1]/w[0], numpy.count_nonzero(w<mydf.linear_dep_threshold))
            v = v[:,w>mydf.linear_dep_threshold].T.conj()
            v /= numpy.sqrt(w[w>mydf.linear_dep_threshold]).reshape(-1,1)
            j2c = v
            j2ctag = 'eig'
        naux0 = j2c.shape[0]

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao*(nao+1)//2

            vbar = mydf.auxbar(fused_cell)
            ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
            ovlp = [lib.pack_tril(s) for s in ovlp]
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory-mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory*.38e6/16/naux/(nkptj+1)), 1), nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16, int(max_memory*.1e6/16/buflen/(nkptj+1)))
        else:
            Gblksize = max(16, int(max_memory*.2e6/16/buflen/(nkptj+1)))
        Gblksize = min(Gblksize, ngrids, 16384)
        pqkRbuf = numpy.empty(buflen*Gblksize)
        pqkIbuf = numpy.empty(buflen*Gblksize)
        # buf for ft_aopair
        buf = numpy.empty(nkptj*buflen*Gblksize, dtype=numpy.complex128)
        def pw_contract(istep, sh_range, j3cR, j3cI):
            bstart, bend, ncol = sh_range
            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)

            for p0, p1 in lib.prange(0, ngrids, Gblksize):
                dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                            b, gxyz[p0:p1], Gvbase, kpt,
                                            adapted_kptjs, out=buf)

                if (cell.dimension == 1 or cell.dimension == 2) and is_zero(kpt):
                    G0idx, SI_on_z = pbcgto.cell._SI_for_uniform_model_charge(cell, Gv[p0:p1])
                    if SI_on_z.size > 0:
                        for k, aoao in enumerate(dat):
                            aoao[G0idx] -= numpy.einsum('g,i->gi', SI_on_z, ovlp[k])
                            aux = fuse(ft_ao.ft_ao(fused_cell, Gv[p0:p1][G0idx]).T)
                            vG_mod = numpy.einsum('ig,g,g->i', aux.conj(),
                                                  wcoulG[p0:p1][G0idx], SI_on_z)
                            if gamma_point(adapted_kptjs[k]):
                                j3cR[k][:naux] -= vG_mod[:,None].real * ovlp[k]
                            else:
                                tmp = vG_mod[:,None] * ovlp[k]
                                j3cR[k][:naux] -= tmp.real
                                j3cI[k][:naux] -= tmp.imag
                            tmp = aux = vG_mod

                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = dat[k].reshape(nG,ncol)
                    pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T

                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkR.T,  1, j3cI[k][naux:], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)
                if j2ctag == 'CD':
                    v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True)
                else:
                    v = lib.dot(j2c, v)
                feri['j3c/%d/%d'%(ji,istep)] = v

        with lib.call_in_background(pw_contract) as compute:
            col1 = 0
            for istep, sh_range in enumerate(shranges):
                log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \
                           istep+1, len(shranges), *sh_range)
                bstart, bend, ncol = sh_range
                col0, col1 = col1, col1+ncol
                j3cR = []
                j3cI = []
                for k, idx in enumerate(adapted_ji_idx):
                    v = numpy.vstack([fswap['j3c-junk/%d/%d'%(idx,i)][0,col0:col1].T
                                      for i in range(nsegs)])
                    if is_zero(kpt) and cell.dimension == 3:
                        for i in numpy.where(vbar != 0)[0]:
                            v[i] -= vbar[i] * ovlp[k][col0:col1]
                    j3cR.append(numpy.asarray(v.real, order='C'))
                    if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                        j3cI.append(None)
                    else:
                        j3cI.append(numpy.asarray(v.imag, order='C'))
                v = None
                compute(istep, sh_range, j3cR, j3cI)
        for ji in adapted_ji_idx:
            del(fswap['j3c-junk/%d'%ji])

    for k, kpt in enumerate(uniq_kpts):
        make_kpt(k)

    feri.close()
Beispiel #21
0
def _contract_vvvv_t2(mycc, mol, vvL, t2, out=None, verbose=None):
    '''Ht2 = numpy.einsum('ijcd,acdb->ijab', t2, vvvv)

    Args:
        vvvv : None or integral object
            if vvvv is None, contract t2 to AO-integrals using AO-direct algorithm
    '''
    _dgemm = lib.numpy_helper._dgemm
    time0 = time.clock(), time.time()
    log = logger.new_logger(mol, verbose)

    naux = vvL.shape[-1]
    nvira, nvirb = t2.shape[-2:]
    x2 = t2.reshape(-1, nvira, nvirb)
    nocc2 = x2.shape[0]
    nvir2 = nvira * nvirb
    Ht2 = numpy.ndarray(x2.shape, buffer=out)
    Ht2[:] = 0

    max_memory = max(MEMORYMIN, mycc.max_memory - lib.current_memory()[0])

    def contract_blk_(eri, i0, i1, j0, j1):
        ic = i1 - i0
        jc = j1 - j0
        #:Ht2[:,j0:j1] += numpy.einsum('xef,efab->xab', x2[:,i0:i1], eri)
        _dgemm('N', 'N', nocc2, jc * nvirb, ic * nvirb, x2.reshape(-1, nvir2),
               eri.reshape(-1, jc * nvirb), Ht2.reshape(-1, nvir2), 1, 1,
               i0 * nvirb, 0, j0 * nvirb)

        if i0 > j0:
            #:Ht2[:,i0:i1] += numpy.einsum('xef,abef->xab', x2[:,j0:j1], eri)
            _dgemm('N', 'T', nocc2, ic * nvirb, jc * nvirb,
                   x2.reshape(-1, nvir2), eri.reshape(-1, jc * nvirb),
                   Ht2.reshape(-1, nvir2), 1, 1, j0 * nvirb, 0, i0 * nvirb)


#TODO: check if vvL can be entirely loaded into memory

    nvir_pair = nvirb * (nvirb + 1) // 2
    dmax = numpy.sqrt(max_memory * .7e6 / 8 / nvirb**2 / 2)
    dmax = int(min((nvira + 3) // 4, max(ccsd.BLKMIN, dmax)))
    vvblk = (max_memory * 1e6 / 8 - dmax**2 * (nvirb**2 * 1.5 + naux)) / naux
    vvblk = int(min((nvira + 3) // 4, max(ccsd.BLKMIN, vvblk / naux)))
    eribuf = numpy.empty((dmax, dmax, nvir_pair))
    loadbuf = numpy.empty((dmax, dmax, nvirb, nvirb))
    tril2sq = lib.square_mat_in_trilu_indices(nvira)

    for i0, i1 in lib.prange(0, nvira, dmax):
        off0 = i0 * (i0 + 1) // 2
        off1 = i1 * (i1 + 1) // 2
        vvL0 = _cp(vvL[off0:off1])
        for j0, j1 in lib.prange(0, i1, dmax):
            ijL = vvL0[tril2sq[i0:i1, j0:j1] - off0].reshape(-1, naux)
            eri = numpy.ndarray(((i1 - i0) * (j1 - j0), nvir_pair),
                                buffer=eribuf)
            for p0, p1 in lib.prange(0, nvir_pair, vvblk):
                vvL1 = _cp(vvL[p0:p1])
                eri[:, p0:p1] = lib.ddot(ijL, vvL1.T)
                vvL1 = None

            tmp = numpy.ndarray((i1 - i0, nvirb, j1 - j0, nvirb),
                                buffer=loadbuf)
            _ccsd.libcc.CCload_eri(tmp.ctypes.data_as(ctypes.c_void_p),
                                   eri.ctypes.data_as(ctypes.c_void_p),
                                   (ctypes.c_int * 4)(i0, i1, j0, j1),
                                   ctypes.c_int(nvirb))
            contract_blk_(tmp, i0, i1, j0, j1)
            time0 = log.timer_debug1('vvvv [%d:%d,%d:%d]' % (i0, i1, j0, j1),
                                     *time0)
    return Ht2.reshape(t2.shape)
Beispiel #22
0
def _make_df_eris(cc, mo_coeff=None):
    eris = _ChemistsERIs()
    eris._common_init_(cc, mo_coeff)
    nocc = eris.nocc
    nmo = eris.fock.shape[0]
    nvir = nmo - nocc
    nvir_pair = nvir * (nvir + 1) // 2
    with_df = cc.with_df
    naux = eris.naux = with_df.get_naoaux()

    eris.feri = lib.H5TmpFile()
    eris.oooo = eris.feri.create_dataset('oooo', (nocc, nocc, nocc, nocc),
                                         'f8')
    eris.ovoo = eris.feri.create_dataset('ovoo', (nocc, nvir, nocc, nocc),
                                         'f8',
                                         chunks=(nocc, 1, nocc, nocc))
    eris.ovov = eris.feri.create_dataset('ovov', (nocc, nvir, nocc, nvir),
                                         'f8',
                                         chunks=(nocc, 1, nocc, nvir))
    eris.ovvo = eris.feri.create_dataset('ovvo', (nocc, nvir, nvir, nocc),
                                         'f8',
                                         chunks=(nocc, 1, nvir, nocc))
    eris.oovv = eris.feri.create_dataset('oovv', (nocc, nocc, nvir, nvir),
                                         'f8',
                                         chunks=(nocc, nocc, 1, nvir))
    # nrow ~ 4e9/8/blockdim to ensure hdf5 chunk < 4GB
    chunks = (min(nvir_pair,
                  int(4e8 / with_df.blockdim)), min(naux, with_df.blockdim))
    eris.vvL = eris.feri.create_dataset('vvL', (nvir_pair, naux),
                                        'f8',
                                        chunks=chunks)

    Loo = numpy.empty((naux, nocc, nocc))
    Lov = numpy.empty((naux, nocc, nvir))
    mo = numpy.asarray(eris.mo_coeff, order='F')
    ijslice = (0, nmo, 0, nmo)
    p1 = 0
    Lpq = None
    for k, eri1 in enumerate(with_df.loop()):
        Lpq = _ao2mo.nr_e2(eri1, mo, ijslice, aosym='s2', mosym='s1', out=Lpq)
        p0, p1 = p1, p1 + Lpq.shape[0]
        Lpq = Lpq.reshape(p1 - p0, nmo, nmo)
        Loo[p0:p1] = Lpq[:, :nocc, :nocc]
        Lov[p0:p1] = Lpq[:, :nocc, nocc:]
        Lvv = lib.pack_tril(Lpq[:, nocc:, nocc:])
        eris.vvL[:, p0:p1] = Lvv.T
    Lpq = Lvv = None
    Loo = Loo.reshape(naux, nocc**2)
    #Lvo = Lov.transpose(0,2,1).reshape(naux,nvir*nocc)
    Lov = Lov.reshape(naux, nocc * nvir)
    eris.oooo[:] = lib.ddot(Loo.T, Loo).reshape(nocc, nocc, nocc, nocc)
    eris.ovoo[:] = lib.ddot(Lov.T, Loo).reshape(nocc, nvir, nocc, nocc)
    ovov = lib.ddot(Lov.T, Lov).reshape(nocc, nvir, nocc, nvir)
    eris.ovov[:] = ovov
    eris.ovvo[:] = ovov.transpose(0, 1, 3, 2)
    ovov = None

    mem_now = lib.current_memory()[0]
    max_memory = max(0, cc.max_memory - mem_now)
    blksize = max(
        ccsd.BLKMIN,
        int((max_memory * .9e6 / 8 - nocc**2 * nvir_pair) / (nocc**2 + naux)))
    oovv_tril = numpy.empty((nocc * nocc, nvir_pair))
    for p0, p1 in lib.prange(0, nvir_pair, blksize):
        oovv_tril[:, p0:p1] = lib.ddot(Loo.T, _cp(eris.vvL[p0:p1]).T)
    eris.oovv[:] = lib.unpack_tril(oovv_tril).reshape(nocc, nocc, nvir, nvir)
    oovv_tril = Loo = None

    Lov = Lov.reshape(naux, nocc, nvir)
    vblk = max(nocc, int((max_memory * .15e6 / 8) / (nocc * nvir_pair)))
    vvblk = int(
        min(nvir_pair, 4e8 / nocc,
            max(4, (max_memory * .8e6 / 8) / (vblk * nocc + naux))))
    eris.ovvv = eris.feri.create_dataset('ovvv', (nocc, nvir, nvir_pair),
                                         'f8',
                                         chunks=(nocc, 1, vvblk))
    for q0, q1 in lib.prange(0, nvir_pair, vvblk):
        vvL = _cp(eris.vvL[q0:q1])
        for p0, p1 in lib.prange(0, nvir, vblk):
            tmpLov = _cp(Lov[:, :, p0:p1]).reshape(naux, -1)
            eris.ovvv[:, p0:p1,
                      q0:q1] = lib.ddot(tmpLov.T,
                                        vvL.T).reshape(nocc, p1 - p0, q1 - q0)
        vvL = None
    return eris
Beispiel #23
0
def general(mydf, mo_coeffs, kpts=None, compact=True):
    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    if isinstance(mo_coeffs, numpy.ndarray) and mo_coeffs.ndim == 2:
        mo_coeffs = (mo_coeffs, ) * 4
    q = kptj - kpti
    coulG = mydf.weighted_coulG(q, False, mydf.gs)
    all_real = not any(numpy.iscomplexobj(mo) for mo in mo_coeffs)
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]) * .5)

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if gamma_point(kptijkl) and all_real:
        ijmosym, nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0],
                                                     mo_coeffs[1], compact)
        klmosym, nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2],
                                                     mo_coeffs[3], compact)
        eri_mo = numpy.zeros((nij_pair, nkl_pair))
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2])
               and iden_coeffs(mo_coeffs[1], mo_coeffs[3]))

        ijR = ijI = klR = klI = buf = None
        for pqkR, pqkI, p0, p1 \
                in mydf.pw_loop(mydf.gs, kptijkl[:2], q, max_memory=max_memory,
                                aosym='s2'):
            vG = numpy.sqrt(coulG[p0:p1])
            pqkR *= vG
            pqkI *= vG
            buf = lib.transpose(pqkR, out=buf)
            ijR, klR = _dtrans(buf, ijR, ijmosym, moij, ijslice, buf, klR,
                               klmosym, mokl, klslice, sym)
            lib.ddot(ijR.T, klR, 1, eri_mo, 1)
            buf = lib.transpose(pqkI, out=buf)
            ijI, klI = _dtrans(buf, ijI, ijmosym, moij, ijslice, buf, klI,
                               klmosym, mokl, klslice, sym)
            lib.ddot(ijI.T, klI, 1, eri_mo, 1)
            pqkR = pqkI = None
        return eri_mo

####################
# (kpt) i == j == k == l != 0
# (kpt) i == l && j == k && i != j && j != k  =>
#
    elif is_zero(kpti - kptl) and is_zero(kptj - kptk):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nlk_pair, molk, lkslice = _conc_mos(mo_coeffs[3], mo_coeffs[2])[1:]
        eri_mo = numpy.zeros((nij_pair, nlk_pair), dtype=numpy.complex)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[3])
               and iden_coeffs(mo_coeffs[1], mo_coeffs[2]))

        zij = zlk = buf = None
        for pqkR, pqkI, p0, p1 \
                in mydf.pw_loop(mydf.gs, kptijkl[:2], q, max_memory=max_memory):
            buf = lib.transpose(pqkR + pqkI * 1j, out=buf)
            buf *= numpy.sqrt(coulG[p0:p1]).reshape(-1, 1)
            zij, zlk = _ztrans(buf, zij, moij, ijslice, buf, zlk, molk,
                               lkslice, sym)
            lib.dot(zij.T, zlk.conj(), 1, eri_mo, 1)
            pqkR = pqkI = None
        nmok = mo_coeffs[2].shape[1]
        nmol = mo_coeffs[3].shape[1]
        eri_mo = lib.transpose(eri_mo.reshape(-1, nmol, nmok), axes=(0, 2, 1))
        return eri_mo.reshape(nij_pair, nlk_pair)

####################
# aosym = s1, complex integrals
#
# If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
# vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
# So  kptl/b - kptk/b  must be -1 < k/b < 1.  =>  kptl == kptk
#
    else:
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        eri_mo = numpy.zeros((nij_pair, nkl_pair), dtype=numpy.complex)

        tao = []
        ao_loc = None
        zij = zkl = buf = None
        for (pqkR, pqkI, p0, p1), (rskR, rskI, q0, q1) in \
                lib.izip(mydf.pw_loop(mydf.gs, kptijkl[:2], q, max_memory=max_memory*.5),
                         mydf.pw_loop(mydf.gs,-kptijkl[2:], q, max_memory=max_memory*.5)):
            buf = lib.transpose(pqkR + pqkI * 1j, out=buf)
            zij = _ao2mo.r_e2(buf, moij, ijslice, tao, ao_loc, out=zij)
            buf = lib.transpose(rskR - rskI * 1j, out=buf)
            zkl = _ao2mo.r_e2(buf, mokl, klslice, tao, ao_loc, out=zkl)
            zij *= coulG[p0:p1].reshape(-1, 1)
            lib.dot(zij.T, zkl, 1, eri_mo, 1)
            pqkR = pqkI = rskR = rskI = None
        return eri_mo
Beispiel #24
0
def general(mydf, mo_coeffs, kpts=None, compact=True):
    if mydf._cderi is None:
        mydf.build()

    cell = mydf.cell
    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    if isinstance(mo_coeffs, numpy.ndarray) and mo_coeffs.ndim == 2:
        mo_coeffs = (mo_coeffs,) * 4
    all_real = not any(numpy.iscomplexobj(mo) for mo in mo_coeffs)
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]) * 0.5)

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if abs(kptijkl).sum() < KPT_DIFF_TOL and all_real:
        ijmosym, nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1], compact)
        klmosym, nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3], compact)
        eri_mo = numpy.zeros((nij_pair, nkl_pair))
        sym = iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and iden_coeffs(mo_coeffs[1], mo_coeffs[3])
        ijR = klR = None
        for LpqR, LpqI in mydf.sr_loop(kptijkl[:2], max_memory, True):
            ijR, klR = _dtrans(LpqR, ijR, ijmosym, moij, ijslice, LpqR, klR, klmosym, mokl, klslice, sym)
            lib.ddot(ijR.T, klR, 1, eri_mo, 1)
            LpqR = LpqI = None
        return eri_mo

    elif (abs(kpti - kptk).sum() < KPT_DIFF_TOL) and (abs(kptj - kptl).sum() < KPT_DIFF_TOL):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        eri_mo = numpy.zeros((nij_pair, nkl_pair), dtype=numpy.complex)
        sym = iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and iden_coeffs(mo_coeffs[1], mo_coeffs[3])

        zij = zkl = None
        for LpqR, LpqI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            buf = LpqR + LpqI * 1j
            zij, zkl = _ztrans(buf, zij, moij, ijslice, buf, zkl, mokl, klslice, sym)
            lib.dot(zij.T, zkl, 1, eri_mo, 1)
            LpqR = LpqI = buf = None
        return eri_mo

    ####################
    # (kpt) i == j == k == l != 0
    # (kpt) i == l && j == k && i != j && j != k  =>
    #
    elif (abs(kpti - kptl).sum() < KPT_DIFF_TOL) and (abs(kptj - kptk).sum() < KPT_DIFF_TOL):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nlk_pair, molk, lkslice = _conc_mos(mo_coeffs[3], mo_coeffs[2])[1:]
        eri_mo = numpy.zeros((nij_pair, nlk_pair), dtype=numpy.complex)
        sym = iden_coeffs(mo_coeffs[0], mo_coeffs[3]) and iden_coeffs(mo_coeffs[1], mo_coeffs[2])

        zij = zlk = None
        for LpqR, LpqI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            buf = LpqR + LpqI * 1j
            zij, zlk = _ztrans(buf, zij, moij, ijslice, buf, zlk, molk, lkslice, sym)
            lib.dot(zij.T, zlk.conj(), 1, eri_mo, 1)
            LpqR = LpqI = buf = None
        nmok = mo_coeffs[2].shape[1]
        nmol = mo_coeffs[3].shape[1]
        eri_mo = lib.transpose(eri_mo.reshape(-1, nmol, nmok), axes=(0, 2, 1))
        return eri_mo.reshape(nij_pair, nlk_pair)

    ####################
    # aosym = s1, complex integrals
    #
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
    # vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
    # So  kptl/b - kptk/b  must be -1 < k/b < 1.  =>  kptl == kptk
    #
    else:
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        eri_mo = numpy.zeros((nij_pair, nkl_pair), dtype=numpy.complex)

        zij = zkl = None
        for (LpqR, LpqI), (LrsR, LrsI) in lib.izip(
            mydf.sr_loop(kptijkl[:2], max_memory, False), mydf.sr_loop(kptijkl[2:], max_memory, False)
        ):
            zij, zkl = _ztrans(LpqR + LpqI * 1j, zij, moij, ijslice, LrsR + LrsI * 1j, zkl, mokl, klslice, False)
            lib.dot(zij.T, zkl, 1, eri_mo, 1)
            LpqR = LpqI = LrsR = LrsI = None
        return eri_mo
Beispiel #25
0
def _ao2mo_ovov(mp, orbs, feri, max_memory=2000, verbose=None):
    time0 = (time.clock(), time.time())
    log = logger.new_logger(mp, verbose)
    orboa = numpy.asarray(orbs[0], order='F')
    orbva = numpy.asarray(orbs[1], order='F')
    orbob = numpy.asarray(orbs[2], order='F')
    orbvb = numpy.asarray(orbs[3], order='F')
    nao, nocca = orboa.shape
    noccb = orbob.shape[1]
    nvira = orbva.shape[1]
    nvirb = orbvb.shape[1]

    mol = mp.mol
    int2e = mol._add_suffix('int2e')
    ao2mopt = _ao2mo.AO2MOpt(mol, int2e, 'CVHFnr_schwarz_cond',
                             'CVHFsetnr_direct_scf')
    nbas = mol.nbas
    assert(nvira <= nao)
    assert(nvirb <= nao)

    ao_loc = mol.ao_loc_nr()
    dmax = max(4, min(nao/3, numpy.sqrt(max_memory*.95e6/8/(nao+nocca)**2)))
    sh_ranges = ao2mo.outcore.balance_partition(ao_loc, dmax)
    dmax = max(x[2] for x in sh_ranges)
    eribuf = numpy.empty((nao,dmax,dmax,nao))
    ftmp = lib.H5TmpFile()
    disk = (nocca**2*(nao*(nao+dmax)/2+nvira**2) +
            noccb**2*(nao*(nao+dmax)/2+nvirb**2) +
            nocca*noccb*(nao**2+nvira*nvirb))
    log.debug('max_memory %s MB (dmax = %s) required disk space %g MB',
              max_memory, dmax, disk*8/1e6)

    fint = gto.moleintor.getints4c
    aa_blk_slices = []
    ab_blk_slices = []
    count_ab = 0
    count_aa = 0
    time1 = time0
    with lib.call_in_background(ftmp.__setitem__) as save:
        for ish0, ish1, ni in sh_ranges:
            for jsh0, jsh1, nj in sh_ranges:
                i0, i1 = ao_loc[ish0], ao_loc[ish1]
                j0, j1 = ao_loc[jsh0], ao_loc[jsh1]

                eri = fint(int2e, mol._atm, mol._bas, mol._env,
                           shls_slice=(0,nbas,ish0,ish1, jsh0,jsh1,0,nbas),
                           aosym='s1', ao_loc=ao_loc, cintopt=ao2mopt._cintopt,
                           out=eribuf)
                tmp_i = lib.ddot(orboa.T, eri.reshape(nao,(i1-i0)*(j1-j0)*nao))
                tmp_li = lib.ddot(orbob.T, tmp_i.reshape(nocca*(i1-i0)*(j1-j0),nao).T)
                tmp_li = tmp_li.reshape(noccb,nocca,(i1-i0),(j1-j0))
                save('ab/%d'%count_ab, tmp_li.transpose(1,0,2,3))
                ab_blk_slices.append((i0,i1,j0,j1))
                count_ab += 1

                if ish0 >= jsh0:
                    tmp_li = lib.ddot(orboa.T, tmp_i.reshape(nocca*(i1-i0)*(j1-j0),nao).T)
                    tmp_li = tmp_li.reshape(nocca,nocca,(i1-i0),(j1-j0))
                    save('aa/%d'%count_aa, tmp_li.transpose(1,0,2,3))

                    tmp_i = lib.ddot(orbob.T, eri.reshape(nao,(i1-i0)*(j1-j0)*nao))
                    tmp_li = lib.ddot(orbob.T, tmp_i.reshape(noccb*(i1-i0)*(j1-j0),nao).T)
                    tmp_li = tmp_li.reshape(noccb,noccb,(i1-i0),(j1-j0))
                    save('bb/%d'%count_aa, tmp_li.transpose(1,0,2,3))
                    aa_blk_slices.append((i0,i1,j0,j1))
                    count_aa += 1

                time1 = log.timer_debug1('partial ao2mo [%d:%d,%d:%d]' %
                                         (ish0,ish1,jsh0,jsh1), *time1)
    time1 = time0 = log.timer('mp2 ao2mo_ovov pass1', *time0)
    eri = eribuf = tmp_i = tmp_li = None

    fovov = feri.create_dataset('ovov', (nocca*nvira,nocca*nvira), 'f8',
                                chunks=(nvira,nvira))
    fovOV = feri.create_dataset('ovOV', (nocca*nvira,noccb*nvirb), 'f8',
                                chunks=(nvira,nvirb))
    fOVOV = feri.create_dataset('OVOV', (noccb*nvirb,noccb*nvirb), 'f8',
                                chunks=(nvirb,nvirb))
    occblk = int(min(max(nocca,noccb),
                     max(4, 250/nocca, max_memory*.9e6/8/(nao**2*nocca)/5)))

    def load_aa(h5g, nocc, i0, eri):
        if i0 < nocc:
            i1 = min(i0+occblk, nocc)
            for k, (p0,p1,q0,q1) in enumerate(aa_blk_slices):
                eri[:i1-i0,:,p0:p1,q0:q1] = h5g[str(k)][i0:i1]
                if p0 != q0:
                    dat = numpy.asarray(h5g[str(k)][:,i0:i1])
                    eri[:i1-i0,:,q0:q1,p0:p1] = dat.transpose(1,0,3,2)

    def load_ab(h5g, nocca, i0, eri):
        if i0 < nocca:
            i1 = min(i0+occblk, nocca)
            for k, (p0,p1,q0,q1) in enumerate(ab_blk_slices):
                eri[:i1-i0,:,p0:p1,q0:q1] = h5g[str(k)][i0:i1]

    def save(h5dat, nvir, i0, i1, dat):
        for i in range(i0, i1):
            h5dat[i*nvir:(i+1)*nvir] = dat[i-i0].reshape(nvir,-1)

    with lib.call_in_background(save) as bsave:
        with lib.call_in_background(load_aa) as prefetch:
            buf_prefecth = numpy.empty((occblk,nocca,nao,nao))
            buf = numpy.empty_like(buf_prefecth)
            load_aa(ftmp['aa'], nocca, 0, buf_prefecth)
            for i0, i1 in lib.prange(0, nocca, occblk):
                buf, buf_prefecth = buf_prefecth, buf
                prefetch(ftmp['aa'], nocca, i1, buf_prefecth)
                eri = buf[:i1-i0].reshape((i1-i0)*nocca,nao,nao)
                dat = _ao2mo.nr_e2(eri, orbva, (0,nvira,0,nvira), 's1', 's1')
                bsave(fovov, nvira, i0, i1,
                      dat.reshape(i1-i0,nocca,nvira,nvira).transpose(0,2,1,3))
                time1 = log.timer_debug1('pass2 ao2mo for aa [%d:%d]' % (i0,i1), *time1)

            buf_prefecth = numpy.empty((occblk,noccb,nao,nao))
            buf = numpy.empty_like(buf_prefecth)
            load_aa(ftmp['bb'], noccb, 0, buf_prefecth)
            for i0, i1 in lib.prange(0, noccb, occblk):
                buf, buf_prefecth = buf_prefecth, buf
                prefetch(ftmp['bb'], noccb, i1, buf_prefecth)
                eri = buf[:i1-i0].reshape((i1-i0)*noccb,nao,nao)
                dat = _ao2mo.nr_e2(eri, orbvb, (0,nvirb,0,nvirb), 's1', 's1')
                bsave(fOVOV, nvirb, i0, i1,
                      dat.reshape(i1-i0,noccb,nvirb,nvirb).transpose(0,2,1,3))
                time1 = log.timer_debug1('pass2 ao2mo for bb [%d:%d]' % (i0,i1), *time1)

        orbvab = numpy.asarray(numpy.hstack((orbva, orbvb)), order='F')
        with lib.call_in_background(load_ab) as prefetch:
            load_ab(ftmp['ab'], nocca, 0, buf_prefecth)
            for i0, i1 in lib.prange(0, nocca, occblk):
                buf, buf_prefecth = buf_prefecth, buf
                prefetch(ftmp['ab'], nocca, i1, buf_prefecth)
                eri = buf[:i1-i0].reshape((i1-i0)*noccb,nao,nao)
                dat = _ao2mo.nr_e2(eri, orbvab, (0,nvira,nvira,nvira+nvirb), 's1', 's1')
                bsave(fovOV, nvira, i0, i1,
                      dat.reshape(i1-i0,noccb,nvira,nvirb).transpose(0,2,1,3))
                time1 = log.timer_debug1('pass2 ao2mo for ab [%d:%d]' % (i0,i1), *time1)

    time0 = log.timer('mp2 ao2mo_ovov pass2', *time0)
Beispiel #26
0
def contract(myci, civec, eris):
    time0 = time.clock(), time.time()
    log = logger.Logger(myci.stdout, myci.verbose)
    nocc = myci.nocc
    nmo = myci.nmo
    nvir = nmo - nocc
    nov = nocc * nvir
    noo = nocc**2
    c0 = civec[0]
    c1 = civec[1:nov+1].reshape(nocc,nvir)
    c2 = civec[nov+1:].reshape(nocc,nocc,nvir,nvir)

    cinew = numpy.zeros_like(civec)
    t1 = cinew[1:nov+1].reshape(nocc,nvir)
    t2 = cinew[nov+1:].reshape(nocc,nocc,nvir,nvir)
    t2new_tril = numpy.zeros((nocc*(nocc+1)//2,nvir,nvir))
    myci.add_wvvVV_(c2, eris, t2new_tril)
    for i in range(nocc):
        for j in range(i+1):
            t2[i,j] = t2new_tril[i*(i+1)//2+j]
        t2[i,i] *= .5
    t2new_tril = None
    time1 = log.timer_debug1('vvvv', *time0)
    #:t2 += numpy.einsum('iklj,klab->ijab', _cp(eris.oooo)*.5, c2)
    oooo = lib.transpose(_cp(eris.oooo).reshape(nocc,noo,nocc), axes=(0,2,1))
    lib.ddot(oooo.reshape(noo,noo), c2.reshape(noo,-1), .5, t2.reshape(noo,-1), 1)

    foo = eris.fock[:nocc,:nocc].copy()
    fov = eris.fock[:nocc,nocc:].copy()
    fvv = eris.fock[nocc:,nocc:].copy()

    t1+= fov * c0
    t1+= numpy.einsum('ib,ab->ia', c1, fvv)
    t1-= numpy.einsum('ja,ji->ia', c1, foo)

    #:t2 += numpy.einsum('bc,ijac->ijab', fvv, c2)
    #:t2 -= numpy.einsum('kj,kiba->ijab', foo, c2)
    #:t2 += numpy.einsum('ia,jb->ijab', c1, fov)
    lib.ddot(c2.reshape(-1,nvir), fvv, 1, t2.reshape(-1,nvir), 1)
    lib.ddot(foo, c2.reshape(nocc,-1),-1, t2.reshape(nocc,-1), 1)
    for j in range(nocc):
        t2[:,j] += numpy.einsum('ia,b->iab', c1, fov[j])

    eris_vovv = lib.unpack_tril(eris.vovv).reshape(nvir,nocc,nvir,-1)
    unit = _memory_usage_inloop(nocc, nvir)
    max_memory = max(2000, myci.max_memory - lib.current_memory()[0])
    blksize = min(nvir, max(ccsd.BLKMIN, int(max_memory/unit)))
    log.debug1('max_memory %d MB,  nocc,nvir = %d,%d  blksize = %d',
               max_memory, nocc, nvir, blksize)
    nvir_pair = nvir * (nvir+1) // 2
    for p0, p1 in lib.prange(0, nvir, blksize):
        eris_vvoo = _cp(eris.vvoo[p0:p1])
        oovv = lib.transpose(eris_vvoo.reshape(-1,nocc**2))
        #:eris_oVoV = eris_vvoo.transpose(2,0,3,1)
        eris_oVoV = numpy.ndarray((nocc,p1-p0,nocc,nvir))
        eris_oVoV[:] = oovv.reshape(nocc,nocc,p1-p0,nvir).transpose(0,2,1,3)
        eris_vvoo = oovv = None
        #:tmp = numpy.einsum('ikca,jbkc->jiba', c2, eris_oVoV)
        #:t2[:,:,p0:p1] -= tmp*.5
        #:t2[:,:,p0:p1] -= tmp.transpose(1,0,2,3)
        for i in range(nocc):
            tmp = lib.ddot(eris_oVoV.reshape(-1,nov), c2[i].reshape(nov,nvir))
            tmp = tmp.reshape(nocc,p1-p0,nvir)
            t2[:,i,p0:p1] -= tmp*.5
            t2[i,:,p0:p1] -= tmp

        eris_voov = _cp(eris.voov[p0:p1])
        for i in range(p0, p1):
            t2[:,:,i] += eris_voov[i-p0] * (c0 * .5)
        t1[:,p0:p1] += numpy.einsum('jb,aijb->ia', c1, eris_voov) * 2
        t1[:,p0:p1] -= numpy.einsum('jb,iajb->ia', c1, eris_oVoV)

        #:ovov = eris_voov.transpose(2,0,1,3) - eris_vvoo.transpose(2,0,3,1)
        ovov = eris_oVoV
        ovov *= -.5
        for i in range(nocc):
            ovov[i] += eris_voov[:,:,i]
        eris_oVoV = eris_vvoo = None
        #:theta = c2[:,:,p0:p1]
        #:theta = theta * 2 - theta.transpose(1,0,2,3)
        #:theta = theta.transpose(2,0,1,3)
        theta = numpy.ndarray((p1-p0,nocc,nocc,nvir))
        for i in range(p0, p1):
            theta[i-p0] = c2[:,:,i] * 2
            theta[i-p0]-= c2[:,:,i].transpose(1,0,2)
        #:t2 += numpy.einsum('ckia,jckb->ijab', theta, ovov)
        for j in range(nocc):
            tmp = lib.ddot(theta.reshape(-1,nov).T, ovov[j].reshape(-1,nvir))
            t2[:,j] += tmp.reshape(nocc,nvir,nvir)
        tmp = ovov = None

        t1[:,p0:p1] += numpy.einsum('aijb,jb->ia', theta, fov)

        eris_vooo = _cp(eris.vooo[p0:p1])
        #:t1 -= numpy.einsum('bjka,bjki->ia', theta, eris_vooo)
        #:t2[:,:,p0:p1] -= numpy.einsum('ka,bjik->jiba', c1, eris_vooo)
        lib.ddot(eris_vooo.reshape(-1,nocc).T, theta.reshape(-1,nvir), -1, t1, 1)
        for i in range(p0, p1):
            t2[:,:,i] -= lib.ddot(eris_vooo[i-p0].reshape(noo,-1), c1).reshape(nocc,nocc,-1)
        eris_vooo = None

        eris_vovv = _cp(eris.vovv[p0:p1]).reshape(-1,nvir_pair)
        eris_vovv = lib.unpack_tril(eris_vovv).reshape(p1-p0,nocc,nvir,nvir)
        #:t1 += numpy.einsum('cjib,cjba->ia', theta, eris_vovv)
        #:t2[:,:,p0:p1] += numpy.einsum('jc,aibc->ijab', c1, eris_vovv)
        theta = lib.transpose(theta.reshape(-1,nocc,nvir), axes=(0,2,1))
        lib.ddot(theta.reshape(-1,nocc).T, eris_vovv.reshape(-1,nvir), 1, t1, 1)
        for i in range(p0, p1):
            tmp = lib.ddot(c1, eris_vovv[i-p0].reshape(-1,nvir).T)
            t2[:,:,i] += tmp.reshape(nocc,nocc,nvir).transpose(1,0,2)
        tmp = eris_vovv = None

    for i in range(nocc):
        for j in range(i+1):
            t2[i,j]+= t2[j,i].T
            t2[j,i] = t2[i,j].T

    cinew[0] += numpy.einsum('ia,ia->', fov, c1) * 2
    cinew[0] += numpy.einsum('aijb,ijab->', eris.voov, c2) * 2
    cinew[0] -= numpy.einsum('aijb,jiab->', eris.voov, c2)
    return cinew
Beispiel #27
0
def general(mydf, mo_coeffs, kpts=None,
            compact=getattr(__config__, 'pbc_df_ao2mo_general_compact', True)):
    warn_pbc2d_eri(mydf)
    if mydf._cderi is None:
        mydf.build()

    cell = mydf.cell
    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    if isinstance(mo_coeffs, numpy.ndarray) and mo_coeffs.ndim == 2:
        mo_coeffs = (mo_coeffs,) * 4
    if not _iskconserv(cell, kptijkl):
        lib.logger.warn(cell, 'df_ao2mo: momentum conservation not found in '
                        'the given k-points %s', kptijkl)
        return numpy.zeros([mo.shape[1] for mo in mo_coeffs])

    all_real = not any(numpy.iscomplexobj(mo) for mo in mo_coeffs)
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]))

####################
# gamma point, the integral is real and with s4 symmetry
    if gamma_point(kptijkl) and all_real:
        ijmosym, nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1], compact)
        klmosym, nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3], compact)
        eri_mo = numpy.zeros((nij_pair,nkl_pair))
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[3]))
        ijR = klR = None
        for LpqR, LpqI, sign in mydf.sr_loop(kptijkl[:2], max_memory, True):
            ijR, klR = _dtrans(LpqR, ijR, ijmosym, moij, ijslice,
                               LpqR, klR, klmosym, mokl, klslice, sym)
            lib.ddot(ijR.T, klR, sign, eri_mo, 1)
            LpqR = LpqI = None
        return eri_mo

    elif is_zero(kpti-kptk) and is_zero(kptj-kptl):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        eri_mo = numpy.zeros((nij_pair,nkl_pair), dtype=numpy.complex)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[3]))

        zij = zkl = None
        for LpqR, LpqI, sign in mydf.sr_loop(kptijkl[:2], max_memory, False):
            buf = LpqR+LpqI*1j
            zij, zkl = _ztrans(buf, zij, moij, ijslice,
                               buf, zkl, mokl, klslice, sym)
            lib.dot(zij.T, zkl, sign, eri_mo, 1)
            LpqR = LpqI = buf = None
        return eri_mo

####################
# (kpt) i == j == k == l != 0
# (kpt) i == l && j == k && i != j && j != k  =>
#
    elif is_zero(kpti-kptl) and is_zero(kptj-kptk):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nlk_pair, molk, lkslice = _conc_mos(mo_coeffs[3], mo_coeffs[2])[1:]
        eri_mo = numpy.zeros((nij_pair,nlk_pair), dtype=numpy.complex)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[3]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[2]))

        zij = zlk = None
        for LpqR, LpqI, sign in mydf.sr_loop(kptijkl[:2], max_memory, False):
            buf = LpqR+LpqI*1j
            zij, zlk = _ztrans(buf, zij, moij, ijslice,
                               buf, zlk, molk, lkslice, sym)
            lib.dot(zij.T, zlk.conj(), sign, eri_mo, 1)
            LpqR = LpqI = buf = None
        nmok = mo_coeffs[2].shape[1]
        nmol = mo_coeffs[3].shape[1]
        eri_mo = lib.transpose(eri_mo.reshape(-1,nmol,nmok), axes=(0,2,1))
        return eri_mo.reshape(nij_pair,nlk_pair)

####################
# aosym = s1, complex integrals
#
# If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
# vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
# So  kptl/b - kptk/b  must be -1 < k/b < 1.  =>  kptl == kptk
#
    else:
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        nao = mo_coeffs[0].shape[0]
        eri_mo = numpy.zeros((nij_pair,nkl_pair), dtype=numpy.complex)

        blksize = int(min(max_memory*.3e6/16/nij_pair,
                          max_memory*.3e6/16/nkl_pair,
                          max_memory*.3e6/16/nao**2))
        zij = zkl = None
        for (LpqR, LpqI, sign), (LrsR, LrsI, sign1) in \
                lib.izip(mydf.sr_loop(kptijkl[:2], max_memory, False, blksize),
                         mydf.sr_loop(kptijkl[2:], max_memory, False, blksize)):
            zij, zkl = _ztrans(LpqR+LpqI*1j, zij, moij, ijslice,
                               LrsR+LrsI*1j, zkl, mokl, klslice, False)
            lib.dot(zij.T, zkl, sign, eri_mo, 1)
            LpqR = LpqI = LrsR = LrsI = None
        return eri_mo
Beispiel #28
0
def _make_j3c(mydf, cell, auxcell, kptij_lst):
    t1 = (time.clock(), time.time())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, auxcell)
    outcore.aux_e2(cell, fused_cell, mydf._cderi, 'cint3c2e_sph',
                   kptij_lst=kptij_lst, dataname='j3c', max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    gs = mydf.gs
    Gv, Gvbase, kws = cell.get_Gv_weights(gs)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngs = gxyz.shape[0]

    kptis = kptij_lst[:,0]
    kptjs = kptij_lst[:,1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('cint2c2e_sph', hermi=1, kpts=uniq_kpts)
    kLRs = []
    kLIs = []
#    chgcell = make_modchg_basis(auxcell, mydf.eta)
#    for k, kpt in enumerate(uniq_kpts):
#        aoaux = ft_ao.ft_ao(chgcell, Gv, None, b, gxyz, Gvbase, kpt).T
#        coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, gs))
#        LkR = aoaux.real * coulG
#        LkI = aoaux.imag * coulG
#        j2caux = numpy.zeros_like(j2c[k])
#        j2caux[naux:,naux:] = j2c[naux:,naux:]
#        if is_zero(kpt):  # kpti == kptj
#            j2caux[naux:,naux:] -= lib.ddot(LkR, LkR.T)
#            j2caux[naux:,naux:] -= lib.ddot(LkI, LkI.T)
#            j2c[k] = j2c[k][:naux,:naux] - fuse(fuse(j2caux.T).T)
#            vbar = fuse(mydf.auxbar(fused_cell))
#            s = (vbar != 0).astype(numpy.double)
#            j2c[k] -= numpy.einsum('i,j->ij', vbar, s)
#            j2c[k] -= numpy.einsum('i,j->ij', s, vbar)
#        else:
#            j2cR, j2cI = zdotCN(LkR, LkI, LkR.T, LkI.T)
#            j2caux[naux:,naux:] -= j2cR + j2cI * 1j
#            j2c[k] = j2c[k][:naux,:naux] - fuse(fuse(j2caux.T).T)
#        #j2c[k] = fuse(fuse(j2c[k]).T).T.copy()
#        try:
#            j2c[k] = scipy.linalg.cholesky(fuse(fuse(j2c[k]).T).T, lower=True)
#        except scipy.linalg.LinAlgError as e:
#            msg =('===================================\n'
#                  'J-metric not positive definite.\n'
#                  'It is likely that gs is not enough.\n'
#                  '===================================')
#            log.error(msg)
#            raise scipy.linalg.LinAlgError('\n'.join([e.message, msg]))
#        kLR = LkR.T
#        kLI = LkI.T
#        if not kLR.flags.c_contiguous: kLR = lib.transpose(LkR)
#        if not kLI.flags.c_contiguous: kLI = lib.transpose(LkI)
#        kLR *= coulG.reshape(-1,1)
#        kLI *= coulG.reshape(-1,1)
#        kLRs.append(kLR)
#        kLIs.append(kLI)
#        aoaux = LkR = LkI = kLR = kLI = coulG = None
    for k, kpt in enumerate(uniq_kpts):
        aoaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
        coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, gs))
        LkR = aoaux.real * coulG
        LkI = aoaux.imag * coulG

        if is_zero(kpt):  # kpti == kptj
            j2c[k][naux:] -= lib.ddot(LkR[naux:], LkR.T)
            j2c[k][naux:] -= lib.ddot(LkI[naux:], LkI.T)
            j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T
        else:
            j2cR, j2cI = zdotCN(LkR[naux:], LkI[naux:], LkR.T, LkI.T)
            j2c[k][naux:] -= j2cR + j2cI * 1j
            j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T.conj()
        #j2c[k] = fuse(fuse(j2c[k]).T).T.copy()
        try:
            j2c[k] = scipy.linalg.cholesky(fuse(fuse(j2c[k]).T).T, lower=True)
        except scipy.linalg.LinAlgError as e:
            msg =('===================================\n'
                  'J-metric not positive definite.\n'
                  'It is likely that gs is not enough.\n'
                  '===================================')
            log.error(msg)
            raise scipy.linalg.LinAlgError('\n'.join([e.message, msg]))
        kLR = LkR[naux:].T
        kLI = LkI[naux:].T
        if not kLR.flags.c_contiguous: kLR = lib.transpose(LkR[naux:])
        if not kLI.flags.c_contiguous: kLI = lib.transpose(LkI[naux:])
        kLR *= coulG.reshape(-1,1)
        kLI *= coulG.reshape(-1,1)
        kLRs.append(kLR)
        kLIs.append(kLI)
        aoaux = LkR = LkI = kLR = kLI = coulG = None

    feri = h5py.File(mydf._cderi)

    def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
        kpt = uniq_kpts[uniq_kptji_id]
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)
        kLR = kLRs[uniq_kptji_id]
        kLI = kLIs[uniq_kptji_id]

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao*(nao+1)//2

            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('cint1e_ovlp_sph', hermi=1, kpts=adapted_kptjs)
            for k, ji in enumerate(adapted_ji_idx):
                ovlp[k] = lib.pack_tril(ovlp[k])
        else:
            aosym = 's1'
            nao_pair = nao**2

        max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory*.6*1e6/16/naux/(nkptj+1)), 1), nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16, int(max_memory*.2*1e6/16/buflen/(nkptj+1)))
        else:
            Gblksize = max(16, int(max_memory*.4*1e6/16/buflen/(nkptj+1)))
        Gblksize = min(Gblksize, ngs, 16384)
        pqkRbuf = numpy.empty(buflen*Gblksize)
        pqkIbuf = numpy.empty(buflen*Gblksize)
        # buf for ft_aopair
        buf = numpy.zeros((nkptj,buflen*Gblksize), dtype=numpy.complex128)

        col1 = 0
        for istep, sh_range in enumerate(shranges):
            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \
                       istep+1, len(shranges), *sh_range)
            bstart, bend, ncol = sh_range
            col0, col1 = col1, col1+ncol
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                v = numpy.asarray(feri['j3c/%d'%idx][:,col0:col1])
                if is_zero(kpt):
                    for i, c in enumerate(vbar):
                        if c != 0:
                            v[i] -= c * ovlp[k][col0:col1]
                j3cR.append(numpy.asarray(v.real, order='C'))
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order='C'))

            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
                for p0, p1 in lib.prange(0, ngs, Gblksize):
                    ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                          b, gxyz[p0:p1], Gvbase, kpt,
                                          adapted_kptjs, out=buf)
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = numpy.ndarray((nG,ncol), dtype=numpy.complex128,
                                             order='F', buffer=buf[k])
                        pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                        pqkR[:] = aoao.real.T
                        pqkI[:] = aoao.imag.T
                        aoao[:] = 0
                        lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                        if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                            lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                            lib.dot(kLI[p0:p1].T, pqkR.T,  1, j3cI[k][naux:], 1)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)
                ni = ncol // nao
                for p0, p1 in lib.prange(0, ngs, Gblksize):
                    ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                          b, gxyz[p0:p1], Gvbase, kpt,
                                          adapted_kptjs, out=buf)
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = numpy.ndarray((nG,ni,nao), dtype=numpy.complex128,
                                             order='F', buffer=buf[k])
                        pqkR = numpy.ndarray((ni,nao,nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ni,nao,nG), buffer=pqkIbuf)
                        pqkR[:] = aoao.real.transpose(1,2,0)
                        pqkI[:] = aoao.imag.transpose(1,2,0)
                        aoao[:] = 0
                        pqkR = pqkR.reshape(-1,nG)
                        pqkI = pqkI.reshape(-1,nG)
                        zdotCN(kLR[p0:p1].T, kLI[p0:p1].T, pqkR.T, pqkI.T,
                               -1, j3cR[k][naux:], j3cI[k][naux:], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)

                v = scipy.linalg.solve_triangular(j2c[uniq_kptji_id], v,
                                                  lower=True, overwrite_b=True)
                feri['j3c/%d'%ji][:naux,col0:col1] = v

    for k, kpt in enumerate(uniq_kpts):
        make_kpt(k)

    for k, kptij in enumerate(kptij_lst):
        v = feri['j3c/%d'%k][:naux]
        del(feri['j3c/%d'%k])
        feri['j3c/%d'%k] = v

    feri.close()
Beispiel #29
0
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    log = logger.Logger(mydf.stdout, mydf.verbose)
    t1 = t0 = (time.clock(), time.time())

    fused_cell, fuse = fuse_auxcell(mydf, mydf.auxcell)
    ao_loc = cell.ao_loc_nr()
    nao = ao_loc[-1]
    naux = auxcell.nao_nr()
    nkptij = len(kptij_lst)
    mesh = mydf.mesh
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]

    kptis = kptij_lst[:,0]
    kptjs = kptij_lst[:,1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts)
    j2ctags = []
    t1 = log.timer_debug1('2c2e', *t1)

    swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file))
    fswap = lib.H5TmpFile(swapfile.name)
    # Unlink swapfile to avoid trash
    swapfile = None

    mem_now = max(comm.allgather(lib.current_memory()[0]))
    max_memory = max(2000, mydf.max_memory - mem_now)
    blksize = max(2048, int(max_memory*.5e6/16/fused_cell.nao_nr()))
    log.debug2('max_memory %s (MB)  blocksize %s', max_memory, blksize)
    for k, kpt in enumerate(uniq_kpts):
        coulG = mydf.weighted_coulG(kpt, False, mesh)
        j2c_k = numpy.zeros_like(j2c[k])
        for p0, p1 in mydf.prange(0, ngrids, blksize):
            aoaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], None, b, gxyz[p0:p1], Gvbase, kpt).T
            LkR = numpy.asarray(aoaux.real, order='C')
            LkI = numpy.asarray(aoaux.imag, order='C')
            aoaux = None

            if is_zero(kpt):  # kpti == kptj
                j2c_k[naux:] += lib.ddot(LkR[naux:]*coulG[p0:p1], LkR.T)
                j2c_k[naux:] += lib.ddot(LkI[naux:]*coulG[p0:p1], LkI.T)
            else:
                j2cR, j2cI = zdotCN(LkR[naux:]*coulG[p0:p1],
                                    LkI[naux:]*coulG[p0:p1], LkR.T, LkI.T)
                j2c_k[naux:] += j2cR + j2cI * 1j
            kLR = kLI = None

        j2c_k[:naux,naux:] = j2c_k[naux:,:naux].conj().T
        j2c[k] -= mpi.allreduce(j2c_k)
        j2c[k] = fuse(fuse(j2c[k]).T).T
        try:
            fswap['j2c/%d'%k] = scipy.linalg.cholesky(j2c[k], lower=True)
            j2ctags.append('CD')
        except scipy.linalg.LinAlgError as e:
            #msg =('===================================\n'
            #      'J-metric not positive definite.\n'
            #      'It is likely that mesh is not enough.\n'
            #      '===================================')
            #log.error(msg)
            #raise scipy.linalg.LinAlgError('\n'.join([e.message, msg]))
            w, v = scipy.linalg.eigh(j2c[k])
            log.debug2('metric linear dependency for kpt %s', k)
            log.debug2('cond = %.4g, drop %d bfns',
                       w[0]/w[-1], numpy.count_nonzero(w<mydf.linear_dep_threshold))
            v1 = v[:,w>mydf.linear_dep_threshold].T.conj()
            v1 /= numpy.sqrt(w[w>mydf.linear_dep_threshold]).reshape(-1,1)
            fswap['j2c/%d'%k] = v1
            if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum':
                idx = numpy.where(w < -mydf.linear_dep_threshold)[0]
                if len(idx) > 0:
                    fswap['j2c-/%d'%k] = (v[:,idx]/numpy.sqrt(-w[idx])).conj().T
            w = v = v1 = None
            j2ctags.append('eig')
    j2c = coulG = None

    aosym_s2 = numpy.einsum('ix->i', abs(kptis-kptjs)) < 1e-9
    j_only = numpy.all(aosym_s2)
    if gamma_point(kptij_lst):
        dtype = 'f8'
    else:
        dtype = 'c16'
    t1 = log.timer_debug1('aoaux and int2c', *t1)

# Estimates the buffer size based on the last contraction in G-space.
# This contraction requires to hold nkptj copies of (naux,?) array
# simultaneously in memory.
    mem_now = max(comm.allgather(lib.current_memory()[0]))
    max_memory = max(2000, mydf.max_memory - mem_now)
    nkptj_max = max((uniq_inverse==x).sum() for x in set(uniq_inverse))
    buflen = max(int(min(max_memory*.5e6/16/naux/(nkptj_max+2)/nao,
                         nao/3/mpi.pool.size)), 1)
    chunks = (buflen, nao)

    j3c_jobs = grids2d_int3c_jobs(cell, auxcell, kptij_lst, chunks, j_only)
    log.debug1('max_memory = %d MB (%d in use)  chunks %s',
               max_memory, mem_now, chunks)
    log.debug2('j3c_jobs %s', j3c_jobs)

    if j_only:
        int3c = wrap_int3c(cell, fused_cell, 'int3c2e', 's2', 1, kptij_lst)
    else:
        int3c = wrap_int3c(cell, fused_cell, 'int3c2e', 's1', 1, kptij_lst)
        idxb = numpy.tril_indices(nao)
        idxb = (idxb[0] * nao + idxb[1]).astype('i')
    aux_loc = fused_cell.ao_loc_nr('ssc' in 'int3c2e')

    def gen_int3c(job_id, ish0, ish1):
        dataname = 'j3c-chunks/%d' % job_id
        i0 = ao_loc[ish0]
        i1 = ao_loc[ish1]
        dii = i1*(i1+1)//2 - i0*(i0+1)//2
        if j_only:
            dij = dii
            buflen = max(8, int(max_memory*1e6/16/(nkptij*dii+dii)))
        else:
            dij = (i1 - i0) * nao
            buflen = max(8, int(max_memory*1e6/16/(nkptij*dij+dij)))
        auxranges = balance_segs(aux_loc[1:]-aux_loc[:-1], buflen)
        buflen = max([x[2] for x in auxranges])
        buf = numpy.empty(nkptij*dij*buflen, dtype=dtype)
        buf1 = numpy.empty(dij*buflen, dtype=dtype)

        naux = aux_loc[-1]
        for kpt_id, kptij in enumerate(kptij_lst):
            key = '%s/%d' % (dataname, kpt_id)
            if aosym_s2[kpt_id]:
                shape = (naux, dii)
            else:
                shape = (naux, dij)
            if gamma_point(kptij):
                fswap.create_dataset(key, shape, 'f8')
            else:
                fswap.create_dataset(key, shape, 'c16')

        naux0 = 0
        for istep, auxrange in enumerate(auxranges):
            log.alldebug2("aux_e1 job_id %d step %d", job_id, istep)
            sh0, sh1, nrow = auxrange
            sub_slice = (ish0, ish1, 0, cell.nbas, sh0, sh1)
            mat = numpy.ndarray((nkptij,dij,nrow), dtype=dtype, buffer=buf)
            mat = int3c(sub_slice, mat)

            for k, kptij in enumerate(kptij_lst):
                h5dat = fswap['%s/%d'%(dataname,k)]
                v = lib.transpose(mat[k], out=buf1)
                if not j_only and aosym_s2[k]:
                    idy = idxb[i0*(i0+1)//2:i1*(i1+1)//2] - i0 * nao
                    out = numpy.ndarray((nrow,dii), dtype=v.dtype, buffer=mat[k])
                    v = numpy.take(v, idy, axis=1, out=out)
                if gamma_point(kptij):
                    h5dat[naux0:naux0+nrow] = v.real
                else:
                    h5dat[naux0:naux0+nrow] = v
            naux0 += nrow

    def ft_fuse(job_id, uniq_kptji_id, sh0, sh1):
        kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)

        j2c = numpy.asarray(fswap['j2c/%d'%uniq_kptji_id])
        j2ctag = j2ctags[uniq_kptji_id]
        naux0 = j2c.shape[0]
        if ('j2c-/%d' % uniq_kptji_id) in fswap:
            j2c_negative = numpy.asarray(fswap['j2c-/%d'%uniq_kptji_id])
        else:
            j2c_negative = None

        if is_zero(kpt):
            aosym = 's2'
        else:
            aosym = 's1'

        if aosym == 's2' and cell.dimension == 3:
            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
            ovlp = [lib.pack_tril(s) for s in ovlp]

        j3cR = [None] * nkptj
        j3cI = [None] * nkptj
        i0 = ao_loc[sh0]
        i1 = ao_loc[sh1]
        for k, idx in enumerate(adapted_ji_idx):
            key = 'j3c-chunks/%d/%d' % (job_id, idx)
            v = numpy.asarray(fswap[key])
            if aosym == 's2' and cell.dimension == 3:
                for i in numpy.where(vbar != 0)[0]:
                    v[i] -= vbar[i] * ovlp[k][i0*(i0+1)//2:i1*(i1+1)//2].ravel()
            j3cR[k] = numpy.asarray(v.real, order='C')
            if v.dtype == numpy.complex128:
                j3cI[k] = numpy.asarray(v.imag, order='C')
            v = None

        ncol = j3cR[0].shape[1]
        Gblksize = max(16, int(max_memory*1e6/16/ncol/(nkptj+1)))  # +1 for pqkRbuf/pqkIbuf
        Gblksize = min(Gblksize, ngrids, 16384)
        pqkRbuf = numpy.empty(ncol*Gblksize)
        pqkIbuf = numpy.empty(ncol*Gblksize)
        buf = numpy.empty(nkptj*ncol*Gblksize, dtype=numpy.complex128)
        log.alldebug2('job_id %d  blksize (%d,%d)', job_id, Gblksize, ncol)

        wcoulG = mydf.weighted_coulG(kpt, False, mesh)
        fused_cell_slice = (auxcell.nbas, fused_cell.nbas)
        if aosym == 's2':
            shls_slice = (sh0, sh1, 0, sh1)
        else:
            shls_slice = (sh0, sh1, 0, cell.nbas)
        for p0, p1 in lib.prange(0, ngrids, Gblksize):
            Gaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], fused_cell_slice, b,
                               gxyz[p0:p1], Gvbase, kpt)
            Gaux *= wcoulG[p0:p1,None]
            kLR = Gaux.real.copy('C')
            kLI = Gaux.imag.copy('C')
            Gaux = None

            dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym, b,
                                        gxyz[p0:p1], Gvbase, kpt,
                                        adapted_kptjs, out=buf)
            nG = p1 - p0
            for k, ji in enumerate(adapted_ji_idx):
                aoao = dat[k].reshape(nG,ncol)
                pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                pqkR[:] = aoao.real.T
                pqkI[:] = aoao.imag.T

                lib.dot(kLR.T, pqkR.T, -1, j3cR[k][naux:], 1)
                lib.dot(kLI.T, pqkI.T, -1, j3cR[k][naux:], 1)
                if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                    lib.dot(kLR.T, pqkI.T, -1, j3cI[k][naux:], 1)
                    lib.dot(kLI.T, pqkR.T,  1, j3cI[k][naux:], 1)
            kLR = kLI = None

        for k, idx in enumerate(adapted_ji_idx):
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                v = fuse(j3cR[k])
            else:
                v = fuse(j3cR[k] + j3cI[k] * 1j)
            if j2ctag == 'CD':
                v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True)
                fswap['j3c-chunks/%d/%d'%(job_id,idx)][:naux0] = v
            else:
                fswap['j3c-chunks/%d/%d'%(job_id,idx)][:naux0] = lib.dot(j2c, v)

            # low-dimension systems
            if j2c_negative is not None:
                fswap['j3c-/%d/%d'%(job_id,idx)] = lib.dot(j2c_negative, v)

    _assemble(mydf, kptij_lst, j3c_jobs, gen_int3c, ft_fuse, cderi_file, fswap, log)
Beispiel #30
0
def general(mydf, mo_coeffs, kpts=None, compact=True):
    if mydf._cderi is None:
        mydf.build()

    cell = mydf.cell
    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    if isinstance(mo_coeffs, numpy.ndarray) and mo_coeffs.ndim == 2:
        mo_coeffs = (mo_coeffs,) * 4
    all_real = not any(numpy.iscomplexobj(mo) for mo in mo_coeffs)
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]) * .5)

####################
# gamma point, the integral is real and with s4 symmetry
    if abs(kptijkl).sum() < KPT_DIFF_TOL and all_real:
        ijmosym, nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1], compact)
        klmosym, nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3], compact)
        eri_mo = numpy.zeros((nij_pair,nkl_pair))
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[3]))
        ijR = klR = None
        for LpqR, LpqI in mydf.sr_loop(kptijkl[:2], max_memory, True):
            ijR, klR = _dtrans(LpqR, ijR, ijmosym, moij, ijslice,
                               LpqR, klR, klmosym, mokl, klslice, sym)
            lib.ddot(ijR.T, klR, 1, eri_mo, 1)
            LpqR = LpqI = None
        return eri_mo

    elif (abs(kpti-kptk).sum() < KPT_DIFF_TOL) and (abs(kptj-kptl).sum() < KPT_DIFF_TOL):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        eri_mo = numpy.zeros((nij_pair,nkl_pair), dtype=numpy.complex)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[3]))

        zij = zkl = None
        for LpqR, LpqI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            buf = LpqR+LpqI*1j
            zij, zkl = _ztrans(buf, zij, moij, ijslice,
                               buf, zkl, mokl, klslice, sym)
            lib.dot(zij.T, zkl, 1, eri_mo, 1)
            LpqR = LpqI = buf = None
        return eri_mo

####################
# (kpt) i == j == k == l != 0
# (kpt) i == l && j == k && i != j && j != k  =>
#
    elif (abs(kpti-kptl).sum() < KPT_DIFF_TOL) and (abs(kptj-kptk).sum() < KPT_DIFF_TOL):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nlk_pair, molk, lkslice = _conc_mos(mo_coeffs[3], mo_coeffs[2])[1:]
        eri_mo = numpy.zeros((nij_pair,nlk_pair), dtype=numpy.complex)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[3]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[2]))

        zij = zlk = None
        for LpqR, LpqI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            buf = LpqR+LpqI*1j
            zij, zlk = _ztrans(buf, zij, moij, ijslice,
                               buf, zlk, molk, lkslice, sym)
            lib.dot(zij.T, zlk.conj(), 1, eri_mo, 1)
            LpqR = LpqI = buf = None
        nmok = mo_coeffs[2].shape[1]
        nmol = mo_coeffs[3].shape[1]
        eri_mo = lib.transpose(eri_mo.reshape(-1,nmol,nmok), axes=(0,2,1))
        return eri_mo.reshape(nij_pair,nlk_pair)

####################
# aosym = s1, complex integrals
#
# If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
# vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
# So  kptl/b - kptk/b  must be -1 < k/b < 1.  =>  kptl == kptk
#
    else:
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        eri_mo = numpy.zeros((nij_pair,nkl_pair), dtype=numpy.complex)

        zij = zkl = None
        for (LpqR, LpqI), (LrsR, LrsI) in \
                lib.izip(mydf.sr_loop(kptijkl[:2], max_memory, False),
                         mydf.sr_loop(kptijkl[2:], max_memory, False)):
            zij, zkl = _ztrans(LpqR+LpqI*1j, zij, moij, ijslice,
                               LrsR+LrsI*1j, zkl, mokl, klslice, False)
            lib.dot(zij.T, zkl, 1, eri_mo, 1)
            LpqR = LpqI = LrsR = LrsI = None
        return eri_mo
Beispiel #31
0
    def add_wvvVV_(self, t1, t2, eris, t2new_tril):
        time0 = time.clock(), time.time()
        nocc, nvir = t1.shape

        #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
        #: t2new += numpy.einsum('ijcd,acdb->ijab', tau, vvvv)
        def contract_rec_(t2new_tril, tau, eri, i0, i1, j0, j1):
            nao = tau.shape[-1]
            ic = i1 - i0
            jc = j1 - j0
            #: t2tril[:,j0:j1] += numpy.einsum('xcd,cdab->xab', tau[:,i0:i1], eri)
            _dgemm('N', 'N', nocc*(nocc+1)//2, jc*nao, ic*nao,
                   tau.reshape(-1,nao*nao), eri.reshape(-1,jc*nao),
                   t2new_tril.reshape(-1,nao*nao), 1, 1, i0*nao, 0, j0*nao)

            #: t2tril[:,i0:i1] += numpy.einsum('xcd,abcd->xab', tau[:,j0:j1], eri)
            _dgemm('N', 'T', nocc*(nocc+1)//2, ic*nao, jc*nao,
                   tau.reshape(-1,nao*nao), eri.reshape(-1,jc*nao),
                   t2new_tril.reshape(-1,nao*nao), 1, 1, j0*nao, 0, i0*nao)

        def contract_tril_(t2new_tril, tau, eri, a0, a):
            nvir = tau.shape[-1]
            #: t2new[i,:i+1, a] += numpy.einsum('xcd,cdb->xb', tau[:,a0:a+1], eri)
            _dgemm('N', 'N', nocc*(nocc+1)//2, nvir, (a+1-a0)*nvir,
                   tau.reshape(-1,nvir*nvir), eri.reshape(-1,nvir),
                   t2new_tril.reshape(-1,nvir*nvir), 1, 1, a0*nvir, 0, a*nvir)

            #: t2new[i,:i+1,a0:a] += numpy.einsum('xd,abd->xab', tau[:,a], eri[:a])
            if a > a0:
                _dgemm('N', 'T', nocc*(nocc+1)//2, (a-a0)*nvir, nvir,
                       tau.reshape(-1,nvir*nvir), eri.reshape(-1,nvir),
                       t2new_tril.reshape(-1,nvir*nvir), 1, 1, a*nvir, 0, a0*nvir)

        if self.direct:   # AO-direct CCSD
            mol = self.mol
            nao, nmo = self.mo_coeff.shape
            nao_pair = nao * (nao+1) // 2
            aos = numpy.asarray(self.mo_coeff[:,nocc:].T, order='F')
            outbuf = numpy.empty((nocc*(nocc+1)//2,nao,nao))
            tau = numpy.ndarray((nocc*(nocc+1)//2,nvir,nvir), buffer=outbuf)
            p0 = 0
            for i in range(nocc):
                tau[p0:p0+i+1] = numpy.einsum('a,jb->jab', t1[i], t1[:i+1])
                tau[p0:p0+i+1] += t2[i,:i+1]
                p0 += i + 1
            tau = _ao2mo.nr_e2(tau.reshape(-1,nvir**2), aos, (0,nao,0,nao), 's1', 's1')
            tau = tau.reshape(-1,nao,nao)
            time0 = logger.timer_debug1(self, 'vvvv-tau', *time0)

            ao2mopt = _ao2mo.AO2MOpt(mol, 'cint2e_sph', 'CVHFnr_schwarz_cond',
                                     'CVHFsetnr_direct_scf')
            outbuf[:] = 0
            ao_loc = mol.ao_loc_nr()
            max_memory = max(0, self.max_memory - lib.current_memory()[0])
            dmax = max(4, int(numpy.sqrt(max_memory*.95e6/8/nao**2/2)))
            sh_ranges = ao2mo.outcore.balance_partition(ao_loc, dmax)
            dmax = max(x[2] for x in sh_ranges)
            eribuf = numpy.empty((dmax,dmax,nao,nao))
            loadbuf = numpy.empty((dmax,dmax,nao,nao))
            fint = gto.moleintor.getints2e

            for ip, (ish0, ish1, ni) in enumerate(sh_ranges):
                for jsh0, jsh1, nj in sh_ranges[:ip]:
                    eri = fint('cint2e_sph', mol._atm, mol._bas, mol._env,
                               shls_slice=(ish0,ish1,jsh0,jsh1), aosym='s2kl',
                               ao_loc=ao_loc, cintopt=ao2mopt._cintopt, out=eribuf)
                    i0, i1 = ao_loc[ish0], ao_loc[ish1]
                    j0, j1 = ao_loc[jsh0], ao_loc[jsh1]
                    tmp = numpy.ndarray((i1-i0,nao,j1-j0,nao), buffer=loadbuf)
                    _ccsd.libcc.CCload_eri(tmp.ctypes.data_as(ctypes.c_void_p),
                                           eri.ctypes.data_as(ctypes.c_void_p),
                                           (ctypes.c_int*4)(i0, i1, j0, j1),
                                           ctypes.c_int(nao))
                    contract_rec_(outbuf, tau, tmp, i0, i1, j0, j1)
                    time0 = logger.timer_debug1(self, 'AO-vvvv [%d:%d,%d:%d]' %
                                                (ish0,ish1,jsh0,jsh1), *time0)
                eri = fint('cint2e_sph', mol._atm, mol._bas, mol._env,
                           shls_slice=(ish0,ish1,ish0,ish1), aosym='s4',
                           ao_loc=ao_loc, cintopt=ao2mopt._cintopt, out=eribuf)
                i0, i1 = ao_loc[ish0], ao_loc[ish1]
                for i in range(i1-i0):
                    p0, p1 = i*(i+1)//2, (i+1)*(i+2)//2
                    tmp = lib.unpack_tril(eri[p0:p1], out=loadbuf)
                    contract_tril_(outbuf, tau, tmp, i0, i0+i)
                time0 = logger.timer_debug1(self, 'AO-vvvv [%d:%d,%d:%d]' %
                                            (ish0,ish1,ish0,ish1), *time0)
            eribuf = loadbuf = eri = tmp = None

            mo = numpy.asarray(self.mo_coeff, order='F')
            tmp = _ao2mo.nr_e2(outbuf, mo, (nocc,nmo,nocc,nmo), 's1', 's1', out=tau)
            t2new_tril += tmp.reshape(-1,nvir,nvir)

            #: tmp = numpy.einsum('ijcd,ka,kdcb->ijba', tau, t1, eris.ovvv)
            #: t2new -= tmp + tmp.transpose(1,0,3,2)
            tmp = _ao2mo.nr_e2(outbuf, mo, (nocc,nmo,0,nocc), 's1', 's1', out=tau)
            t2new_tril -= lib.ddot(tmp.reshape(-1,nocc), t1).reshape(-1,nvir,nvir)
            tmp = _ao2mo.nr_e2(outbuf, mo, (0,nocc,nocc,nmo), 's1', 's1', out=tau)
            #: t2new_tril -= numpy.einsum('xkb,ka->xab', tmp.reshape(-1,nocc,nvir), t1)
            tmp = lib.transpose(tmp.reshape(-1,nocc,nvir), axes=(0,2,1), out=outbuf)
            tmp = lib.ddot(tmp.reshape(-1,nocc), t1, 1,
                           numpy.ndarray(t2new_tril.shape, buffer=tau), 0)
            tmp = lib.transpose(tmp.reshape(-1,nvir,nvir), axes=(0,2,1), out=outbuf)
            t2new_tril -= tmp.reshape(-1,nvir,nvir)

        else:
            #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
            #: t2new += numpy.einsum('ijcd,acdb->ijab', tau, vvvv)
            tau = numpy.empty((nocc*(nocc+1)//2,nvir,nvir))
            p0 = 0
            for i in range(nocc):
                tau[p0:p0+i+1] = numpy.einsum('a,jb->jab', t1[i], t1[:i+1])
                tau[p0:p0+i+1] += t2[i,:i+1]
                p0 += i + 1
            time0 = logger.timer_debug1(self, 'vvvv-tau', *time0)
            p0 = 0
            outbuf = numpy.empty((nvir,nvir,nvir))
            outbuf1 = numpy.empty((nvir,nvir,nvir))
            handler = None
            for a in range(nvir):
                buf = lib.unpack_tril(eris.vvvv[p0:p0+a+1], out=outbuf)
                outbuf, outbuf1 = outbuf1, outbuf
                handler = async_do(handler, contract_tril_, t2new_tril, tau, buf, 0, a)
                p0 += a+1
                time0 = logger.timer_debug1(self, 'vvvv %d'%a, *time0)
            handler.join()
        return t2new_tril
Beispiel #32
0
def get_eri(mydf, kpts=None, compact=True):
    cell = mydf.cell
    if kpts is None:
        kptijkl = numpy.zeros((4,3))
    elif numpy.shape(kpts) == (3,):
        kptijkl = numpy.vstack([kpts]*4)
    else:
        kptijkl = numpy.reshape(kpts, (4,3))
    if mydf._cderi is None:
        mydf.build()

    kpti, kptj, kptk, kptl = kptijkl
    auxcell = mydf.auxcell
    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    nao_pair = nao * (nao+1) // 2
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]) * .8)

####################
# gamma point, the integral is real and with s4 symmetry
    if abs(kptijkl).sum() < 1e-9:
        eriR = numpy.zeros((nao_pair,nao_pair))
        for LpqR, LpqI, j3cR, j3cI in mydf.sr_loop(kptijkl[:2], max_memory, True):
            lib.ddot(j3cR.T, LpqR, 1, eriR, 1)
        eriR = lib.transpose_sum(eriR, inplace=True)

        coulG = tools.get_coulG(cell, kptj-kpti, gs=mydf.gs) / cell.vol
        max_memory = (mydf.max_memory - lib.current_memory()[0]) * .8
        trilidx = numpy.tril_indices(nao)
        for pqkR, pqkI, p0, p1 \
                in mydf.pw_loop(cell, mydf.gs, kptijkl[:2], max_memory=max_memory):
            pqkR = numpy.asarray(pqkR.reshape(nao,nao,-1)[trilidx], order='C')
            pqkI = numpy.asarray(pqkI.reshape(nao,nao,-1)[trilidx], order='C')
            vG = numpy.sqrt(coulG[p0:p1])
            pqkR *= vG
            pqkI *= vG
            lib.dot(pqkR, pqkR.T, 1, eriR, 1)
            lib.dot(pqkI, pqkI.T, 1, eriR, 1)
        if not compact:
            eriR = ao2mo.restore(1, eriR, nao).reshape(nao**2,-1)
        return eriR

####################
# (kpt) i == j == k == l != 0
#
# (kpt) i == l && j == k && i != j && j != k  =>
# both vbar and ovlp are zero. It corresponds to the exchange integral.
#
# complex integrals, N^4 elements
    elif (abs(kpti-kptl).sum() < 1e-9) and (abs(kptj-kptk).sum() < 1e-9):
        eriR = numpy.zeros((nao*nao,nao*nao))
        eriI = numpy.zeros((nao*nao,nao*nao))
        for LpqR, LpqI, j3cR, j3cI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            zdotNC(j3cR.T, j3cI.T, LpqR, LpqI, 1, eriR, eriI, 1)
            zdotNC(LpqR.T, LpqI.T, j3cR, j3cI, 1, eriR, eriI, 1)
        LpqR = LpqI = j3cR = j3cI = None

        coulG = tools.get_coulG(cell, kptj-kpti, gs=mydf.gs) / cell.vol
        for pqkR, pqkI, p0, p1 \
                in mydf.pw_loop(cell, mydf.gs, kptijkl[:2], max_memory=max_memory):
            vG = numpy.sqrt(coulG[p0:p1])
            pqkR *= vG
            pqkI *= vG
# rho_pq(G+k_pq) * conj(rho_rs(G-k_rs))
            zdotNC(pqkR, pqkI, pqkR.T, pqkI.T, 1, eriR, eriI, 1)
# transpose(0,1,3,2) because
# j == k && i == l  =>
# (L|ij).transpose(0,2,1).conj() = (L^*|ji) = (L^*|kl)  =>  (M|kl)
# rho_rs(-G+k_rs) = conj(transpose(rho_sr(G+k_sr), (0,2,1)))
        return (eriR.reshape((nao,)*4).transpose(0,1,3,2) +
                eriI.reshape((nao,)*4).transpose(0,1,3,2)*1j).reshape(nao**2,-1)

####################
# aosym = s1, complex integrals
#
# kpti == kptj  =>  kptl == kptk
# If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
# vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
# So  kptl/b - kptk/b  must be -1 < k/b < 1.
#
    else:
        eriR = numpy.zeros((nao*nao,nao*nao))
        eriI = numpy.zeros((nao*nao,nao*nao))
        for (LpqR, LpqI, jpqR, jpqI), (LrsR, LrsI, jrsR, jrsI) in \
                lib.izip(mydf.sr_loop(kptijkl[:2], max_memory, False),
                         mydf.sr_loop(kptijkl[2:], max_memory, False)):
            zdotNN(jpqR.T, jpqI.T, LrsR, LrsI, 1, eriR, eriI, 1)
            zdotNN(LpqR.T, LpqI.T, jrsR, jrsI, 1, eriR, eriI, 1)
        LpqR = LpqI = jpqR = jpqI = LrsR = LrsI = jrsR = jrsI = None

        coulG = tools.get_coulG(cell, kptj-kpti, gs=mydf.gs) / cell.vol
        max_memory = (mydf.max_memory - lib.current_memory()[0]) * .4

        for (pqkR, pqkI, p0, p1), (rskR, rskI, q0, q1) in \
                lib.izip(mydf.pw_loop(cell, mydf.gs, kptijkl[:2], max_memory=max_memory),
                         mydf.pw_loop(cell, mydf.gs,-kptijkl[2:], max_memory=max_memory)):
            pqkR *= coulG[p0:p1]
            pqkI *= coulG[p0:p1]
# rho'_rs(G-k_rs) = conj(rho_rs(-G+k_rs))
#                 = conj(rho_rs(-G+k_rs) - d_{k_rs:Q,rs} * Q(-G+k_rs))
#                 = rho_rs(G-k_rs) - conj(d_{k_rs:Q,rs}) * Q(G-k_rs)
# rho_pq(G+k_pq) * conj(rho'_rs(G-k_rs))
            zdotNC(pqkR, pqkI, rskR.T, rskI.T, 1, eriR, eriI, 1)
        return eriR + eriI*1j
Beispiel #33
0
def _ao2mo_ovov(mp, orbo, orbv, feri, max_memory=2000, verbose=None):
    time0 = (time.clock(), time.time())
    log = logger.new_logger(mp, verbose)

    mol = mp.mol
    int2e = mol._add_suffix('int2e')
    ao2mopt = _ao2mo.AO2MOpt(mol, int2e, 'CVHFnr_schwarz_cond',
                             'CVHFsetnr_direct_scf')
    nao, nocc = orbo.shape
    nvir = orbv.shape[1]
    nbas = mol.nbas
    assert(nvir <= nao)

    ao_loc = mol.ao_loc_nr()
    dmax = max(4, min(nao/3, numpy.sqrt(max_memory*.95e6/8/(nao+nocc)**2)))
    sh_ranges = ao2mo.outcore.balance_partition(ao_loc, dmax)
    dmax = max(x[2] for x in sh_ranges)
    eribuf = numpy.empty((nao,dmax,dmax,nao))
    ftmp = lib.H5TmpFile()
    log.debug('max_memory %s MB (dmax = %s) required disk space %g MB',
              max_memory, dmax, nocc**2*(nao*(nao+dmax)/2+nvir**2)*8/1e6)

    buf_i = numpy.empty((nocc*dmax**2*nao))
    buf_li = numpy.empty((nocc**2*dmax**2))
    buf1 = numpy.empty_like(buf_li)

    fint = gto.moleintor.getints4c
    jk_blk_slices = []
    count = 0
    time1 = time0
    with lib.call_in_background(ftmp.__setitem__) as save:
        for ip, (ish0, ish1, ni) in enumerate(sh_ranges):
            for jsh0, jsh1, nj in sh_ranges[:ip+1]:
                i0, i1 = ao_loc[ish0], ao_loc[ish1]
                j0, j1 = ao_loc[jsh0], ao_loc[jsh1]
                jk_blk_slices.append((i0,i1,j0,j1))

                eri = fint(int2e, mol._atm, mol._bas, mol._env,
                           shls_slice=(0,nbas,ish0,ish1, jsh0,jsh1,0,nbas),
                           aosym='s1', ao_loc=ao_loc, cintopt=ao2mopt._cintopt,
                           out=eribuf)
                tmp_i = numpy.ndarray((nocc,(i1-i0)*(j1-j0)*nao), buffer=buf_i)
                tmp_li = numpy.ndarray((nocc,nocc*(i1-i0)*(j1-j0)), buffer=buf_li)
                lib.ddot(orbo.T, eri.reshape(nao,(i1-i0)*(j1-j0)*nao), c=tmp_i)
                lib.ddot(orbo.T, tmp_i.reshape(nocc*(i1-i0)*(j1-j0),nao).T, c=tmp_li)
                tmp_li = tmp_li.reshape(nocc,nocc,(i1-i0),(j1-j0))
                save(str(count), tmp_li.transpose(1,0,2,3))
                buf_li, buf1 = buf1, buf_li
                count += 1
                time1 = log.timer_debug1('partial ao2mo [%d:%d,%d:%d]' %
                                         (ish0,ish1,jsh0,jsh1), *time1)
    time1 = time0 = log.timer('mp2 ao2mo_ovov pass1', *time0)
    eri = eribuf = tmp_i = tmp_li = buf_i = buf_li = buf1 = None

    h5dat = feri.create_dataset('ovov', (nocc*nvir,nocc*nvir), 'f8',
                                chunks=(nvir,nvir))
    occblk = int(min(nocc, max(4, 250/nocc, max_memory*.9e6/8/(nao**2*nocc)/5)))
    def load(i0, eri):
        if i0 < nocc:
            i1 = min(i0+occblk, nocc)
            for k, (p0,p1,q0,q1) in enumerate(jk_blk_slices):
                eri[:i1-i0,:,p0:p1,q0:q1] = ftmp[str(k)][i0:i1]
                if p0 != q0:
                    dat = numpy.asarray(ftmp[str(k)][:,i0:i1])
                    eri[:i1-i0,:,q0:q1,p0:p1] = dat.transpose(1,0,3,2)

    def save(i0, i1, dat):
        for i in range(i0, i1):
            h5dat[i*nvir:(i+1)*nvir] = dat[i-i0].reshape(nvir,nocc*nvir)

    orbv = numpy.asarray(orbv, order='F')
    buf_prefecth = numpy.empty((occblk,nocc,nao,nao))
    buf = numpy.empty_like(buf_prefecth)
    bufw = numpy.empty((occblk*nocc,nvir**2))
    bufw1 = numpy.empty_like(bufw)
    with lib.call_in_background(load) as prefetch:
        with lib.call_in_background(save) as bsave:
            load(0, buf_prefecth)
            for i0, i1 in lib.prange(0, nocc, occblk):
                buf, buf_prefecth = buf_prefecth, buf
                prefetch(i1, buf_prefecth)
                eri = buf[:i1-i0].reshape((i1-i0)*nocc,nao,nao)

                dat = _ao2mo.nr_e2(eri, orbv, (0,nvir,0,nvir), 's1', 's1', out=bufw)
                bsave(i0, i1, dat.reshape(i1-i0,nocc,nvir,nvir).transpose(0,2,1,3))
                bufw, bufw1 = bufw1, bufw
                time1 = log.timer_debug1('pass2 ao2mo [%d:%d]' % (i0,i1), *time1)

    time0 = log.timer('mp2 ao2mo_ovov pass2', *time0)
    return h5dat
Beispiel #34
0
def make_rdm2(ci, nmo, nocc):
    nvir = nmo - nocc
    noo = nocc**2
    nov = nocc * nvir
    c0 = ci[0]
    c1 = ci[1:nocc*nvir+1].reshape(nocc,nvir)
    c2 = ci[nocc*nvir+1:].reshape(nocc,nocc,nvir,nvir)
    doovv = c0*c2
    dvvvo = numpy.einsum('ia,ikcd->cdak', c1, c2)
    dovoo =-numpy.einsum('ia,klac->ickl', c1, c2)
    doooo = lib.ddot(c2.reshape(noo,-1), c2.reshape(noo,-1).T).reshape((nocc,)*4)
    dvvvv = lib.ddot(c2.reshape(noo,-1).T, c2.reshape(noo,-1)).reshape((nvir,)*4)

    rdm2 = numpy.zeros((nmo,nmo,nmo,nmo))
    rdm2[:nocc,:nocc,:nocc,:nocc] = doooo*4-doooo.transpose(1,0,2,3)*2
    rdm2[:nocc,nocc:,:nocc,:nocc] = dovoo*4-dovoo.transpose(0,1,3,2)*2
    rdm2[nocc:,:nocc,:nocc,:nocc] = rdm2[:nocc,nocc:,:nocc,:nocc].transpose(1,0,3,2)
    rdm2[:nocc,:nocc,:nocc,nocc:] = rdm2[:nocc,nocc:,:nocc,:nocc].transpose(2,3,0,1)
    rdm2[:nocc,:nocc,nocc:,:nocc] = rdm2[:nocc,nocc:,:nocc,:nocc].transpose(3,2,1,0)

    rdm2[:nocc,:nocc,nocc:,nocc:] = doovv*4-doovv.transpose(1,0,2,3)*2
    rdm2[nocc:,nocc:,:nocc,:nocc] = rdm2[:nocc,:nocc,nocc:,nocc:].transpose(2,3,0,1)
    rdm2[nocc:,nocc:,nocc:,:nocc] = dvvvo*4-dvvvo.transpose(1,0,2,3)*2
    rdm2[nocc:,nocc:,:nocc,nocc:] = rdm2[nocc:,nocc:,nocc:,:nocc].transpose(1,0,3,2)
    rdm2[nocc:,:nocc,nocc:,nocc:] = rdm2[nocc:,nocc:,nocc:,:nocc].transpose(2,3,0,1)
    rdm2[:nocc,nocc:,nocc:,nocc:] = rdm2[nocc:,nocc:,nocc:,:nocc].transpose(3,2,1,0)
    rdm2[nocc:,nocc:,nocc:,nocc:] = dvvvv*4-dvvvv.transpose(1,0,2,3)*2

    # Fixme: This seems giving right answer, but not based on solid formula
    theta = c2*2 - c2.transpose(1,0,2,3)
    dovov  = numpy.einsum('ia,kc->icka', c1, c1) * -2
    #:dovov -= numpy.einsum('kjcb,kica->jaib', c2, theta) * 2
    #:dovov -= numpy.einsum('ikcb,jkca->iajb', c2, theta) * 2
    dovov -= lib.ddot(c2.transpose(0,2,1,3).reshape(nov,-1).T,
                      theta.transpose(0,2,1,3).reshape(nov,-1),
                      2).reshape(nocc,nvir,nocc,nvir).transpose(0,3,2,1)
    dovov -= lib.ddot(c2.transpose(0,3,1,2).reshape(nov,-1),
                      theta.transpose(0,3,1,2).reshape(nov,-1).T,
                      2).reshape(nocc,nvir,nocc,nvir).transpose(0,3,2,1)
    dvoov  = numpy.einsum('ia,kc->cika', c1, c1) * 4
    #:dvoov += numpy.einsum('kica,kjcb->ajib', theta, theta) * 2
    dvoov += lib.ddot(theta.transpose(0,2,1,3).reshape(nov,-1).T,
                      theta.transpose(0,2,1,3).reshape(nov,-1),
                      2).reshape(nocc,nvir,nocc,nvir).transpose(1,2,0,3)

#    rdm2[:nocc,nocc:,:nocc,nocc:] = dovov*4-dvoov.transpose(1,0,2,3)*2
#    rdm2[nocc:,:nocc,nocc:,:nocc] = rdm2[:nocc,nocc:,:nocc,nocc:].transpose(1,0,3,2)
#    rdm2[nocc:,:nocc,:nocc,nocc:] = dvoov*4-dovov.transpose(1,0,2,3)*2
#    rdm2[:nocc,nocc:,nocc:,:nocc] = rdm2[nocc:,:nocc,:nocc,nocc:].transpose(1,0,3,2)
    rdm2[:nocc,nocc:,:nocc,nocc:] = dovov
    rdm2[nocc:,:nocc,nocc:,:nocc] = dovov.transpose(1,0,3,2)
    rdm2[nocc:,:nocc,:nocc,nocc:] = dvoov
    rdm2[:nocc,nocc:,nocc:,:nocc] = dvoov.transpose(1,0,3,2)

    rdm1 = make_rdm1(ci, nmo, nocc)
    for i in range(nocc):
        rdm1[i,i] -= 2
    for i in range(nocc):
        for j in range(nocc):
            rdm2[i,j,i,j] += 4
            rdm2[i,j,j,i] -= 2
        rdm2[i,:,i,:] += rdm1 * 2
        rdm2[:,i,:,i] += rdm1 * 2
        rdm2[:,i,i,:] -= rdm1
        rdm2[i,:,:,i] -= rdm1

    return rdm2
Beispiel #35
0
def get_eri(mydf, kpts=None,
            compact=getattr(__config__, 'pbc_df_ao2mo_get_eri_compact', True)):
    if mydf._cderi is None:
        mydf.build()

    cell = mydf.cell
    nao = cell.nao_nr()
    kptijkl = _format_kpts(kpts)
    if not _iskconserv(cell, kptijkl):
        lib.logger.warn(cell, 'df_ao2mo: momentum conservation not found in '
                        'the given k-points %s', kptijkl)
        return numpy.zeros((nao,nao,nao,nao))

    kpti, kptj, kptk, kptl = kptijkl
    nao_pair = nao * (nao+1) // 2
    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0]-nao**4*16/1e6)

####################
# gamma point, the integral is real and with s4 symmetry
    if gamma_point(kptijkl):
        eriR = numpy.zeros((nao_pair,nao_pair))
        for LpqR, LpqI, sign in mydf.sr_loop(kptijkl[:2], max_memory, True):
            lib.ddot(LpqR.T, LpqR, sign, eriR, 1)
            LpqR = LpqI = None
        if not compact:
            eriR = ao2mo.restore(1, eriR, nao).reshape(nao**2,-1)
        return eriR

    elif is_zero(kpti-kptk) and is_zero(kptj-kptl):
        eriR = numpy.zeros((nao*nao,nao*nao))
        eriI = numpy.zeros((nao*nao,nao*nao))
        for LpqR, LpqI, sign in mydf.sr_loop(kptijkl[:2], max_memory, False):
            zdotNN(LpqR.T, LpqI.T, LpqR, LpqI, sign, eriR, eriI, 1)
            LpqR = LpqI = None
        return eriR + eriI*1j

####################
# (kpt) i == j == k == l != 0
#
# (kpt) i == l && j == k && i != j && j != k  =>
# both vbar and ovlp are zero. It corresponds to the exchange integral.
#
# complex integrals, N^4 elements
    elif is_zero(kpti-kptl) and is_zero(kptj-kptk):
        eriR = numpy.zeros((nao*nao,nao*nao))
        eriI = numpy.zeros((nao*nao,nao*nao))
        for LpqR, LpqI, sign in mydf.sr_loop(kptijkl[:2], max_memory, False):
            zdotNC(LpqR.T, LpqI.T, LpqR, LpqI, sign, eriR, eriI, 1)
            LpqR = LpqI = None
# transpose(0,1,3,2) because
# j == k && i == l  =>
# (L|ij).transpose(0,2,1).conj() = (L^*|ji) = (L^*|kl)  =>  (M|kl)
        eri = lib.transpose((eriR+eriI*1j).reshape(-1,nao,nao), axes=(0,2,1))
        return eri.reshape(nao**2,-1)

####################
# aosym = s1, complex integrals
#
# kpti == kptj  =>  kptl == kptk
# If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
# vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
# So  kptl/b - kptk/b  must be -1 < k/b < 1.
#
    else:
        eriR = numpy.zeros((nao*nao,nao*nao))
        eriI = numpy.zeros((nao*nao,nao*nao))
        blksize = int(max_memory*.4e6/16/nao**2)
        for (LpqR, LpqI, sign), (LrsR, LrsI, sign1) in \
                lib.izip(mydf.sr_loop(kptijkl[:2], max_memory, False, blksize),
                         mydf.sr_loop(kptijkl[2:], max_memory, False, blksize)):
            zdotNN(LpqR.T, LpqI.T, LrsR, LrsI, sign, eriR, eriI, 1)
            LpqR = LpqI = LrsR = LrsI = None
        return eriR + eriI*1j
Beispiel #36
0
def _make_j3c(mydf, cell, auxcell, kptij_lst):
    t1 = (time.clock(), time.time())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, auxcell)
    outcore.aux_e2(cell,
                   fused_cell,
                   mydf._cderi,
                   'cint3c2e_sph',
                   kptij_lst=kptij_lst,
                   dataname='j3c',
                   max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    gs = mydf.gs
    Gv, Gvbase, kws = cell.get_Gv_weights(gs)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngs = gxyz.shape[0]

    kptis = kptij_lst[:, 0]
    kptjs = kptij_lst[:, 1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('cint2c2e_sph', hermi=1, kpts=uniq_kpts)
    kLRs = []
    kLIs = []

    # An alternative method to evalute j2c. This method might have larger numerical error?
    #    chgcell = make_modchg_basis(auxcell, mydf.eta)
    #    for k, kpt in enumerate(uniq_kpts):
    #        aoaux = ft_ao.ft_ao(chgcell, Gv, None, b, gxyz, Gvbase, kpt).T
    #        coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, gs))
    #        LkR = aoaux.real * coulG
    #        LkI = aoaux.imag * coulG
    #        j2caux = numpy.zeros_like(j2c[k])
    #        j2caux[naux:,naux:] = j2c[k][naux:,naux:]
    #        if is_zero(kpt):  # kpti == kptj
    #            j2caux[naux:,naux:] -= lib.ddot(LkR, LkR.T)
    #            j2caux[naux:,naux:] -= lib.ddot(LkI, LkI.T)
    #            j2c[k] = j2c[k][:naux,:naux] - fuse(fuse(j2caux.T).T)
    #            vbar = fuse(mydf.auxbar(fused_cell))
    #            s = (vbar != 0).astype(numpy.double)
    #            j2c[k] -= numpy.einsum('i,j->ij', vbar, s)
    #            j2c[k] -= numpy.einsum('i,j->ij', s, vbar)
    #        else:
    #            j2cR, j2cI = zdotCN(LkR, LkI, LkR.T, LkI.T)
    #            j2caux[naux:,naux:] -= j2cR + j2cI * 1j
    #            j2c[k] = j2c[k][:naux,:naux] - fuse(fuse(j2caux.T).T)
    #
    #        try:
    #            j2c[k] = scipy.linalg.cholesky(j2c[k], lower=True)
    #        except scipy.linalg.LinAlgError as e:
    #            msg =('===================================\n'
    #                  'J-metric not positive definite.\n'
    #                  'It is likely that gs is not enough.\n'
    #                  '===================================')
    #            log.error(msg)
    #            raise scipy.linalg.LinAlgError('\n'.join([e.message, msg]))
    #        kLR = LkR.T
    #        kLI = LkI.T
    #        if not kLR.flags.c_contiguous: kLR = lib.transpose(LkR)
    #        if not kLI.flags.c_contiguous: kLI = lib.transpose(LkI)
    #        kLR *= coulG.reshape(-1,1)
    #        kLI *= coulG.reshape(-1,1)
    #        kLRs.append(kLR)
    #        kLIs.append(kLI)
    #        aoaux = LkR = LkI = kLR = kLI = coulG = None

    for k, kpt in enumerate(uniq_kpts):
        aoaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
        coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, gs))
        LkR = aoaux.real * coulG
        LkI = aoaux.imag * coulG

        if is_zero(kpt):  # kpti == kptj
            j2c[k][naux:] -= lib.ddot(LkR[naux:], LkR.T)
            j2c[k][naux:] -= lib.ddot(LkI[naux:], LkI.T)
            j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T
        else:
            j2cR, j2cI = zdotCN(LkR[naux:], LkI[naux:], LkR.T, LkI.T)
            j2c[k][naux:] -= j2cR + j2cI * 1j
            j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T.conj()

        j2c[k] = fuse(fuse(j2c[k]).T).T
        try:
            j2c[k] = ('CD', scipy.linalg.cholesky(j2c[k], lower=True))
        except scipy.linalg.LinAlgError as e:
            #msg =('===================================\n'
            #      'J-metric not positive definite.\n'
            #      'It is likely that gs is not enough.\n'
            #      '===================================')
            #log.error(msg)
            #raise scipy.linalg.LinAlgError('\n'.join([e.message, msg]))
            w, v = scipy.linalg.eigh(j2c[k])
            log.debug2('metric linear dependency for kpt %s', k)
            log.debug2('cond = %.4g, drop %d bfns', w[0] / w[-1],
                       numpy.count_nonzero(w < LINEAR_DEP_THR))
            v = v[:, w > LINEAR_DEP_THR].T.conj()
            v /= numpy.sqrt(w[w > LINEAR_DEP_THR]).reshape(-1, 1)
            j2c[k] = ('eig', v)
        kLR = LkR[naux:].T
        kLI = LkI[naux:].T
        if not kLR.flags.c_contiguous: kLR = lib.transpose(LkR[naux:])
        if not kLI.flags.c_contiguous: kLI = lib.transpose(LkI[naux:])
        kLR *= coulG.reshape(-1, 1)
        kLI *= coulG.reshape(-1, 1)
        kLRs.append(kLR)
        kLIs.append(kLI)
        aoaux = LkR = LkI = kLR = kLI = coulG = None

    nauxs = [v[1].shape[0] for v in j2c]
    feri = h5py.File(mydf._cderi)

    def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
        kpt = uniq_kpts[uniq_kptji_id]
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)
        kLR = kLRs[uniq_kptji_id]
        kLI = kLIs[uniq_kptji_id]

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao * (nao + 1) // 2

            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('cint1e_ovlp_sph',
                                  hermi=1,
                                  kpts=adapted_kptjs)
            for k, ji in enumerate(adapted_ji_idx):
                ovlp[k] = lib.pack_tril(ovlp[k])
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory - mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(
            max(int(max_memory * .6 * 1e6 / 16 / naux / (nkptj + 1)), 1),
            nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(
                16, int(max_memory * .2 * 1e6 / 16 / buflen / (nkptj + 1)))
        else:
            Gblksize = max(
                16, int(max_memory * .4 * 1e6 / 16 / buflen / (nkptj + 1)))
        Gblksize = min(Gblksize, ngs, 16384)
        pqkRbuf = numpy.empty(buflen * Gblksize)
        pqkIbuf = numpy.empty(buflen * Gblksize)
        # buf for ft_aopair
        buf = numpy.zeros((nkptj, buflen * Gblksize), dtype=numpy.complex128)

        col1 = 0
        for istep, sh_range in enumerate(shranges):
            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \
                       istep+1, len(shranges), *sh_range)
            bstart, bend, ncol = sh_range
            col0, col1 = col1, col1 + ncol
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                v = numpy.asarray(feri['j3c/%d' % idx][:, col0:col1])
                if is_zero(kpt):
                    for i, c in enumerate(vbar):
                        if c != 0:
                            v[i] -= c * ovlp[k][col0:col1]
                j3cR.append(numpy.asarray(v.real, order='C'))
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order='C'))
            v = None

            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
                for p0, p1 in lib.prange(0, ngs, Gblksize):
                    ft_ao._ft_aopair_kpts(cell,
                                          Gv[p0:p1],
                                          shls_slice,
                                          aosym,
                                          b,
                                          gxyz[p0:p1],
                                          Gvbase,
                                          kpt,
                                          adapted_kptjs,
                                          out=buf)
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = numpy.ndarray((nG, ncol),
                                             dtype=numpy.complex128,
                                             order='F',
                                             buffer=buf[k])
                        pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                        pqkR[:] = aoao.real.T
                        pqkI[:] = aoao.imag.T
                        aoao[:] = 0
                        lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                        if not (is_zero(kpt)
                                and gamma_point(adapted_kptjs[k])):
                            lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:],
                                    1)
                            lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)
                ni = ncol // nao
                for p0, p1 in lib.prange(0, ngs, Gblksize):
                    ft_ao._ft_aopair_kpts(cell,
                                          Gv[p0:p1],
                                          shls_slice,
                                          aosym,
                                          b,
                                          gxyz[p0:p1],
                                          Gvbase,
                                          kpt,
                                          adapted_kptjs,
                                          out=buf)
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = numpy.ndarray((nG, ni, nao),
                                             dtype=numpy.complex128,
                                             order='F',
                                             buffer=buf[k])
                        pqkR = numpy.ndarray((ni, nao, nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ni, nao, nG), buffer=pqkIbuf)
                        pqkR[:] = aoao.real.transpose(1, 2, 0)
                        pqkI[:] = aoao.imag.transpose(1, 2, 0)
                        aoao[:] = 0
                        pqkR = pqkR.reshape(-1, nG)
                        pqkI = pqkI.reshape(-1, nG)
                        zdotCN(kLR[p0:p1].T, kLI[p0:p1].T, pqkR.T, pqkI.T, -1,
                               j3cR[k][naux:], j3cI[k][naux:], 1)

            naux0 = nauxs[uniq_kptji_id]
            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)
                if j2c[uniq_kptji_id][0] == 'CD':
                    v = scipy.linalg.solve_triangular(j2c[uniq_kptji_id][1],
                                                      v,
                                                      lower=True,
                                                      overwrite_b=True)
                else:
                    v = lib.dot(j2c[uniq_kptji_id][1], v)
                feri['j3c/%d' % ji][:naux0, col0:col1] = v

        naux0 = nauxs[uniq_kptji_id]
        for k, ji in enumerate(adapted_ji_idx):
            v = feri['j3c/%d' % ji][:naux0]
            del (feri['j3c/%d' % ji])
            feri['j3c/%d' % ji] = v

    for k, kpt in enumerate(uniq_kpts):
        make_kpt(k)

    feri.close()
Beispiel #37
0
def get_eri(mydf, kpts=None,
            compact=getattr(__config__, 'pbc_df_ao2mo_get_eri_compact', True)):
    cell = mydf.cell
    nao = cell.nao_nr()
    kptijkl = _format_kpts(kpts)
    if not _iskconserv(cell, kptijkl):
        lib.logger.warn(cell, 'aft_ao2mo: momentum conservation not found in '
                        'the given k-points %s', kptijkl)
        return numpy.zeros((nao,nao,nao,nao))

    kpti, kptj, kptk, kptl = kptijkl
    q = kptj - kpti
    mesh = mydf.mesh
    coulG = mydf.weighted_coulG(q, False, mesh)
    nao_pair = nao * (nao+1) // 2
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]) * .8)

####################
# gamma point, the integral is real and with s4 symmetry
    if gamma_point(kptijkl):
        eriR = numpy.zeros((nao_pair,nao_pair))
        for pqkR, pqkI, p0, p1 \
                in mydf.pw_loop(mesh, kptijkl[:2], q, max_memory=max_memory,
                                aosym='s2'):
            lib.ddot(pqkR*coulG[p0:p1], pqkR.T, 1, eriR, 1)
            lib.ddot(pqkI*coulG[p0:p1], pqkI.T, 1, eriR, 1)
            pqkR = pqkI = None
        if not compact:
            eriR = ao2mo.restore(1, eriR, nao).reshape(nao**2,-1)
        return eriR

####################
# (kpt) i == j == k == l != 0
# (kpt) i == l && j == k && i != j && j != k  =>
#
# complex integrals, N^4 elements
    elif is_zero(kpti-kptl) and is_zero(kptj-kptk):
        eriR = numpy.zeros((nao**2,nao**2))
        eriI = numpy.zeros((nao**2,nao**2))
        for pqkR, pqkI, p0, p1 \
                in mydf.pw_loop(mesh, kptijkl[:2], q, max_memory=max_memory):
# rho_pq(G+k_pq) * conj(rho_rs(G-k_rs))
            zdotNC(pqkR*coulG[p0:p1], pqkI*coulG[p0:p1], pqkR.T, pqkI.T,
                   1, eriR, eriI, 1)
            pqkR = pqkI = None
        pqkR = pqkI = coulG = None
# transpose(0,1,3,2) because
# j == k && i == l  =>
# (L|ij).transpose(0,2,1).conj() = (L^*|ji) = (L^*|kl)  =>  (M|kl)
# rho_rs(-G+k_rs) = conj(transpose(rho_sr(G+k_sr), (0,2,1)))
        eri = lib.transpose((eriR+eriI*1j).reshape(-1,nao,nao), axes=(0,2,1))
        return eri.reshape(nao**2,-1)

####################
# aosym = s1, complex integrals
#
# If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
# vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
# So  kptl/b - kptk/b  must be -1 < k/b < 1.  =>  kptl == kptk
#
    else:
        eriR = numpy.zeros((nao**2,nao**2))
        eriI = numpy.zeros((nao**2,nao**2))
#
#       (pq|rs) = \sum_G 4\pi rho_pq rho_rs / |G+k_{pq}|^2
#       rho_pq = 1/N \sum_{Tp,Tq} \int exp(-i(G+k_{pq})*r) p(r-Tp) q(r-Tq) dr
#              = \sum_{Tq} exp(i k_q*Tq) \int exp(-i(G+k_{pq})*r) p(r) q(r-Tq) dr
# Note the k-point wrap-around for rho_rs, which leads to G+k_{pq} in FT
#       rho_rs = 1/N \sum_{Tr,Ts} \int exp( i(G+k_{pq})*r) r(r-Tr) s(r-Ts) dr
#              = \sum_{Ts} exp(i k_s*Ts) \int exp( i(G+k_{pq})*r) r(r) s(r-Ts) dr
# rho_pq can be directly evaluated by AFT (function pw_loop)
#       rho_pq = pw_loop(k_q, G+k_{pq})
# Assuming r(r) and s(r) are real functions, rho_rs is evaluated
#       rho_rs = 1/N \sum_{Tr,Ts} \int exp( i(G+k_{pq})*r) r(r-Tr) s(r-Ts) dr
#              = conj(\sum_{Ts} exp(-i k_s*Ts) \int exp(-i(G+k_{pq})*r) r(r) s(r-Ts) dr)
#              = conj( pw_loop(-k_s, G+k_{pq}) )
#
# TODO: For complex AO function r(r) and s(r), pw_loop function needs to be
# extended to include Gv vector in the arguments
        for (pqkR, pqkI, p0, p1), (rskR, rskI, q0, q1) in \
                lib.izip(mydf.pw_loop(mesh, kptijkl[:2], q, max_memory=max_memory*.5),
                         mydf.pw_loop(mesh,-kptijkl[2:], q, max_memory=max_memory*.5)):
            pqkR *= coulG[p0:p1]
            pqkI *= coulG[p0:p1]
            zdotNC(pqkR, pqkI, rskR.T, rskI.T, 1, eriR, eriI, 1)
            pqkR = pqkI = rskR = rskI = None
        return (eriR+eriI*1j)
Beispiel #38
0
def update_amps(mycc, t1, t2, eris):
    time0 = time.clock(), time.time()
    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc*nvir
    fock = eris.fock

    t1new = numpy.zeros_like(t1)
    t2new = numpy.zeros_like(t2)
    t2new_tril = numpy.zeros((nocc*(nocc+1)//2,nvir,nvir))
    mycc.add_wvvVV_(t1, t2, eris, t2new_tril)
    for i in range(nocc):
        for j in range(i+1):
            t2new[i,j] = t2new_tril[i*(i+1)//2+j]
        t2new[i,i] *= .5
    t2new_tril = None
    time1 = log.timer_debug1('vvvv', *time0)

#** make_inter_F
    fov = fock[:nocc,nocc:].copy()
    t1new += fov

    foo = fock[:nocc,:nocc].copy()
    foo[range(nocc),range(nocc)] = 0
    foo += .5 * numpy.einsum('ia,ja->ij', fock[:nocc,nocc:], t1)

    fvv = fock[nocc:,nocc:].copy()
    fvv[range(nvir),range(nvir)] = 0
    fvv -= .5 * numpy.einsum('ia,ib->ab', t1, fock[:nocc,nocc:])

    #: woooo = numpy.einsum('la,ikja->ikjl', t1, eris.ooov)
    eris_ooov = _cp(eris.ooov)
    foo += numpy.einsum('kc,jikc->ij', 2*t1, eris_ooov)
    foo += numpy.einsum('kc,jkic->ij',  -t1, eris_ooov)
    woooo = lib.ddot(eris_ooov.reshape(-1,nvir), t1.T).reshape((nocc,)*4)
    woooo = lib.transpose_sum(woooo.reshape(nocc*nocc,-1), inplace=True)
    woooo += _cp(eris.oooo).reshape(nocc**2,-1)
    woooo = _cp(woooo.reshape(nocc,nocc,nocc,nocc).transpose(0,2,1,3))
    eris_ooov = None
    time1 = log.timer_debug1('woooo', *time1)

    unit = _memory_usage_inloop(nocc, nvir)
    max_memory = max(2000, mycc.max_memory - lib.current_memory()[0])
    blksize = min(nocc, max(BLKMIN, int(max_memory/unit)))
    blknvir = int((max_memory*.9e6/8-blksize*nocc*nvir**2*6)/(blksize*nvir**2*2))
    blknvir = min(nvir, max(BLKMIN, blknvir))
    log.debug1('max_memory %d MB,  nocc,nvir = %d,%d  blksize = %d,%d',
               max_memory, nocc, nvir, blksize, blknvir)
    nvir_pair = nvir * (nvir+1) // 2
    def prefect_ovvv(p0, p1, q0, q1, prefetch):
        if q1 != nvir:
            q0, q1 = q1, min(nvir, q1+blknvir)
            readbuf = numpy.ndarray((p1-p0,q1-q0,nvir_pair), buffer=prefetch)
            readbuf[:] = eris.ovvv[p0:p1,q0:q1]
    def prefect_ovov(p0, p1, buf):
        buf[:] = eris.ovov[p0:p1]
    def prefect_oovv(p0, p1, buf):
        buf[:] = eris.oovv[p0:p1]

    buflen = max(nocc*nvir**2, nocc**3)
    bufs = numpy.empty((5,blksize*buflen))
    buf1, buf2, buf3, buf4, buf5 = bufs
    for p0, p1 in prange(0, nocc, blksize):
    #: wOoVv += numpy.einsum('iabc,jc->ijab', eris.ovvv, t1)
    #: wOoVv -= numpy.einsum('jbik,ka->jiba', eris.ovoo, t1)
        wOoVv = numpy.ndarray((nocc,p1-p0,nvir,nvir), buffer=buf3)
        wooVV = numpy.ndarray((p1-p0,nocc,nvir,nvir), buffer=buf4)
        handler = None
        readbuf = numpy.empty((p1-p0,blknvir,nvir_pair))
        prefetchbuf = numpy.empty((p1-p0,blknvir,nvir_pair))
        ovvvbuf = numpy.empty((p1-p0,blknvir,nvir,nvir))
        for q0, q1 in lib.prange(0, nvir, blknvir):
            if q0 == 0:
                readbuf[:] = eris.ovvv[p0:p1,q0:q1]
            else:
                readbuf, prefetchbuf = prefetchbuf, readbuf
            handler = async_do(handler, prefect_ovvv, p0, p1, q0, q1, prefetchbuf)
            eris_ovvv = numpy.ndarray(((p1-p0)*(q1-q0),nvir_pair), buffer=readbuf)
            #:eris_ovvv = _cp(eris.ovvv[p0:p1,q0:q1])
            eris_ovvv = lib.unpack_tril(eris_ovvv, out=ovvvbuf)
            eris_ovvv = eris_ovvv.reshape(p1-p0,q1-q0,nvir,nvir)

            #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
            #: tmp = numpy.einsum('ijcd,kcdb->ijbk', tau, eris.ovvv)
            #: t2new += numpy.einsum('ka,ijbk->ijab', -t1, tmp)
            if not mycc.direct:
                eris_vovv = lib.transpose(eris_ovvv.reshape(-1,nvir))
                eris_vovv = eris_vovv.reshape(nvir*(p1-p0),-1)
                tmp = numpy.ndarray((nocc,nocc,nvir,p1-p0), buffer=buf1)
                for j0, j1 in prange(0, nocc, blksize):
                    tau = numpy.ndarray((j1-j0,nocc,q1-q0,nvir), buffer=buf2)
                    tau = numpy.einsum('ia,jb->ijab', t1[j0:j1,q0:q1], t1, out=tau)
                    tau += t2[j0:j1,:,q0:q1]
                    lib.ddot(tau.reshape((j1-j0)*nocc,-1), eris_vovv.T, 1,
                             tmp[j0:j1].reshape((j1-j0)*nocc,-1), 0)
                tmp1 = numpy.ndarray((nocc,nocc,nvir,p1-p0), buffer=buf2)
                tmp1[:] = tmp.transpose(1,0,2,3)
                lib.ddot(tmp1.reshape(-1,p1-p0), t1[p0:p1], -1, t2new.reshape(-1,nvir), 1)
                eris_vovv = tau = tmp1 = tmp = None

            fvv += numpy.einsum('kc,kcba->ab', 2*t1[p0:p1,q0:q1], eris_ovvv)
            fvv[:,q0:q1] += numpy.einsum('kc,kbca->ab', -t1[p0:p1], eris_ovvv)

            #: wooVV -= numpy.einsum('jc,icba->ijba', t1, eris_ovvv)
            tmp = t1[:,q0:q1].copy()
            for i in range(eris_ovvv.shape[0]):
                lib.ddot(tmp, eris_ovvv[i].reshape(q1-q0,-1), -1,
                         wooVV[i].reshape(nocc,-1))

            #: wOoVv += numpy.einsum('ibac,jc->jiba', eris_ovvv, t1)
            tmp = numpy.ndarray((nocc,p1-p0,q1-q0,nvir), buffer=buf1)
            lib.ddot(t1, eris_ovvv.reshape(-1,nvir).T, 1, tmp.reshape(nocc,-1))
            wOoVv[:,:,q0:q1] = tmp

            #: theta = t2.transpose(1,0,2,3) * 2 - t2
            #: t1new += numpy.einsum('ijcb,jcba->ia', theta, eris.ovvv)
            theta = tmp
            theta[:] = t2[p0:p1,:,q0:q1,:].transpose(1,0,2,3)
            theta *= 2
            theta -= t2[:,p0:p1,q0:q1,:]
            lib.ddot(theta.reshape(nocc,-1), eris_ovvv.reshape(-1,nvir), 1, t1new, 1)
            theta = tmp = None
        handler.join()
        readbuf = prefetchbuf = ovvvbuf = eris_ovvv = None
        time2 = log.timer_debug1('ovvv [%d:%d]'%(p0, p1), *time1)

        tmp = numpy.ndarray((nocc,p1-p0,nvir,nocc), buffer=buf1)
        tmp[:] = _cp(eris.ovoo[p0:p1]).transpose(2,0,1,3)
        lib.ddot(tmp.reshape(-1,nocc), t1, -1, wOoVv.reshape(-1,nvir), 1)

        eris_ooov = _cp(eris.ooov[p0:p1])
        eris_oovv = numpy.empty((p1-p0,nocc,nvir,nvir))
        handler = lib.background_thread(prefect_oovv, p0, p1, eris_oovv)
        tmp = numpy.ndarray((p1-p0,nocc,nvir,nocc), buffer=buf1)
        tmp[:] = eris_ooov.transpose(0,1,3,2)
        #: wooVV = numpy.einsum('ka,ijkb->ijba', t1, eris.ooov[p0:p1])
        lib.ddot(tmp.reshape(-1,nocc), t1, 1, wooVV.reshape(-1,nvir), 1)
        t2new[p0:p1] += wOoVv.transpose(1,0,2,3)

        #:eris_oovv = _cp(eris.oovv[p0:p1])
        handler.join()
        eris_ovov = numpy.empty((p1-p0,nvir,nocc,nvir))
        handler = lib.background_thread(prefect_ovov, p0, p1, eris_ovov)
    #: g2 = 2 * eris.oOVv - eris.oovv
    #: t1new += numpy.einsum('jb,ijba->ia', t1, g2)
        t1new[p0:p1] += numpy.einsum('jb,ijba->ia',  -t1, eris_oovv)
        wooVV -= eris_oovv

        #tmp = numpy.einsum('ic,jkbc->jikb', t1, eris_oovv)
        #t2new[p0:p1] += numpy.einsum('ka,jikb->ijba', -t1, tmp)
        tmp1 = numpy.ndarray((nocc,nocc*nvir), buffer=buf1)
        tmp2 = numpy.ndarray((nocc*nvir,nocc), buffer=buf2)
        for j in range(p1-p0):
            tmp = lib.ddot(t1, eris_oovv[j].reshape(-1,nvir).T, 1, tmp1)
            lib.transpose(_cp(tmp).reshape(nocc,nocc,nvir), axes=(0,2,1), out=tmp2)
            t2new[:,p0+j] -= lib.ddot(tmp2, t1).reshape(nocc,nvir,nvir)
        eris_oovv = None

        #:eris_ovov = _cp(eris.ovov[p0:p1])
        handler.join()
        for i in range(p1-p0):
            t2new[p0+i] += eris_ovov[i].transpose(1,0,2) * .5
        t1new[p0:p1] += numpy.einsum('jb,iajb->ia', 2*t1, eris_ovov)
        #:tmp = numpy.einsum('ic,jbkc->jibk', t1, eris_ovov)
        #:t2new[p0:p1] += numpy.einsum('ka,jibk->jiba', -t1, tmp)
        for j in range(p1-p0):
            lib.ddot(t1, eris_ovov[j].reshape(-1,nvir).T, 1, tmp1)
            lib.ddot(tmp1.reshape(-1,nocc), t1, -1, t2new[p0+j].reshape(-1,nvir), 1)
        tmp1 = tmp2 = tmp = None

        fov[p0:p1] += numpy.einsum('kc,iakc->ia', t1, eris_ovov) * 2
        fov[p0:p1] -= numpy.einsum('kc,icka->ia', t1, eris_ovov)

    #: fvv -= numpy.einsum('ijca,ibjc->ab', theta, eris.ovov)
    #: foo += numpy.einsum('iakb,jkba->ij', eris.ovov, theta)
        tau = numpy.ndarray((nocc,nvir,nvir), buffer=buf1)
        theta = numpy.ndarray((nocc,nvir,nvir), buffer=buf2)
        for i in range(p1-p0):
            tau = numpy.einsum('a,jb->jab', t1[p0+i]*.5, t1, out=tau)
            tau += t2[p0+i]
            theta = lib.transpose(tau, axes=(0,2,1), out=theta)
            theta *= 2
            theta -= tau
            vov = lib.transpose(eris_ovov[i].reshape(nvir,-1), out=tau)
            lib.ddot(vov.reshape(nocc,-1), theta.reshape(nocc,-1).T, 1, foo, 1)
            lib.ddot(theta.reshape(-1,nvir).T, eris_ovov[i].reshape(nvir,-1).T, -1, fvv, 1)
        tau = theta = vov = None

    #: theta = t2.transpose(0,2,1,3) * 2 - t2.transpose(0,3,2,1)
    #: t1new += numpy.einsum('jb,ijba->ia', fov, theta)
    #: t1new -= numpy.einsum('kijb,kjba->ia', eris_ooov, theta)
        theta = numpy.ndarray((p1-p0,nvir,nocc,nvir), buffer=buf1)
        for i in range(p1-p0):
            tmp = t2[p0+i].transpose(0,2,1) * 2
            tmp-= t2[p0+i]
            lib.ddot(eris_ooov[i].reshape(nocc,-1),
                     tmp.reshape(-1,nvir), -1, t1new, 1)
            lib.transpose(_cp(tmp).reshape(-1,nvir), out=theta[i])  # theta[i] = tmp.transpose(2,0,1)
        t1new += numpy.einsum('jb,jbia->ia', fov[p0:p1], theta)
        eris_ooov = None

    #: wOVov += eris.ovov
    #: tau = theta - numpy.einsum('ic,kb->ikcb', t1, t1*2)
    #: wOVov += .5 * numpy.einsum('jakc,ikcb->jiba', eris.ovov, tau)
    #: wOVov -= .5 * numpy.einsum('jcka,ikcb->jiba', eris.ovov, t2)
    #: t2new += numpy.einsum('ikca,kjbc->ijba', theta, wOVov)
        for i in range(p1-p0):
            wOoVv[:,i] += wooVV[i]*.5  #: jiba + ijba*.5
        wOVov = lib.transpose(wOoVv.reshape(nocc,-1,nvir), axes=(0,2,1), out=buf5)
        wOVov = wOVov.reshape(nocc,nvir,-1,nvir)
        eris_OVov = lib.transpose(eris_ovov.reshape(-1,nov), out=buf3)
        eris_OVov = eris_OVov.reshape(nocc,nvir,-1,nvir)
        wOVov += eris_OVov
        theta = theta.reshape(-1,nov)
        for i in range(nocc):  # OVov-OVov.transpose(0,3,2,1)*.5
            eris_OVov[i] -= eris_OVov[i].transpose(2,1,0)*.5
        for j0, j1 in prange(0, nocc, blksize):
            tau = numpy.ndarray((j1-j0,nvir,nocc,nvir), buffer=buf2)
            for i in range(j1-j0):
                tau[i]  = t2[j0+i].transpose(1,0,2) * 2
                tau[i] -= t2[j0+i].transpose(2,0,1)
                tau[i] -= numpy.einsum('a,jb->bja', t1[j0+i]*2, t1)
            #: wOVov[j0:j1] += .5 * numpy.einsum('iakc,jbkc->jbai', eris_ovov, tau)
            lib.ddot(tau.reshape(-1,nov), eris_OVov.reshape(nov,-1),
                     .5, wOVov[j0:j1].reshape((j1-j0)*nvir,-1), 1)

            #theta = t2[p0:p1] * 2 - t2[p0:p1].transpose(0,1,3,2)
            #: t2new[j0:j1] += numpy.einsum('iack,jbck->jiba', theta, wOVov[j0:j1])
            tmp = lib.ddot(wOVov[j0:j1].reshape((j1-j0)*nvir,-1), theta, 1,
                           tau.reshape(-1,nov)).reshape(-1,nvir,nocc,nvir)
            for i in range(j1-j0):
                t2new[j0+i] += tmp[i].transpose(1,0,2)
        theta = wOoVv = wOVov = eris_OVov = tmp = tau = None
        time2 = log.timer_debug1('wOVov [%d:%d]'%(p0, p1), *time2)

    #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
    #: woooo += numpy.einsum('ijba,klab->ijkl', eris.oOVv, tau)
    #: tau = .5*t2 + numpy.einsum('ia,jb->ijab', t1, t1)
    #: woVoV += numpy.einsum('jkca,ikbc->ijba', tau, eris.oOVv)
        tmp = numpy.ndarray((p1-p0,nvir,nocc,nvir), buffer=buf1)
        tmp[:] = wooVV.transpose(0,2,1,3)
        woVoV = lib.transpose(_cp(tmp).reshape(-1,nov), out=buf4).reshape(nocc,nvir,p1-p0,nvir)
        eris_oOvV = numpy.ndarray((p1-p0,nocc,nvir,nvir), buffer=buf3)
        eris_oOvV[:] = eris_ovov.transpose(0,2,1,3)
        eris_oVOv = lib.transpose(eris_oOvV.reshape(-1,nov,nvir), axes=(0,2,1), out=buf5)
        eris_oVOv = eris_oVOv.reshape(-1,nvir,nocc,nvir)

        for j0, j1 in prange(0, nocc, blksize):
            tau = make_tau(t2[j0:j1], t1[j0:j1], t1, 1, out=buf2)
            #: woooo[p0:p1,:,j0:j1] += numpy.einsum('ijab,klab->ijkl', eris_oOvV, tau)
            _dgemm('N', 'T', (p1-p0)*nocc, (j1-j0)*nocc, nvir*nvir,
                   eris_oOvV.reshape(-1,nvir*nvir), tau.reshape(-1,nvir*nvir),
                   woooo[p0:p1].reshape(-1,nocc*nocc), 1, 1, 0, 0, j0*nocc)
            for i in range(j1-j0):
                tau[i] -= t2[j0+i] * .5
            #: woVoV[j0:j1] += numpy.einsum('jkca,ickb->jiab', tau, eris_ovov)
            lib.ddot(lib.transpose(tau.reshape(-1,nov,nvir), axes=(0,2,1)).reshape(-1,nov),
                     eris_oVOv.reshape(-1,nov).T,
                    1, woVoV[j0:j1].reshape((j1-j0)*nvir,-1), 1)
        time2 = log.timer_debug1('woVoV [%d:%d]'%(p0, p1), *time2)

        tau = make_tau(t2[p0:p1], t1[p0:p1], t1, 1, out=buf2)
        #: t2new += .5 * numpy.einsum('klij,klab->ijab', woooo[p0:p1], tau)
        lib.ddot(woooo[p0:p1].reshape(-1,nocc*nocc).T, tau.reshape(-1,nvir*nvir),
                 .5, t2new.reshape(nocc*nocc,-1), 1)
        eris_ovov = eris_oVOv = eris_oOvV = wooVV = tau = tmp = None

        t2ibja = lib.transpose(_cp(t2[p0:p1]).reshape(-1,nov,nvir), axes=(0,2,1),
                               out=buf1).reshape(-1,nvir,nocc,nvir)
        tmp = numpy.ndarray((blksize,nvir,nocc,nvir), buffer=buf2)
        for j0, j1 in prange(0, nocc, blksize):
            #: t2new[j0:j1] += numpy.einsum('ibkc,kcja->ijab', woVoV[j0:j1], t2ibja)
            lib.ddot(woVoV[j0:j1].reshape((j1-j0)*nvir,-1),
                     t2ibja.reshape(-1,nov), 1, tmp[:j1-j0].reshape(-1,nov))
            for i in range(j1-j0):
                t2new[j0+i] += tmp[i].transpose(1,2,0)
                t2new[j0+i] += tmp[i].transpose(1,0,2) * .5
        woVoV = t2ibja = tmp = None
        time1 = log.timer_debug1('contract occ [%d:%d]'%(p0, p1), *time1)
    buf1 = buf2 = buf3 = buf4 = buf5 = bufs = None
    time1 = log.timer_debug1('contract loop', *time0)

    woooo = None
    ft_ij = foo + numpy.einsum('ja,ia->ij', .5*t1, fov)
    ft_ab = fvv - numpy.einsum('ia,ib->ab', .5*t1, fov)
    #: t2new += numpy.einsum('ijac,bc->ijab', t2, ft_ab)
    #: t2new -= numpy.einsum('ki,kjab->ijab', ft_ij, t2)
    lib.ddot(t2.reshape(-1,nvir), ft_ab.T, 1, t2new.reshape(-1,nvir), 1)
    lib.ddot(ft_ij.T, t2.reshape(nocc,-1),-1, t2new.reshape(nocc,-1), 1)

    mo_e = fock.diagonal()
    eia = mo_e[:nocc,None] - mo_e[None,nocc:]
    t1new += numpy.einsum('ib,ab->ia', t1, fvv)
    t1new -= numpy.einsum('ja,ji->ia', t1, foo)
    t1new /= eia

    #: t2new = t2new + t2new.transpose(1,0,3,2)
    ij = 0
    for i in range(nocc):
        for j in range(i+1):
            t2new[i,j] += t2new[j,i].T
            t2new[i,j] /= lib.direct_sum('a,b->ab', eia[i], eia[j])
            t2new[j,i]  = t2new[i,j].T
            ij += 1

    time0 = log.timer_debug1('update t1 t2', *time0)
    return t1new, t2new
Beispiel #39
0
def get_eri(mydf, kpts=None, compact=True):
    if mydf._cderi is None:
        mydf.build()

    cell = mydf.cell
    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    nao = cell.nao_nr()
    nao_pair = nao * (nao + 1) // 2
    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0] - nao ** 4 * 8 / 1e6)

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if abs(kptijkl).sum() < KPT_DIFF_TOL:
        eriR = numpy.zeros((nao_pair, nao_pair))
        for LpqR, LpqI in mydf.sr_loop(kptijkl[:2], max_memory, True):
            lib.ddot(LpqR.T, LpqR, 1, eriR, 1)
            LpqR = LpqI = None
        if not compact:
            eriR = ao2mo.restore(1, eriR, nao).reshape(nao ** 2, -1)
        return eriR

    elif (abs(kpti - kptk).sum() < KPT_DIFF_TOL) and (abs(kptj - kptl).sum() < KPT_DIFF_TOL):
        eriR = numpy.zeros((nao * nao, nao * nao))
        eriI = numpy.zeros((nao * nao, nao * nao))
        for LpqR, LpqI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            zdotNN(LpqR.T, LpqI.T, LpqR, LpqI, 1, eriR, eriI, 1)
            LpqR = LpqI = None
        return eriR + eriI * 1j

    ####################
    # (kpt) i == j == k == l != 0
    #
    # (kpt) i == l && j == k && i != j && j != k  =>
    # both vbar and ovlp are zero. It corresponds to the exchange integral.
    #
    # complex integrals, N^4 elements
    elif (abs(kpti - kptl).sum() < KPT_DIFF_TOL) and (abs(kptj - kptk).sum() < KPT_DIFF_TOL):
        eriR = numpy.zeros((nao * nao, nao * nao))
        eriI = numpy.zeros((nao * nao, nao * nao))
        for LpqR, LpqI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            zdotNC(LpqR.T, LpqI.T, LpqR, LpqI, 1, eriR, eriI, 1)
            LpqR = LpqI = None
        # transpose(0,1,3,2) because
        # j == k && i == l  =>
        # (L|ij).transpose(0,2,1).conj() = (L^*|ji) = (L^*|kl)  =>  (M|kl)
        eri = lib.transpose((eriR + eriI * 1j).reshape(-1, nao, nao), axes=(0, 2, 1))
        return eri.reshape(nao ** 2, -1)

    ####################
    # aosym = s1, complex integrals
    #
    # kpti == kptj  =>  kptl == kptk
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
    # vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
    # So  kptl/b - kptk/b  must be -1 < k/b < 1.
    #
    else:
        eriR = numpy.zeros((nao * nao, nao * nao))
        eriI = numpy.zeros((nao * nao, nao * nao))
        for (LpqR, LpqI), (LrsR, LrsI) in lib.izip(
            mydf.sr_loop(kptijkl[:2], max_memory, False), mydf.sr_loop(kptijkl[2:], max_memory, False)
        ):
            zdotNN(LpqR.T, LpqI.T, LrsR, LrsI, 1, eriR, eriI, 1)
            LpqR = LpqI = LrsR = LrsI = None
        return eriR + eriI * 1j
Beispiel #40
0
def get_eri(mydf, kpts=None, compact=True):
    if mydf._cderi is None:
        mydf.build()

    cell = mydf.cell
    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    nao = cell.nao_nr()
    nao_pair = nao * (nao+1) // 2
    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0]-nao**4*8/1e6)

####################
# gamma point, the integral is real and with s4 symmetry
    if abs(kptijkl).sum() < KPT_DIFF_TOL:
        eriR = numpy.zeros((nao_pair,nao_pair))
        for LpqR, LpqI in mydf.sr_loop(kptijkl[:2], max_memory, True):
            lib.ddot(LpqR.T, LpqR, 1, eriR, 1)
            LpqR = LpqI = None
        if not compact:
            eriR = ao2mo.restore(1, eriR, nao).reshape(nao**2,-1)
        return eriR

    elif (abs(kpti-kptk).sum() < KPT_DIFF_TOL) and (abs(kptj-kptl).sum() < KPT_DIFF_TOL):
        eriR = numpy.zeros((nao*nao,nao*nao))
        eriI = numpy.zeros((nao*nao,nao*nao))
        for LpqR, LpqI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            zdotNN(LpqR.T, LpqI.T, LpqR, LpqI, 1, eriR, eriI, 1)
            LpqR = LpqI = None
        return eriR + eriI*1j

####################
# (kpt) i == j == k == l != 0
#
# (kpt) i == l && j == k && i != j && j != k  =>
# both vbar and ovlp are zero. It corresponds to the exchange integral.
#
# complex integrals, N^4 elements
    elif (abs(kpti-kptl).sum() < KPT_DIFF_TOL) and (abs(kptj-kptk).sum() < KPT_DIFF_TOL):
        eriR = numpy.zeros((nao*nao,nao*nao))
        eriI = numpy.zeros((nao*nao,nao*nao))
        for LpqR, LpqI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            zdotNC(LpqR.T, LpqI.T, LpqR, LpqI, 1, eriR, eriI, 1)
            LpqR = LpqI = None
# transpose(0,1,3,2) because
# j == k && i == l  =>
# (L|ij).transpose(0,2,1).conj() = (L^*|ji) = (L^*|kl)  =>  (M|kl)
        eri = lib.transpose((eriR+eriI*1j).reshape(-1,nao,nao), axes=(0,2,1))
        return eri.reshape(nao**2,-1)

####################
# aosym = s1, complex integrals
#
# kpti == kptj  =>  kptl == kptk
# If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
# vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
# So  kptl/b - kptk/b  must be -1 < k/b < 1.
#
    else:
        eriR = numpy.zeros((nao*nao,nao*nao))
        eriI = numpy.zeros((nao*nao,nao*nao))
        for (LpqR, LpqI), (LrsR, LrsI) in \
                lib.izip(mydf.sr_loop(kptijkl[:2], max_memory, False),
                         mydf.sr_loop(kptijkl[2:], max_memory, False)):
            zdotNN(LpqR.T, LpqI.T, LrsR, LrsI, 1, eriR, eriI, 1)
            LpqR = LpqI = LrsR = LrsI = None
        return eriR + eriI*1j
Beispiel #41
0
    def __init__(self, cc, mo_coeff=None, method='incore'):
        cput0 = (time.clock(), time.time())
        moidx = numpy.ones(cc.mo_occ.size, dtype=numpy.bool)
        if isinstance(cc.frozen, (int, numpy.integer)):
            moidx[:cc.frozen] = False
        elif len(cc.frozen) > 0:
            moidx[numpy.asarray(cc.frozen)] = False
        if mo_coeff is None:
            self.mo_coeff = mo_coeff = cc.mo_coeff[:,moidx]
        else:
            self.mo_coeff = mo_coeff = mo_coeff[:,moidx]
        dm = cc._scf.make_rdm1(cc.mo_coeff, cc.mo_occ)
        fockao = cc._scf.get_hcore() + cc._scf.get_veff(cc.mol, dm)
        self.fock = reduce(numpy.dot, (mo_coeff.T, fockao, mo_coeff))

        nocc = cc.nocc
        nmo = cc.nmo
        nvir = nmo - nocc
        mem_incore, mem_outcore, mem_basic = _mem_usage(nocc, nvir)
        mem_now = lib.current_memory()[0]

        log = logger.Logger(cc.stdout, cc.verbose)
        if hasattr(cc._scf, 'with_df') and cc._scf.with_df:
            #log.warn('CCSD detected DF being bound to the HF object. '
            #         'MO integrals are computed based on the DF 3-tensor integrals.\n'
            #         'You can switch to dfccsd.CCSD for the DF-CCSD implementation')
            nvir_pair = nvir * (nvir+1) // 2
            oooo = numpy.zeros((nocc*nocc,nocc*nocc))
            ooov = numpy.zeros((nocc*nocc,nocc*nvir))
            ovoo = numpy.zeros((nocc*nvir,nocc*nocc))
            oovv = numpy.zeros((nocc*nocc,nvir*nvir))
            ovov = numpy.zeros((nocc*nvir,nocc*nvir))
            ovvv = numpy.zeros((nocc*nvir,nvir_pair))
            vvvv = numpy.zeros((nvir_pair,nvir_pair))

            mo = numpy.asarray(mo_coeff, order='F')
            nmo = mo.shape[1]
            ijslice = (0, nmo, 0, nmo)
            Lpq = None
            for eri1 in cc._scf.with_df.loop():
                Lpq = _ao2mo.nr_e2(eri1, mo, ijslice, aosym='s2', out=Lpq).reshape(-1,nmo,nmo)
                Loo = Lpq[:,:nocc,:nocc].reshape(-1,nocc**2)
                Lov = Lpq[:,:nocc,nocc:].reshape(-1,nocc*nvir)
                Lvv = Lpq[:,nocc:,nocc:].reshape(-1,nvir**2)
                lib.ddot(Loo.T, Loo, 1, oooo, 1)
                lib.ddot(Loo.T, Lov, 1, ooov, 1)
                lib.ddot(Lov.T, Loo, 1, ovoo, 1)
                lib.ddot(Loo.T, Lvv, 1, oovv, 1)
                lib.ddot(Lov.T, Lov, 1, ovov, 1)
                Lvv = lib.pack_tril(Lvv.reshape(-1,nvir,nvir))
                lib.ddot(Lov.T, Lvv, 1, ovvv, 1)
                lib.ddot(Lvv.T, Lvv, 1, vvvv, 1)

            _tmpfile1 = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
            self.feri1 = feri1 = h5py.File(_tmpfile1.name)
            def __del__feri1(self):
                feri1.close()
            self.feri1.__del__ = __del__feri1
            self.feri1['oooo'] = oooo.reshape(nocc,nocc,nocc,nocc)
            self.feri1['ooov'] = ooov.reshape(nocc,nocc,nocc,nvir)
            self.feri1['ovoo'] = ovoo.reshape(nocc,nvir,nocc,nocc)
            self.feri1['oovv'] = oovv.reshape(nocc,nocc,nvir,nvir)
            self.feri1['ovov'] = ovov.reshape(nocc,nvir,nocc,nvir)
            self.feri1['ovvv'] = ovvv.reshape(nocc,nvir,nvir_pair)
            self.feri1['vvvv'] = vvvv.reshape(nvir_pair,nvir_pair)
            self.oooo = self.feri1['oooo']
            self.ooov = self.feri1['ooov']
            self.ovoo = self.feri1['ovoo']
            self.oovv = self.feri1['oovv']
            self.ovov = self.feri1['ovov']
            self.ovvv = self.feri1['ovvv']
            self.vvvv = self.feri1['vvvv']

        elif (method == 'incore' and cc._scf._eri is not None and
            (mem_incore+mem_now < cc.max_memory) or cc.mol.incore_anyway):
            eri1 = ao2mo.incore.full(cc._scf._eri, mo_coeff)
            #:eri1 = ao2mo.restore(1, eri1, nmo)
            #:self.oooo = eri1[:nocc,:nocc,:nocc,:nocc].copy()
            #:self.ooov = eri1[:nocc,:nocc,:nocc,nocc:].copy()
            #:self.ovoo = eri1[:nocc,nocc:,:nocc,:nocc].copy()
            #:self.oovv = eri1[:nocc,:nocc,nocc:,nocc:].copy()
            #:self.ovov = eri1[:nocc,nocc:,:nocc,nocc:].copy()
            #:ovvv = eri1[:nocc,nocc:,nocc:,nocc:].copy()
            #:self.ovvv = numpy.empty((nocc,nvir,nvir*(nvir+1)//2))
            #:for i in range(nocc):
            #:    for j in range(nvir):
            #:        self.ovvv[i,j] = lib.pack_tril(ovvv[i,j])
            #:self.vvvv = ao2mo.restore(4, eri1[nocc:,nocc:,nocc:,nocc:], nvir)
            nvir_pair = nvir * (nvir+1) // 2
            self.oooo = numpy.empty((nocc,nocc,nocc,nocc))
            self.ooov = numpy.empty((nocc,nocc,nocc,nvir))
            self.ovoo = numpy.empty((nocc,nvir,nocc,nocc))
            self.oovv = numpy.empty((nocc,nocc,nvir,nvir))
            self.ovov = numpy.empty((nocc,nvir,nocc,nvir))
            self.ovvv = numpy.empty((nocc,nvir,nvir_pair))
            self.vvvv = numpy.empty((nvir_pair,nvir_pair))
            ij = 0
            outbuf = numpy.empty((nmo,nmo,nmo))
            for i in range(nocc):
                buf = lib.unpack_tril(eri1[ij:ij+i+1], out=outbuf[:i+1])
                for j in range(i+1):
                    self.oooo[i,j] = self.oooo[j,i] = buf[j,:nocc,:nocc]
                    self.ooov[i,j] = self.ooov[j,i] = buf[j,:nocc,nocc:]
                    self.oovv[i,j] = self.oovv[j,i] = buf[j,nocc:,nocc:]
                ij += i + 1
            ij1 = 0
            for i in range(nocc,nmo):
                buf = lib.unpack_tril(eri1[ij:ij+i+1], out=outbuf[:i+1])
                self.ovoo[:,i-nocc] = buf[:nocc,:nocc,:nocc]
                self.ovov[:,i-nocc] = buf[:nocc,:nocc,nocc:]
                for j in range(nocc):
                    self.ovvv[j,i-nocc] = lib.pack_tril(_cp(buf[j,nocc:,nocc:]))
                for j in range(nocc, i+1):
                    self.vvvv[ij1] = lib.pack_tril(_cp(buf[j,nocc:,nocc:]))
                    ij1 += 1
                ij += i + 1
        else:
            cput1 = time.clock(), time.time()
            _tmpfile1 = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
            _tmpfile2 = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
            self.feri1 = feri1 = h5py.File(_tmpfile1.name)
            def __del__feri1(self):
                feri1.close()
            self.feri1.__del__ = __del__feri1
            orbo = mo_coeff[:,:nocc]
            orbv = mo_coeff[:,nocc:]
            nvpair = nvir * (nvir+1) // 2
            self.oooo = self.feri1.create_dataset('oooo', (nocc,nocc,nocc,nocc), 'f8')
            self.ooov = self.feri1.create_dataset('ooov', (nocc,nocc,nocc,nvir), 'f8')
            self.ovoo = self.feri1.create_dataset('ovoo', (nocc,nvir,nocc,nocc), 'f8')
            self.oovv = self.feri1.create_dataset('oovv', (nocc,nocc,nvir,nvir), 'f8')
            self.ovov = self.feri1.create_dataset('ovov', (nocc,nvir,nocc,nvir), 'f8')
            self.ovvv = self.feri1.create_dataset('ovvv', (nocc,nvir,nvpair), 'f8')
            fsort = _ccsd.libcc.CCsd_sort_inplace
            nocc_pair = nocc*(nocc+1)//2
            nvir_pair = nvir*(nvir+1)//2
            def sort_inplace(eri):
                fsort(eri.ctypes.data_as(ctypes.c_void_p),
                      ctypes.c_int(nocc), ctypes.c_int(nvir),
                      ctypes.c_int(eri.shape[0]))
                vv = eri[:,:nvir_pair]
                oo = eri[:,nvir_pair:nvir_pair+nocc_pair]
                ov = eri[:,nvir_pair+nocc_pair:].reshape(-1,nocc,nvir)
                return oo, ov, vv
            buf = numpy.empty((nmo,nmo,nmo))
            def save_occ_frac(i, p0, p1, eri):
                oo, ov, vv = sort_inplace(eri)
                self.oooo[i,p0:p1] = lib.unpack_tril(oo, out=buf)
                self.ooov[i,p0:p1] = ov
                self.oovv[i,p0:p1] = lib.unpack_tril(vv, out=buf)
            def save_vir_frac(i, p0, p1, eri):
                oo, ov, vv = sort_inplace(eri)
                self.ovoo[i,p0:p1] = lib.unpack_tril(oo, out=buf)
                self.ovov[i,p0:p1] = ov
                self.ovvv[i,p0:p1] = vv

            if not cc.direct:
                max_memory = max(2000,cc.max_memory-lib.current_memory()[0])
                self.feri2 = feri2 = h5py.File(_tmpfile2.name)
                def __del__feri2(self):
                    feri2.close()
                self.feri2.__del__ = __del__feri2
                ao2mo.full(cc.mol, orbv, self.feri2, max_memory=max_memory, verbose=log)
                self.vvvv = self.feri2['eri_mo']
                cput1 = log.timer_debug1('transforming vvvv', *cput1)

            tmpfile3 = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
            with h5py.File(tmpfile3.name, 'w') as feri:
                max_memory = max(2000, cc.max_memory-lib.current_memory()[0])
                mo = numpy.hstack((orbv, orbo))
                ao2mo.general(cc.mol, (orbo,mo,mo,mo),
                              feri, max_memory=max_memory, verbose=log)
                cput1 = log.timer_debug1('transforming oppp', *cput1)
                blksize = max(1, int(min(8e9,max_memory*.5e6)/8/nmo**2))
                handler = None
                for i in range(nocc):
                    for p0, p1 in lib.prange(0, nvir, blksize):
                        eri = _cp(feri['eri_mo'][i*nmo+p0:i*nmo+p1])
                        handler = async_do(handler, save_vir_frac, i, p0, p1, eri)
                    for p0, p1 in lib.prange(0, nocc, blksize):
                        eri = _cp(feri['eri_mo'][i*nmo+nvir+p0:i*nmo+nvir+p1])
                        handler = async_do(handler, save_occ_frac, i, p0, p1, eri)
                    cput1 = log.timer_debug1('sorting %d'%i, *cput1)
                if handler is not None:
                    handler.join()
                for key in feri.keys():
                    del(feri[key])
        log.timer('CCSD integral transformation', *cput0)
Beispiel #42
0
def contract(myci, civec, eris):
    time0 = time.clock(), time.time()
    log = logger.Logger(myci.stdout, myci.verbose)
    nocc = myci.nocc
    nmo = myci.nmo
    nvir = nmo - nocc
    nov = nocc * nvir
    noo = nocc**2
    c0 = civec[0]
    c1 = civec[1:nov + 1].reshape(nocc, nvir)
    c2 = civec[nov + 1:].reshape(nocc, nocc, nvir, nvir)

    cinew = numpy.zeros_like(civec)
    t1 = cinew[1:nov + 1].reshape(nocc, nvir)
    t2 = cinew[nov + 1:].reshape(nocc, nocc, nvir, nvir)
    t2new_tril = numpy.zeros((nocc * (nocc + 1) // 2, nvir, nvir))
    myci.add_wvvVV_(c2, eris, t2new_tril)
    for i in range(nocc):
        for j in range(i + 1):
            t2[i, j] = t2new_tril[i * (i + 1) // 2 + j]
        t2[i, i] *= .5
    t2new_tril = None
    time1 = log.timer_debug1('vvvv', *time0)
    #:t2 += numpy.einsum('iklj,klab->ijab', _cp(eris.oooo)*.5, c2)
    oooo = lib.transpose(_cp(eris.oooo).reshape(nocc, noo, nocc),
                         axes=(0, 2, 1))
    lib.ddot(oooo.reshape(noo, noo), c2.reshape(noo, -1), .5,
             t2.reshape(noo, -1), 1)

    foo = eris.fock[:nocc, :nocc].copy()
    fov = eris.fock[:nocc, nocc:].copy()
    fvv = eris.fock[nocc:, nocc:].copy()

    t1 += fov * c0
    t1 += numpy.einsum('ib,ab->ia', c1, fvv)
    t1 -= numpy.einsum('ja,ji->ia', c1, foo)

    #:t2 += numpy.einsum('bc,ijac->ijab', fvv, c2)
    #:t2 -= numpy.einsum('kj,kiba->ijab', foo, c2)
    #:t2 += numpy.einsum('ia,jb->ijab', c1, fov)
    lib.ddot(c2.reshape(-1, nvir), fvv, 1, t2.reshape(-1, nvir), 1)
    lib.ddot(foo, c2.reshape(nocc, -1), -1, t2.reshape(nocc, -1), 1)
    for j in range(nocc):
        t2[:, j] += numpy.einsum('ia,b->iab', c1, fov[j])

    eris_vovv = lib.unpack_tril(eris.vovv).reshape(nvir, nocc, nvir, -1)
    unit = _memory_usage_inloop(nocc, nvir)
    max_memory = max(2000, myci.max_memory - lib.current_memory()[0])
    blksize = min(nvir, max(ccsd.BLKMIN, int(max_memory / unit)))
    log.debug1('max_memory %d MB,  nocc,nvir = %d,%d  blksize = %d',
               max_memory, nocc, nvir, blksize)
    nvir_pair = nvir * (nvir + 1) // 2
    for p0, p1 in lib.prange(0, nvir, blksize):
        eris_vvoo = _cp(eris.vvoo[p0:p1])
        oovv = lib.transpose(eris_vvoo.reshape(-1, nocc**2))
        #:eris_oVoV = eris_vvoo.transpose(2,0,3,1)
        eris_oVoV = numpy.ndarray((nocc, p1 - p0, nocc, nvir))
        eris_oVoV[:] = oovv.reshape(nocc, nocc, p1 - p0,
                                    nvir).transpose(0, 2, 1, 3)
        eris_vvoo = oovv = None
        #:tmp = numpy.einsum('ikca,jbkc->jiba', c2, eris_oVoV)
        #:t2[:,:,p0:p1] -= tmp*.5
        #:t2[:,:,p0:p1] -= tmp.transpose(1,0,2,3)
        for i in range(nocc):
            tmp = lib.ddot(eris_oVoV.reshape(-1, nov),
                           c2[i].reshape(nov, nvir))
            tmp = tmp.reshape(nocc, p1 - p0, nvir)
            t2[:, i, p0:p1] -= tmp * .5
            t2[i, :, p0:p1] -= tmp

        eris_voov = _cp(eris.voov[p0:p1])
        for i in range(p0, p1):
            t2[:, :, i] += eris_voov[i - p0] * (c0 * .5)
        t1[:, p0:p1] += numpy.einsum('jb,aijb->ia', c1, eris_voov) * 2
        t1[:, p0:p1] -= numpy.einsum('jb,iajb->ia', c1, eris_oVoV)

        #:ovov = eris_voov.transpose(2,0,1,3) - eris_vvoo.transpose(2,0,3,1)
        ovov = eris_oVoV
        ovov *= -.5
        for i in range(nocc):
            ovov[i] += eris_voov[:, :, i]
        eris_oVoV = eris_vvoo = None
        #:theta = c2[:,:,p0:p1]
        #:theta = theta * 2 - theta.transpose(1,0,2,3)
        #:theta = theta.transpose(2,0,1,3)
        theta = numpy.ndarray((p1 - p0, nocc, nocc, nvir))
        for i in range(p0, p1):
            theta[i - p0] = c2[:, :, i] * 2
            theta[i - p0] -= c2[:, :, i].transpose(1, 0, 2)
        #:t2 += numpy.einsum('ckia,jckb->ijab', theta, ovov)
        for j in range(nocc):
            tmp = lib.ddot(theta.reshape(-1, nov).T, ovov[j].reshape(-1, nvir))
            t2[:, j] += tmp.reshape(nocc, nvir, nvir)
        tmp = ovov = None

        t1[:, p0:p1] += numpy.einsum('aijb,jb->ia', theta, fov)

        eris_vooo = _cp(eris.vooo[p0:p1])
        #:t1 -= numpy.einsum('bjka,bjki->ia', theta, eris_vooo)
        #:t2[:,:,p0:p1] -= numpy.einsum('ka,bjik->jiba', c1, eris_vooo)
        lib.ddot(
            eris_vooo.reshape(-1, nocc).T, theta.reshape(-1, nvir), -1, t1, 1)
        for i in range(p0, p1):
            t2[:, :, i] -= lib.ddot(eris_vooo[i - p0].reshape(noo, -1),
                                    c1).reshape(nocc, nocc, -1)
        eris_vooo = None

        eris_vovv = _cp(eris.vovv[p0:p1]).reshape(-1, nvir_pair)
        eris_vovv = lib.unpack_tril(eris_vovv).reshape(p1 - p0, nocc, nvir,
                                                       nvir)
        #:t1 += numpy.einsum('cjib,cjba->ia', theta, eris_vovv)
        #:t2[:,:,p0:p1] += numpy.einsum('jc,aibc->ijab', c1, eris_vovv)
        theta = lib.transpose(theta.reshape(-1, nocc, nvir), axes=(0, 2, 1))
        lib.ddot(
            theta.reshape(-1, nocc).T, eris_vovv.reshape(-1, nvir), 1, t1, 1)
        for i in range(p0, p1):
            tmp = lib.ddot(c1, eris_vovv[i - p0].reshape(-1, nvir).T)
            t2[:, :, i] += tmp.reshape(nocc, nocc, nvir).transpose(1, 0, 2)
        tmp = eris_vovv = None

    for i in range(nocc):
        for j in range(i + 1):
            t2[i, j] += t2[j, i].T
            t2[j, i] = t2[i, j].T

    cinew[0] += numpy.einsum('ia,ia->', fov, c1) * 2
    cinew[0] += numpy.einsum('aijb,ijab->', eris.voov, c2) * 2
    cinew[0] -= numpy.einsum('aijb,jiab->', eris.voov, c2)
    return cinew
Beispiel #43
0
def make_rdm2(ci, nmo, nocc):
    '''spin-traced 2pdm in chemist's notation
    '''
    nvir = nmo - nocc
    noo = nocc**2
    nov = nocc * nvir
    c0, c1, c2 = cisdvec_to_amplitudes(ci, nmo, nocc)
    doovv = c0 * c2
    dvvvo = numpy.einsum('ia,ikcd->cdak', c1, c2)
    dovoo = -numpy.einsum('ia,klac->ickl', c1, c2)
    doooo = lib.ddot(c2.reshape(noo, -1),
                     c2.reshape(noo, -1).T).reshape((nocc, ) * 4)
    dvvvv = lib.ddot(c2.reshape(noo, -1).T, c2.reshape(noo, -1)).reshape(
        (nvir, ) * 4)

    rdm2 = numpy.zeros((nmo, nmo, nmo, nmo))
    rdm2[:nocc, :nocc, :nocc, :nocc] = doooo * 4 - doooo.transpose(1, 0, 2,
                                                                   3) * 2
    rdm2[:nocc,
         nocc:, :nocc, :nocc] = dovoo * 4 - dovoo.transpose(0, 1, 3, 2) * 2
    rdm2[nocc:, :nocc, :nocc, :nocc] = rdm2[:nocc,
                                            nocc:, :nocc, :nocc].transpose(
                                                1, 0, 3, 2)
    rdm2[:nocc, :nocc, :nocc,
         nocc:] = rdm2[:nocc, nocc:, :nocc, :nocc].transpose(2, 3, 0, 1)
    rdm2[:nocc, :nocc,
         nocc:, :nocc] = rdm2[:nocc,
                              nocc:, :nocc, :nocc].transpose(3, 2, 1, 0)

    rdm2[:nocc, :nocc, nocc:,
         nocc:] = doovv * 4 - doovv.transpose(1, 0, 2, 3) * 2
    rdm2[nocc:, nocc:, :nocc, :nocc] = rdm2[:nocc, :nocc, nocc:,
                                            nocc:].transpose(2, 3, 0, 1)
    rdm2[nocc:, nocc:,
         nocc:, :nocc] = dvvvo * 4 - dvvvo.transpose(1, 0, 2, 3) * 2
    rdm2[nocc:, nocc:, :nocc,
         nocc:] = rdm2[nocc:, nocc:, nocc:, :nocc].transpose(1, 0, 3, 2)
    rdm2[nocc:, :nocc, nocc:,
         nocc:] = rdm2[nocc:, nocc:, nocc:, :nocc].transpose(2, 3, 0, 1)
    rdm2[:nocc, nocc:, nocc:,
         nocc:] = rdm2[nocc:, nocc:, nocc:, :nocc].transpose(3, 2, 1, 0)
    rdm2[nocc:, nocc:, nocc:,
         nocc:] = dvvvv * 4 - dvvvv.transpose(1, 0, 2, 3) * 2

    theta = c2 * 2 - c2.transpose(1, 0, 2, 3)
    dovov = numpy.einsum('ia,kc->icka', c1, c1) * -2
    #:dovov -= numpy.einsum('kjcb,kica->jaib', c2, theta) * 2
    #:dovov -= numpy.einsum('ikcb,jkca->iajb', c2, theta) * 2
    dovov -= lib.ddot(
        c2.transpose(0, 2, 1, 3).reshape(nov, -1).T,
        theta.transpose(0, 2, 1, 3).reshape(nov, -1),
        2).reshape(nocc, nvir, nocc, nvir).transpose(0, 3, 2, 1)
    dovov -= lib.ddot(
        c2.transpose(0, 3, 1, 2).reshape(nov, -1),
        theta.transpose(0, 3, 1, 2).reshape(nov, -1).T,
        2).reshape(nocc, nvir, nocc, nvir).transpose(0, 3, 2, 1)
    dvoov = numpy.einsum('ia,kc->cika', c1, c1) * 4
    #:dvoov += numpy.einsum('kica,kjcb->ajib', theta, theta) * 2
    dvoov += lib.ddot(
        theta.transpose(0, 2, 1, 3).reshape(nov, -1).T,
        theta.transpose(0, 2, 1, 3).reshape(nov, -1),
        2).reshape(nocc, nvir, nocc, nvir).transpose(1, 2, 0, 3)

    rdm2[:nocc, nocc:, :nocc, nocc:] = dovov
    rdm2[nocc:, :nocc, nocc:, :nocc] = dovov.transpose(1, 0, 3, 2)
    rdm2[nocc:, :nocc, :nocc, nocc:] = dvoov
    rdm2[:nocc, nocc:, nocc:, :nocc] = dvoov.transpose(1, 0, 3, 2)

    rdm1 = make_rdm1(ci, nmo, nocc)
    for i in range(nocc):
        rdm1[i, i] -= 2
    for i in range(nocc):
        for j in range(nocc):
            rdm2[i, j, i, j] += 4
            rdm2[i, j, j, i] -= 2
        rdm2[i, :, i, :] += rdm1 * 2
        rdm2[:, i, :, i] += rdm1 * 2
        rdm2[:, i, i, :] -= rdm1
        rdm2[i, :, :, i] -= rdm1

    return rdm2.transpose(0, 2, 1, 3)  # to chemist's notation
Beispiel #44
0
def make_rdm2(ci, nmo, nocc):
    nvir = nmo - nocc
    noo = nocc**2
    nov = nocc * nvir
    c0 = ci[0]
    c1 = ci[1:nocc * nvir + 1].reshape(nocc, nvir)
    c2 = ci[nocc * nvir + 1:].reshape(nocc, nocc, nvir, nvir)
    doovv = c0 * c2
    dvvvo = numpy.einsum('ia,ikcd->cdak', c1, c2)
    dovoo = -numpy.einsum('ia,klac->ickl', c1, c2)
    doooo = lib.ddot(c2.reshape(noo, -1),
                     c2.reshape(noo, -1).T).reshape((nocc, ) * 4)
    dvvvv = lib.ddot(c2.reshape(noo, -1).T, c2.reshape(noo, -1)).reshape(
        (nvir, ) * 4)

    rdm2 = numpy.zeros((nmo, nmo, nmo, nmo))
    rdm2[:nocc, :nocc, :nocc, :nocc] = doooo * 4 - doooo.transpose(1, 0, 2,
                                                                   3) * 2
    rdm2[:nocc,
         nocc:, :nocc, :nocc] = dovoo * 4 - dovoo.transpose(0, 1, 3, 2) * 2
    rdm2[nocc:, :nocc, :nocc, :nocc] = rdm2[:nocc,
                                            nocc:, :nocc, :nocc].transpose(
                                                1, 0, 3, 2)
    rdm2[:nocc, :nocc, :nocc,
         nocc:] = rdm2[:nocc, nocc:, :nocc, :nocc].transpose(2, 3, 0, 1)
    rdm2[:nocc, :nocc,
         nocc:, :nocc] = rdm2[:nocc,
                              nocc:, :nocc, :nocc].transpose(3, 2, 1, 0)

    rdm2[:nocc, :nocc, nocc:,
         nocc:] = doovv * 4 - doovv.transpose(1, 0, 2, 3) * 2
    rdm2[nocc:, nocc:, :nocc, :nocc] = rdm2[:nocc, :nocc, nocc:,
                                            nocc:].transpose(2, 3, 0, 1)
    rdm2[nocc:, nocc:,
         nocc:, :nocc] = dvvvo * 4 - dvvvo.transpose(1, 0, 2, 3) * 2
    rdm2[nocc:, nocc:, :nocc,
         nocc:] = rdm2[nocc:, nocc:, nocc:, :nocc].transpose(1, 0, 3, 2)
    rdm2[nocc:, :nocc, nocc:,
         nocc:] = rdm2[nocc:, nocc:, nocc:, :nocc].transpose(2, 3, 0, 1)
    rdm2[:nocc, nocc:, nocc:,
         nocc:] = rdm2[nocc:, nocc:, nocc:, :nocc].transpose(3, 2, 1, 0)
    rdm2[nocc:, nocc:, nocc:,
         nocc:] = dvvvv * 4 - dvvvv.transpose(1, 0, 2, 3) * 2

    # Fixme: This seems giving right answer, but not based on solid formula
    theta = c2 * 2 - c2.transpose(1, 0, 2, 3)
    dovov = numpy.einsum('ia,kc->icka', c1, c1) * -2
    #:dovov -= numpy.einsum('kjcb,kica->jaib', c2, theta) * 2
    #:dovov -= numpy.einsum('ikcb,jkca->iajb', c2, theta) * 2
    dovov -= lib.ddot(
        c2.transpose(0, 2, 1, 3).reshape(nov, -1).T,
        theta.transpose(0, 2, 1, 3).reshape(nov, -1),
        2).reshape(nocc, nvir, nocc, nvir).transpose(0, 3, 2, 1)
    dovov -= lib.ddot(
        c2.transpose(0, 3, 1, 2).reshape(nov, -1),
        theta.transpose(0, 3, 1, 2).reshape(nov, -1).T,
        2).reshape(nocc, nvir, nocc, nvir).transpose(0, 3, 2, 1)
    dvoov = numpy.einsum('ia,kc->cika', c1, c1) * 4
    #:dvoov += numpy.einsum('kica,kjcb->ajib', theta, theta) * 2
    dvoov += lib.ddot(
        theta.transpose(0, 2, 1, 3).reshape(nov, -1).T,
        theta.transpose(0, 2, 1, 3).reshape(nov, -1),
        2).reshape(nocc, nvir, nocc, nvir).transpose(1, 2, 0, 3)

    #    rdm2[:nocc,nocc:,:nocc,nocc:] = dovov*4-dvoov.transpose(1,0,2,3)*2
    #    rdm2[nocc:,:nocc,nocc:,:nocc] = rdm2[:nocc,nocc:,:nocc,nocc:].transpose(1,0,3,2)
    #    rdm2[nocc:,:nocc,:nocc,nocc:] = dvoov*4-dovov.transpose(1,0,2,3)*2
    #    rdm2[:nocc,nocc:,nocc:,:nocc] = rdm2[nocc:,:nocc,:nocc,nocc:].transpose(1,0,3,2)
    rdm2[:nocc, nocc:, :nocc, nocc:] = dovov
    rdm2[nocc:, :nocc, nocc:, :nocc] = dovov.transpose(1, 0, 3, 2)
    rdm2[nocc:, :nocc, :nocc, nocc:] = dvoov
    rdm2[:nocc, nocc:, nocc:, :nocc] = dvoov.transpose(1, 0, 3, 2)

    rdm1 = make_rdm1(ci, nmo, nocc)
    for i in range(nocc):
        rdm1[i, i] -= 2
    for i in range(nocc):
        for j in range(nocc):
            rdm2[i, j, i, j] += 4
            rdm2[i, j, j, i] -= 2
        rdm2[i, :, i, :] += rdm1 * 2
        rdm2[:, i, :, i] += rdm1 * 2
        rdm2[:, i, i, :] -= rdm1
        rdm2[i, :, :, i] -= rdm1

    return rdm2
Beispiel #45
0
def get_eri(mydf, kpts=None,
            compact=getattr(__config__, 'pbc_df_ao2mo_get_eri_compact', True)):
    cell = mydf.cell
    nao = cell.nao_nr()
    kptijkl = _format_kpts(kpts)
    if not _iskconserv(cell, kptijkl):
        lib.logger.warn(cell, 'aft_ao2mo: momentum conservation not found in '
                        'the given k-points %s', kptijkl)
        return numpy.zeros((nao,nao,nao,nao))

    kpti, kptj, kptk, kptl = kptijkl
    q = kptj - kpti
    mesh = mydf.mesh
    coulG = mydf.weighted_coulG(q, False, mesh)
    nao_pair = nao * (nao+1) // 2
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]) * .8)

####################
# gamma point, the integral is real and with s4 symmetry
    if gamma_point(kptijkl):
        eriR = numpy.zeros((nao_pair,nao_pair))
        for pqkR, pqkI, p0, p1 \
                in mydf.pw_loop(mesh, kptijkl[:2], q, max_memory=max_memory,
                                aosym='s2'):
            lib.ddot(pqkR*coulG[p0:p1], pqkR.T, 1, eriR, 1)
            lib.ddot(pqkI*coulG[p0:p1], pqkI.T, 1, eriR, 1)
            pqkR = pqkI = None
        if not compact:
            eriR = ao2mo.restore(1, eriR, nao).reshape(nao**2,-1)
        return eriR

####################
# (kpt) i == j == k == l != 0
# (kpt) i == l && j == k && i != j && j != k  =>
#
# complex integrals, N^4 elements
    elif is_zero(kpti-kptl) and is_zero(kptj-kptk):
        eriR = numpy.zeros((nao**2,nao**2))
        eriI = numpy.zeros((nao**2,nao**2))
        for pqkR, pqkI, p0, p1 \
                in mydf.pw_loop(mesh, kptijkl[:2], q, max_memory=max_memory):
# rho_pq(G+k_pq) * conj(rho_rs(G-k_rs))
            zdotNC(pqkR*coulG[p0:p1], pqkI*coulG[p0:p1], pqkR.T, pqkI.T,
                   1, eriR, eriI, 1)
            pqkR = pqkI = None
        pqkR = pqkI = coulG = None
# transpose(0,1,3,2) because
# j == k && i == l  =>
# (L|ij).transpose(0,2,1).conj() = (L^*|ji) = (L^*|kl)  =>  (M|kl)
# rho_rs(-G+k_rs) = conj(transpose(rho_sr(G+k_sr), (0,2,1)))
        eri = lib.transpose((eriR+eriI*1j).reshape(-1,nao,nao), axes=(0,2,1))
        return eri.reshape(nao**2,-1)

####################
# aosym = s1, complex integrals
#
# If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
# vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
# So  kptl/b - kptk/b  must be -1 < k/b < 1.  =>  kptl == kptk
#
    else:
        eriR = numpy.zeros((nao**2,nao**2))
        eriI = numpy.zeros((nao**2,nao**2))
#
#       (pq|rs) = \sum_G 4\pi rho_pq rho_rs / |G+k_{pq}|^2
#       rho_pq = 1/N \sum_{Tp,Tq} \int exp(-i(G+k_{pq})*r) p(r-Tp) q(r-Tq) dr
#              = \sum_{Tq} exp(i k_q*Tq) \int exp(-i(G+k_{pq})*r) p(r) q(r-Tq) dr
# Note the k-point wrap-around for rho_rs, which leads to G+k_{pq} in FT
#       rho_rs = 1/N \sum_{Tr,Ts} \int exp( i(G+k_{pq})*r) r(r-Tr) s(r-Ts) dr
#              = \sum_{Ts} exp(i k_s*Ts) \int exp( i(G+k_{pq})*r) r(r) s(r-Ts) dr
# rho_pq can be directly evaluated by AFT (function pw_loop)
#       rho_pq = pw_loop(k_q, G+k_{pq})
# Assuming r(r) and s(r) are real functions, rho_rs is evaluated
#       rho_rs = 1/N \sum_{Tr,Ts} \int exp( i(G+k_{pq})*r) r(r-Tr) s(r-Ts) dr
#              = conj(\sum_{Ts} exp(-i k_s*Ts) \int exp(-i(G+k_{pq})*r) r(r) s(r-Ts) dr)
#              = conj( pw_loop(-k_s, G+k_{pq}) )
#
# TODO: For complex AO function r(r) and s(r), pw_loop function needs to be
# extended to include Gv vector in the arguments
        for (pqkR, pqkI, p0, p1), (rskR, rskI, q0, q1) in \
                lib.izip(mydf.pw_loop(mesh, kptijkl[:2], q, max_memory=max_memory*.5),
                         mydf.pw_loop(mesh,-kptijkl[2:], q, max_memory=max_memory*.5)):
            pqkR *= coulG[p0:p1]
            pqkI *= coulG[p0:p1]
            zdotNC(pqkR, pqkI, rskR.T, rskI.T, 1, eriR, eriI, 1)
            pqkR = pqkI = rskR = rskI = None
        return (eriR+eriI*1j)
Beispiel #46
0
def _ao2mo_ovov(mp, orbs, feri, max_memory=2000, verbose=None):
    time0 = (time.clock(), time.time())
    log = logger.new_logger(mp, verbose)
    orboa = numpy.asarray(orbs[0], order='F')
    orbva = numpy.asarray(orbs[1], order='F')
    orbob = numpy.asarray(orbs[2], order='F')
    orbvb = numpy.asarray(orbs[3], order='F')
    nao, nocca = orboa.shape
    noccb = orbob.shape[1]
    nvira = orbva.shape[1]
    nvirb = orbvb.shape[1]

    mol = mp.mol
    int2e = mol._add_suffix('int2e')
    ao2mopt = _ao2mo.AO2MOpt(mol, int2e, 'CVHFnr_schwarz_cond',
                             'CVHFsetnr_direct_scf')
    nbas = mol.nbas
    assert (nvira <= nao)
    assert (nvirb <= nao)

    ao_loc = mol.ao_loc_nr()
    dmax = max(
        4, min(nao / 3, numpy.sqrt(max_memory * .95e6 / 8 / (nao + nocca)**2)))
    sh_ranges = ao2mo.outcore.balance_partition(ao_loc, dmax)
    dmax = max(x[2] for x in sh_ranges)
    eribuf = numpy.empty((nao, dmax, dmax, nao))
    ftmp = lib.H5TmpFile()
    disk = (nocca**2 * (nao * (nao + dmax) / 2 + nvira**2) + noccb**2 *
            (nao * (nao + dmax) / 2 + nvirb**2) + nocca * noccb *
            (nao**2 + nvira * nvirb))
    log.debug('max_memory %s MB (dmax = %s) required disk space %g MB',
              max_memory, dmax, disk * 8 / 1e6)

    fint = gto.moleintor.getints4c
    aa_blk_slices = []
    ab_blk_slices = []
    count_ab = 0
    count_aa = 0
    time1 = time0
    with lib.call_in_background(ftmp.__setitem__) as save:
        for ish0, ish1, ni in sh_ranges:
            for jsh0, jsh1, nj in sh_ranges:
                i0, i1 = ao_loc[ish0], ao_loc[ish1]
                j0, j1 = ao_loc[jsh0], ao_loc[jsh1]

                eri = fint(int2e,
                           mol._atm,
                           mol._bas,
                           mol._env,
                           shls_slice=(0, nbas, ish0, ish1, jsh0, jsh1, 0,
                                       nbas),
                           aosym='s1',
                           ao_loc=ao_loc,
                           cintopt=ao2mopt._cintopt,
                           out=eribuf)
                tmp_i = lib.ddot(orboa.T,
                                 eri.reshape(nao, (i1 - i0) * (j1 - j0) * nao))
                tmp_li = lib.ddot(
                    orbob.T,
                    tmp_i.reshape(nocca * (i1 - i0) * (j1 - j0), nao).T)
                tmp_li = tmp_li.reshape(noccb, nocca, (i1 - i0), (j1 - j0))
                save('ab/%d' % count_ab, tmp_li.transpose(1, 0, 2, 3))
                ab_blk_slices.append((i0, i1, j0, j1))
                count_ab += 1

                if ish0 >= jsh0:
                    tmp_li = lib.ddot(
                        orboa.T,
                        tmp_i.reshape(nocca * (i1 - i0) * (j1 - j0), nao).T)
                    tmp_li = tmp_li.reshape(nocca, nocca, (i1 - i0), (j1 - j0))
                    save('aa/%d' % count_aa, tmp_li.transpose(1, 0, 2, 3))

                    tmp_i = lib.ddot(
                        orbob.T, eri.reshape(nao, (i1 - i0) * (j1 - j0) * nao))
                    tmp_li = lib.ddot(
                        orbob.T,
                        tmp_i.reshape(noccb * (i1 - i0) * (j1 - j0), nao).T)
                    tmp_li = tmp_li.reshape(noccb, noccb, (i1 - i0), (j1 - j0))
                    save('bb/%d' % count_aa, tmp_li.transpose(1, 0, 2, 3))
                    aa_blk_slices.append((i0, i1, j0, j1))
                    count_aa += 1

                time1 = log.timer_debug1(
                    'partial ao2mo [%d:%d,%d:%d]' % (ish0, ish1, jsh0, jsh1),
                    *time1)
    time1 = time0 = log.timer('mp2 ao2mo_ovov pass1', *time0)
    eri = eribuf = tmp_i = tmp_li = None

    fovov = feri.create_dataset('ovov', (nocca * nvira, nocca * nvira),
                                'f8',
                                chunks=(nvira, nvira))
    fovOV = feri.create_dataset('ovOV', (nocca * nvira, noccb * nvirb),
                                'f8',
                                chunks=(nvira, nvirb))
    fOVOV = feri.create_dataset('OVOV', (noccb * nvirb, noccb * nvirb),
                                'f8',
                                chunks=(nvirb, nvirb))
    occblk = int(
        min(max(nocca, noccb),
            max(4, 250 / nocca, max_memory * .9e6 / 8 / (nao**2 * nocca) / 5)))

    def load_aa(h5g, nocc, i0, eri):
        if i0 < nocc:
            i1 = min(i0 + occblk, nocc)
            for k, (p0, p1, q0, q1) in enumerate(aa_blk_slices):
                eri[:i1 - i0, :, p0:p1, q0:q1] = h5g[str(k)][i0:i1]
                if p0 != q0:
                    dat = numpy.asarray(h5g[str(k)][:, i0:i1])
                    eri[:i1 - i0, :, q0:q1, p0:p1] = dat.transpose(1, 0, 3, 2)

    def load_ab(h5g, nocca, i0, eri):
        if i0 < nocca:
            i1 = min(i0 + occblk, nocca)
            for k, (p0, p1, q0, q1) in enumerate(ab_blk_slices):
                eri[:i1 - i0, :, p0:p1, q0:q1] = h5g[str(k)][i0:i1]

    def save(h5dat, nvir, i0, i1, dat):
        for i in range(i0, i1):
            h5dat[i * nvir:(i + 1) * nvir] = dat[i - i0].reshape(nvir, -1)

    with lib.call_in_background(save) as bsave:
        with lib.call_in_background(load_aa) as prefetch:
            buf_prefecth = numpy.empty((occblk, nocca, nao, nao))
            buf = numpy.empty_like(buf_prefecth)
            load_aa(ftmp['aa'], nocca, 0, buf_prefecth)
            for i0, i1 in lib.prange(0, nocca, occblk):
                buf, buf_prefecth = buf_prefecth, buf
                prefetch(ftmp['aa'], nocca, i1, buf_prefecth)
                eri = buf[:i1 - i0].reshape((i1 - i0) * nocca, nao, nao)
                dat = _ao2mo.nr_e2(eri, orbva, (0, nvira, 0, nvira), 's1',
                                   's1')
                bsave(
                    fovov, nvira, i0, i1,
                    dat.reshape(i1 - i0, nocca, nvira,
                                nvira).transpose(0, 2, 1, 3))
                time1 = log.timer_debug1(
                    'pass2 ao2mo for aa [%d:%d]' % (i0, i1), *time1)

            buf_prefecth = numpy.empty((occblk, noccb, nao, nao))
            buf = numpy.empty_like(buf_prefecth)
            load_aa(ftmp['bb'], noccb, 0, buf_prefecth)
            for i0, i1 in lib.prange(0, noccb, occblk):
                buf, buf_prefecth = buf_prefecth, buf
                prefetch(ftmp['bb'], noccb, i1, buf_prefecth)
                eri = buf[:i1 - i0].reshape((i1 - i0) * noccb, nao, nao)
                dat = _ao2mo.nr_e2(eri, orbvb, (0, nvirb, 0, nvirb), 's1',
                                   's1')
                bsave(
                    fOVOV, nvirb, i0, i1,
                    dat.reshape(i1 - i0, noccb, nvirb,
                                nvirb).transpose(0, 2, 1, 3))
                time1 = log.timer_debug1(
                    'pass2 ao2mo for bb [%d:%d]' % (i0, i1), *time1)

        orbvab = numpy.asarray(numpy.hstack((orbva, orbvb)), order='F')
        with lib.call_in_background(load_ab) as prefetch:
            load_ab(ftmp['ab'], nocca, 0, buf_prefecth)
            for i0, i1 in lib.prange(0, nocca, occblk):
                buf, buf_prefecth = buf_prefecth, buf
                prefetch(ftmp['ab'], nocca, i1, buf_prefecth)
                eri = buf[:i1 - i0].reshape((i1 - i0) * noccb, nao, nao)
                dat = _ao2mo.nr_e2(eri, orbvab,
                                   (0, nvira, nvira, nvira + nvirb), 's1',
                                   's1')
                bsave(
                    fovOV, nvira, i0, i1,
                    dat.reshape(i1 - i0, noccb, nvira,
                                nvirb).transpose(0, 2, 1, 3))
                time1 = log.timer_debug1(
                    'pass2 ao2mo for ab [%d:%d]' % (i0, i1), *time1)

    time0 = log.timer('mp2 ao2mo_ovov pass2', *time0)
Beispiel #47
0
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    log = logger.Logger(mydf.stdout, mydf.verbose)
    t1 = t0 = (time.clock(), time.time())

    fused_cell, fuse = fuse_auxcell(mydf, mydf.auxcell)
    ao_loc = cell.ao_loc_nr()
    nao = ao_loc[-1]
    naux = auxcell.nao_nr()
    nkptij = len(kptij_lst)
    gs = mydf.gs
    Gv, Gvbase, kws = cell.get_Gv_weights(gs)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngs = gxyz.shape[0]

    kptis = kptij_lst[:, 0]
    kptjs = kptij_lst[:, 1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e_sph', hermi=1, kpts=uniq_kpts)
    j2ctags = []
    nauxs = []
    t1 = log.timer_debug1('2c2e', *t1)

    if h5py.is_hdf5(cderi_file):
        feri = h5py.File(cderi_file)
    else:
        feri = h5py.File(cderi_file, 'w')
    for k, kpt in enumerate(uniq_kpts):
        aoaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
        coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, gs))
        kLR = (aoaux.real * coulG).T
        kLI = (aoaux.imag * coulG).T
        if not kLR.flags.c_contiguous: kLR = lib.transpose(kLR.T)
        if not kLI.flags.c_contiguous: kLI = lib.transpose(kLI.T)
        aoaux = None

        kLR1 = numpy.asarray(kLR[:, naux:], order='C')
        kLI1 = numpy.asarray(kLI[:, naux:], order='C')
        if is_zero(kpt):  # kpti == kptj
            for p0, p1 in mydf.mpi_prange(0, ngs):
                j2cR = lib.ddot(kLR1[p0:p1].T, kLR[p0:p1])
                j2cR = lib.ddot(kLI1[p0:p1].T, kLI[p0:p1], 1, j2cR, 1)
                j2c[k][naux:] -= mpi.allreduce(j2cR)
                j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T
        else:
            for p0, p1 in mydf.mpi_prange(0, ngs):
                j2cR, j2cI = zdotCN(kLR1[p0:p1].T, kLI1[p0:p1].T, kLR[p0:p1],
                                    kLI[p0:p1])
                j2cR = mpi.allreduce(j2cR)
                j2cI = mpi.allreduce(j2cI)
                j2c[k][naux:] -= j2cR + j2cI * 1j
                j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T.conj()
        j2c[k] = fuse(fuse(j2c[k]).T).T
        try:
            feri['j2c/%d' % k] = scipy.linalg.cholesky(j2c[k], lower=True)
            j2ctags.append('CD')
            nauxs.append(naux)
        except scipy.linalg.LinAlgError as e:
            #msg =('===================================\n'
            #      'J-metric not positive definite.\n'
            #      'It is likely that gs is not enough.\n'
            #      '===================================')
            #log.error(msg)
            #raise scipy.linalg.LinAlgError('\n'.join([e.message, msg]))
            w, v = scipy.linalg.eigh(j2c)
            log.debug2('metric linear dependency for kpt %s', uniq_kptji_id)
            log.debug2('cond = %.4g, drop %d bfns', w[0] / w[-1],
                       numpy.count_nonzero(w < LINEAR_DEP_THR))
            v = v[:, w > LINEAR_DEP_THR].T.conj()
            v /= numpy.sqrt(w[w > LINEAR_DEP_THR]).reshape(-1, 1)
            feri['j2c/%d' % k] = v
            j2ctags.append('eig')
            nauxs.append(v.shape[0])
        kLR = kLI = kLR1 = kLI1 = coulG = None
    j2c = None

    aosym_s2 = numpy.einsum('ix->i', abs(kptis - kptjs)) < 1e-9
    j_only = numpy.all(aosym_s2)
    if gamma_point(kptij_lst):
        dtype = 'f8'
    else:
        dtype = 'c16'
    vbar = mydf.auxbar(fused_cell)
    vbar = fuse(vbar)
    ovlp = cell.pbc_intor('int1e_ovlp_sph', hermi=1, kpts=kptjs[aosym_s2])
    ovlp = [lib.pack_tril(s) for s in ovlp]
    t1 = log.timer_debug1('aoaux and int2c', *t1)

    # Estimates the buffer size based on the last contraction in G-space.
    # This contraction requires to hold nkptj copies of (naux,?) array
    # simultaneously in memory.
    mem_now = max(comm.allgather(lib.current_memory()[0]))
    max_memory = max(2000, mydf.max_memory - mem_now)
    nkptj_max = max((uniq_inverse == x).sum() for x in set(uniq_inverse))
    buflen = max(
        int(
            min(max_memory * .5e6 / 16 / naux / (nkptj_max + 2) / nao,
                nao / 3 / mpi.pool.size)), 1)
    chunks = (buflen, nao)

    j3c_jobs = grids2d_int3c_jobs(cell, auxcell, kptij_lst, chunks, j_only)
    log.debug1('max_memory = %d MB (%d in use)  chunks %s', max_memory,
               mem_now, chunks)
    log.debug2('j3c_jobs %s', j3c_jobs)

    if j_only:
        int3c = wrap_int3c(cell, fused_cell, 'int3c2e_sph', 's2', 1, kptij_lst)
    else:
        int3c = wrap_int3c(cell, fused_cell, 'int3c2e_sph', 's1', 1, kptij_lst)
        idxb = numpy.tril_indices(nao)
        idxb = (idxb[0] * nao + idxb[1]).astype('i')
    aux_loc = fused_cell.ao_loc_nr('ssc' in 'int3c2e_sph')

    def gen_int3c(auxcell, job_id, ish0, ish1):
        dataname = 'j3c-chunks/%d' % job_id
        if dataname in feri:
            del (feri[dataname])

        i0 = ao_loc[ish0]
        i1 = ao_loc[ish1]
        dii = i1 * (i1 + 1) // 2 - i0 * (i0 + 1) // 2
        dij = (i1 - i0) * nao
        if j_only:
            buflen = max(8, int(max_memory * 1e6 / 16 / (nkptij * dii + dii)))
        else:
            buflen = max(8, int(max_memory * 1e6 / 16 / (nkptij * dij + dij)))
        auxranges = balance_segs(aux_loc[1:] - aux_loc[:-1], buflen)
        buflen = max([x[2] for x in auxranges])
        buf = numpy.empty(nkptij * dij * buflen, dtype=dtype)
        buf1 = numpy.empty(dij * buflen, dtype=dtype)

        naux = aux_loc[-1]
        for kpt_id, kptij in enumerate(kptij_lst):
            key = '%s/%d' % (dataname, kpt_id)
            if aosym_s2[kpt_id]:
                shape = (naux, dii)
            else:
                shape = (naux, dij)
            if gamma_point(kptij):
                feri.create_dataset(key, shape, 'f8')
            else:
                feri.create_dataset(key, shape, 'c16')

        naux0 = 0
        for istep, auxrange in enumerate(auxranges):
            log.alldebug2("aux_e2 job_id %d step %d", job_id, istep)
            sh0, sh1, nrow = auxrange
            sub_slice = (ish0, ish1, 0, cell.nbas, sh0, sh1)
            if j_only:
                mat = numpy.ndarray((nkptij, dii, nrow),
                                    dtype=dtype,
                                    buffer=buf)
            else:
                mat = numpy.ndarray((nkptij, dij, nrow),
                                    dtype=dtype,
                                    buffer=buf)
            mat = int3c(sub_slice, mat)

            for k, kptij in enumerate(kptij_lst):
                h5dat = feri['%s/%d' % (dataname, k)]
                v = lib.transpose(mat[k], out=buf1)
                if not j_only and aosym_s2[k]:
                    idy = idxb[i0 * (i0 + 1) // 2:i1 *
                               (i1 + 1) // 2] - i0 * nao
                    out = numpy.ndarray((nrow, dii),
                                        dtype=v.dtype,
                                        buffer=mat[k])
                    v = numpy.take(v, idy, axis=1, out=out)
                if gamma_point(kptij):
                    h5dat[naux0:naux0 + nrow] = v.real
                else:
                    h5dat[naux0:naux0 + nrow] = v
            naux0 += nrow

    def ft_fuse(job_id, uniq_kptji_id, sh0, sh1):
        kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)

        shls_slice = (auxcell.nbas, fused_cell.nbas)
        Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
        Gaux *= mydf.weighted_coulG(kpt, False, gs).reshape(-1, 1)
        kLR = Gaux.real.copy('C')
        kLI = Gaux.imag.copy('C')
        j2c = numpy.asarray(feri['j2c/%d' % uniq_kptji_id])
        j2ctag = j2ctags[uniq_kptji_id]
        naux0 = j2c.shape[0]

        if is_zero(kpt):
            aosym = 's2'
        else:
            aosym = 's1'

        j3cR = [None] * nkptj
        j3cI = [None] * nkptj
        i0 = ao_loc[sh0]
        i1 = ao_loc[sh1]
        for k, idx in enumerate(adapted_ji_idx):
            key = 'j3c-chunks/%d/%d' % (job_id, idx)
            v = numpy.asarray(feri[key])
            if is_zero(kpt):
                for i, c in enumerate(vbar):
                    if c != 0:
                        v[i] -= c * ovlp[k][i0 * (i0 + 1) // 2:i1 *
                                            (i1 + 1) // 2].ravel()
            j3cR[k] = numpy.asarray(v.real, order='C')
            if v.dtype == numpy.complex128:
                j3cI[k] = numpy.asarray(v.imag, order='C')
            v = None

        ncol = j3cR[0].shape[1]
        Gblksize = max(16, int(max_memory * 1e6 / 16 / ncol /
                               (nkptj + 1)))  # +1 for pqkRbuf/pqkIbuf
        Gblksize = min(Gblksize, ngs, 16384)
        pqkRbuf = numpy.empty(ncol * Gblksize)
        pqkIbuf = numpy.empty(ncol * Gblksize)
        buf = numpy.empty(nkptj * ncol * Gblksize, dtype=numpy.complex128)
        log.alldebug2('    blksize (%d,%d)', Gblksize, ncol)

        shls_slice = (sh0, sh1, 0, cell.nbas)
        for p0, p1 in lib.prange(0, ngs, Gblksize):
            dat = ft_ao._ft_aopair_kpts(cell,
                                        Gv[p0:p1],
                                        shls_slice,
                                        aosym,
                                        b,
                                        gxyz[p0:p1],
                                        Gvbase,
                                        kpt,
                                        adapted_kptjs,
                                        out=buf)
            nG = p1 - p0
            for k, ji in enumerate(adapted_ji_idx):
                aoao = dat[k].reshape(nG, ncol)
                pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                pqkR[:] = aoao.real.T
                pqkI[:] = aoao.imag.T

                lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                    lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)

        for k, idx in enumerate(adapted_ji_idx):
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                v = fuse(j3cR[k])
            else:
                v = fuse(j3cR[k] + j3cI[k] * 1j)
            if j2ctag == 'CD':
                v = scipy.linalg.solve_triangular(j2c,
                                                  v,
                                                  lower=True,
                                                  overwrite_b=True)
            else:
                v = lib.dot(j2c, v)
            feri['j3c-chunks/%d/%d' % (job_id, idx)][:naux0] = v

    t2 = t1
    j3c_workers = numpy.zeros(len(j3c_jobs), dtype=int)
    #for job_id, ish0, ish1 in mpi.work_share_partition(j3c_jobs):
    for job_id, ish0, ish1 in mpi.work_stealing_partition(j3c_jobs):
        gen_int3c(fused_cell, job_id, ish0, ish1)
        t2 = log.alltimer_debug2('int j3c %d' % job_id, *t2)

        for k, kpt in enumerate(uniq_kpts):
            ft_fuse(job_id, k, ish0, ish1)
            t2 = log.alltimer_debug2('ft-fuse %d k %d' % (job_id, k), *t2)

        j3c_workers[job_id] = rank
    j3c_workers = mpi.allreduce(j3c_workers)
    log.debug2('j3c_workers %s', j3c_workers)
    j2c = kLRs = kLIs = ovlp = vbar = fuse = gen_int3c = ft_fuse = None
    t1 = log.timer_debug1('int3c and fuse', *t1)

    def get_segs_loc(aosym):
        off0 = numpy.asarray([ao_loc[i0] for x, i0, i1 in j3c_jobs])
        off1 = numpy.asarray([ao_loc[i1] for x, i0, i1 in j3c_jobs])
        if aosym:  # s2
            dims = off1 * (off1 + 1) // 2 - off0 * (off0 + 1) // 2
        else:
            dims = (off1 - off0) * nao
        #dims = numpy.asarray([ao_loc[i1]-ao_loc[i0] for x,i0,i1 in j3c_jobs])
        dims = numpy.hstack(
            [dims[j3c_workers == w] for w in range(mpi.pool.size)])
        job_idx = numpy.hstack(
            [numpy.where(j3c_workers == w)[0] for w in range(mpi.pool.size)])
        segs_loc = numpy.append(0, numpy.cumsum(dims))
        segs_loc = [(segs_loc[j], segs_loc[j + 1])
                    for j in numpy.argsort(job_idx)]
        return segs_loc

    segs_loc_s1 = get_segs_loc(False)
    segs_loc_s2 = get_segs_loc(True)

    if 'j3c' in feri: del (feri['j3c'])
    segsize = (max(nauxs) + mpi.pool.size - 1) // mpi.pool.size
    naux0 = rank * segsize
    for k, kptij in enumerate(kptij_lst):
        naux1 = min(nauxs[uniq_inverse[k]], naux0 + segsize)
        nrow = max(0, naux1 - naux0)
        if gamma_point(kptij):
            dtype = 'f8'
        else:
            dtype = 'c16'
        if aosym_s2[k]:
            nao_pair = nao * (nao + 1) // 2
        else:
            nao_pair = nao * nao
        feri.create_dataset('j3c/%d' % k, (nrow, nao_pair),
                            dtype,
                            maxshape=(None, nao_pair))

    def load(k, p0, p1):
        naux1 = nauxs[uniq_inverse[k]]
        slices = [(min(i * segsize + p0, naux1), min(i * segsize + p1, naux1))
                  for i in range(mpi.pool.size)]
        segs = []
        for p0, p1 in slices:
            val = []
            for job_id, worker in enumerate(j3c_workers):
                if rank == worker:
                    key = 'j3c-chunks/%d/%d' % (job_id, k)
                    val.append(feri[key][p0:p1].ravel())
            if val:
                segs.append(numpy.hstack(val))
            else:
                segs.append(numpy.zeros(0))
        return segs

    def save(k, p0, p1, segs):
        segs = mpi.alltoall(segs)
        naux1 = nauxs[uniq_inverse[k]]
        loc0, loc1 = min(p0, naux1 - naux0), min(p1, naux1 - naux0)
        nL = loc1 - loc0
        if nL > 0:
            if aosym_s2[k]:
                segs = numpy.hstack([
                    segs[i0 * nL:i1 * nL].reshape(nL, -1)
                    for i0, i1 in segs_loc_s2
                ])
            else:
                segs = numpy.hstack([
                    segs[i0 * nL:i1 * nL].reshape(nL, -1)
                    for i0, i1 in segs_loc_s1
                ])
            feri['j3c/%d' % k][loc0:loc1] = segs

    mem_now = max(comm.allgather(lib.current_memory()[0]))
    max_memory = max(2000, min(8000, mydf.max_memory - mem_now))
    if numpy.all(aosym_s2):
        if gamma_point(kptij_lst):
            blksize = max(16, int(max_memory * .5e6 / 8 / nao**2))
        else:
            blksize = max(16, int(max_memory * .5e6 / 16 / nao**2))
    else:
        blksize = max(16, int(max_memory * .5e6 / 16 / nao**2 / 2))
    log.debug1('max_momory %d MB (%d in use), blksize %d', max_memory, mem_now,
               blksize)

    t2 = t1
    with lib.call_in_background(save) as async_write:
        for k, kptji in enumerate(kptij_lst):
            for p0, p1 in lib.prange(0, segsize, blksize):
                segs = load(k, p0, p1)
                async_write(k, p0, p1, segs)
                t2 = log.timer_debug1(
                    'assemble k=%d %d:%d (in %d)' % (k, p0, p1, segsize), *t2)

    if 'j3c-chunks' in feri: del (feri['j3c-chunks'])
    if 'j3c-kptij' in feri: del (feri['j3c-kptij'])
    feri['j3c-kptij'] = kptij_lst
    t1 = log.alltimer_debug1('assembling j3c', *t1)
    feri.close()
Beispiel #48
0
def get_eri(mydf, kpts=None,
            compact=getattr(__config__, 'pbc_df_ao2mo_get_eri_compact', True)):
    if mydf._cderi is None:
        mydf.build()

    cell = mydf.cell
    nao = cell.nao_nr()
    kptijkl = _format_kpts(kpts)
    if not _iskconserv(cell, kptijkl):
        lib.logger.warn(cell, 'df_ao2mo: momentum conservation not found in '
                        'the given k-points %s', kptijkl)
        return numpy.zeros((nao,nao,nao,nao))

    kpti, kptj, kptk, kptl = kptijkl
    nao_pair = nao * (nao+1) // 2
    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0]-nao**4*16/1e6)

####################
# gamma point, the integral is real and with s4 symmetry
    if gamma_point(kptijkl):
        eriR = numpy.zeros((nao_pair,nao_pair))
        for LpqR, LpqI, sign in mydf.sr_loop(kptijkl[:2], max_memory, True):
            lib.ddot(LpqR.T, LpqR, sign, eriR, 1)
            LpqR = LpqI = None
        if not compact:
            eriR = ao2mo.restore(1, eriR, nao).reshape(nao**2,-1)
        return eriR

    elif is_zero(kpti-kptk) and is_zero(kptj-kptl):
        eriR = numpy.zeros((nao*nao,nao*nao))
        eriI = numpy.zeros((nao*nao,nao*nao))
        for LpqR, LpqI, sign in mydf.sr_loop(kptijkl[:2], max_memory, False):
            zdotNN(LpqR.T, LpqI.T, LpqR, LpqI, sign, eriR, eriI, 1)
            LpqR = LpqI = None
        return eriR + eriI*1j

####################
# (kpt) i == j == k == l != 0
#
# (kpt) i == l && j == k && i != j && j != k  =>
# both vbar and ovlp are zero. It corresponds to the exchange integral.
#
# complex integrals, N^4 elements
    elif is_zero(kpti-kptl) and is_zero(kptj-kptk):
        eriR = numpy.zeros((nao*nao,nao*nao))
        eriI = numpy.zeros((nao*nao,nao*nao))
        for LpqR, LpqI, sign in mydf.sr_loop(kptijkl[:2], max_memory, False):
            zdotNC(LpqR.T, LpqI.T, LpqR, LpqI, sign, eriR, eriI, 1)
            LpqR = LpqI = None
# transpose(0,1,3,2) because
# j == k && i == l  =>
# (L|ij).transpose(0,2,1).conj() = (L^*|ji) = (L^*|kl)  =>  (M|kl)
        eri = lib.transpose((eriR+eriI*1j).reshape(-1,nao,nao), axes=(0,2,1))
        return eri.reshape(nao**2,-1)

####################
# aosym = s1, complex integrals
#
# kpti == kptj  =>  kptl == kptk
# If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
# vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
# So  kptl/b - kptk/b  must be -1 < k/b < 1.
#
    else:
        eriR = numpy.zeros((nao*nao,nao*nao))
        eriI = numpy.zeros((nao*nao,nao*nao))
        blksize = int(max_memory*.4e6/16/nao**2)
        for (LpqR, LpqI, sign), (LrsR, LrsI, sign1) in \
                lib.izip(mydf.sr_loop(kptijkl[:2], max_memory, False, blksize),
                         mydf.sr_loop(kptijkl[2:], max_memory, False, blksize)):
            zdotNN(LpqR.T, LpqI.T, LrsR, LrsI, sign, eriR, eriI, 1)
            LpqR = LpqI = LrsR = LrsI = None
        return eriR + eriI*1j
Beispiel #49
0
def _contract_vvvv_t2(mycc, mol, vvL, t2, out=None, verbose=None):
    '''Ht2 = numpy.einsum('ijcd,acdb->ijab', t2, vvvv)

    Args:
        vvvv : None or integral object
            if vvvv is None, contract t2 to AO-integrals using AO-direct algorithm
    '''
    _dgemm = lib.numpy_helper._dgemm
    time0 = time.clock(), time.time()
    log = logger.new_logger(mol, verbose)

    naux = vvL.shape[-1]
    nvira, nvirb = t2.shape[-2:]
    x2 = t2.reshape(-1,nvira,nvirb)
    nocc2 = x2.shape[0]
    nvir2 = nvira * nvirb
    Ht2 = numpy.ndarray(x2.shape, buffer=out)
    Ht2[:] = 0

    max_memory = max(MEMORYMIN, mycc.max_memory - lib.current_memory()[0])
    def contract_blk_(eri, i0, i1, j0, j1):
        ic = i1 - i0
        jc = j1 - j0
        #:Ht2[:,j0:j1] += numpy.einsum('xef,efab->xab', x2[:,i0:i1], eri)
        _dgemm('N', 'N', nocc2, jc*nvirb, ic*nvirb,
               x2.reshape(-1,nvir2), eri.reshape(-1,jc*nvirb),
               Ht2.reshape(-1,nvir2), 1, 1, i0*nvirb, 0, j0*nvirb)

        if i0 > j0:
            #:Ht2[:,i0:i1] += numpy.einsum('xef,abef->xab', x2[:,j0:j1], eri)
            _dgemm('N', 'T', nocc2, ic*nvirb, jc*nvirb,
                   x2.reshape(-1,nvir2), eri.reshape(-1,jc*nvirb),
                   Ht2.reshape(-1,nvir2), 1, 1, j0*nvirb, 0, i0*nvirb)

#TODO: check if vvL can be entirely loaded into memory
    nvir_pair = nvirb * (nvirb+1) // 2
    dmax = numpy.sqrt(max_memory*.7e6/8/nvirb**2/2)
    dmax = int(min((nvira+3)//4, max(ccsd.BLKMIN, dmax)))
    vvblk = (max_memory*1e6/8 - dmax**2*(nvirb**2*1.5+naux))/naux
    vvblk = int(min((nvira+3)//4, max(ccsd.BLKMIN, vvblk/naux)))
    eribuf = numpy.empty((dmax,dmax,nvir_pair))
    loadbuf = numpy.empty((dmax,dmax,nvirb,nvirb))
    tril2sq = lib.square_mat_in_trilu_indices(nvira)

    for i0, i1 in lib.prange(0, nvira, dmax):
        off0 = i0*(i0+1)//2
        off1 = i1*(i1+1)//2
        vvL0 = _cp(vvL[off0:off1])
        for j0, j1 in lib.prange(0, i1, dmax):
            ijL = vvL0[tril2sq[i0:i1,j0:j1] - off0].reshape(-1,naux)
            eri = numpy.ndarray(((i1-i0)*(j1-j0),nvir_pair), buffer=eribuf)
            for p0, p1 in lib.prange(0, nvir_pair, vvblk):
                vvL1 = _cp(vvL[p0:p1])
                eri[:,p0:p1] = lib.ddot(ijL, vvL1.T)
                vvL1 = None

            tmp = numpy.ndarray((i1-i0,nvirb,j1-j0,nvirb), buffer=loadbuf)
            _ccsd.libcc.CCload_eri(tmp.ctypes.data_as(ctypes.c_void_p),
                                   eri.ctypes.data_as(ctypes.c_void_p),
                                   (ctypes.c_int*4)(i0, i1, j0, j1),
                                   ctypes.c_int(nvirb))
            contract_blk_(tmp, i0, i1, j0, j1)
            time0 = log.timer_debug1('vvvv [%d:%d,%d:%d]'%(i0,i1,j0,j1), *time0)
    return Ht2.reshape(t2.shape)
Beispiel #50
0
def _ao2mo_ovov(mp, orbo, orbv, feri, max_memory=2000, verbose=None):
    time0 = (time.clock(), time.time())
    log = logger.new_logger(mp, verbose)

    mol = mp.mol
    int2e = mol._add_suffix('int2e')
    ao2mopt = _ao2mo.AO2MOpt(mol, int2e, 'CVHFnr_schwarz_cond',
                             'CVHFsetnr_direct_scf')
    nao, nocc = orbo.shape
    nvir = orbv.shape[1]
    nbas = mol.nbas
    assert (nvir <= nao)

    ao_loc = mol.ao_loc_nr()
    dmax = max(
        4, min(nao / 3, numpy.sqrt(max_memory * .95e6 / 8 / (nao + nocc)**2)))
    sh_ranges = ao2mo.outcore.balance_partition(ao_loc, dmax)
    dmax = max(x[2] for x in sh_ranges)
    eribuf = numpy.empty((nao, dmax, dmax, nao))
    ftmp = lib.H5TmpFile()
    log.debug('max_memory %s MB (dmax = %s) required disk space %g MB',
              max_memory, dmax,
              nocc**2 * (nao * (nao + dmax) / 2 + nvir**2) * 8 / 1e6)

    buf_i = numpy.empty((nocc * dmax**2 * nao))
    buf_li = numpy.empty((nocc**2 * dmax**2))
    buf1 = numpy.empty_like(buf_li)

    fint = gto.moleintor.getints4c
    jk_blk_slices = []
    count = 0
    time1 = time0
    with lib.call_in_background(ftmp.__setitem__) as save:
        for ip, (ish0, ish1, ni) in enumerate(sh_ranges):
            for jsh0, jsh1, nj in sh_ranges[:ip + 1]:
                i0, i1 = ao_loc[ish0], ao_loc[ish1]
                j0, j1 = ao_loc[jsh0], ao_loc[jsh1]
                jk_blk_slices.append((i0, i1, j0, j1))

                eri = fint(int2e,
                           mol._atm,
                           mol._bas,
                           mol._env,
                           shls_slice=(0, nbas, ish0, ish1, jsh0, jsh1, 0,
                                       nbas),
                           aosym='s1',
                           ao_loc=ao_loc,
                           cintopt=ao2mopt._cintopt,
                           out=eribuf)
                tmp_i = numpy.ndarray((nocc, (i1 - i0) * (j1 - j0) * nao),
                                      buffer=buf_i)
                tmp_li = numpy.ndarray((nocc, nocc * (i1 - i0) * (j1 - j0)),
                                       buffer=buf_li)
                lib.ddot(orbo.T,
                         eri.reshape(nao, (i1 - i0) * (j1 - j0) * nao),
                         c=tmp_i)
                lib.ddot(orbo.T,
                         tmp_i.reshape(nocc * (i1 - i0) * (j1 - j0), nao).T,
                         c=tmp_li)
                tmp_li = tmp_li.reshape(nocc, nocc, (i1 - i0), (j1 - j0))
                save(str(count), tmp_li.transpose(1, 0, 2, 3))
                buf_li, buf1 = buf1, buf_li
                count += 1
                time1 = log.timer_debug1(
                    'partial ao2mo [%d:%d,%d:%d]' % (ish0, ish1, jsh0, jsh1),
                    *time1)
    time1 = time0 = log.timer('mp2 ao2mo_ovov pass1', *time0)
    eri = eribuf = tmp_i = tmp_li = buf_i = buf_li = buf1 = None

    h5dat = feri.create_dataset('ovov', (nocc * nvir, nocc * nvir),
                                'f8',
                                chunks=(nvir, nvir))
    occblk = int(
        min(nocc,
            max(4, 250 / nocc, max_memory * .9e6 / 8 / (nao**2 * nocc) / 5)))

    def load(i0, eri):
        if i0 < nocc:
            i1 = min(i0 + occblk, nocc)
            for k, (p0, p1, q0, q1) in enumerate(jk_blk_slices):
                eri[:i1 - i0, :, p0:p1, q0:q1] = ftmp[str(k)][i0:i1]
                if p0 != q0:
                    dat = numpy.asarray(ftmp[str(k)][:, i0:i1])
                    eri[:i1 - i0, :, q0:q1, p0:p1] = dat.transpose(1, 0, 3, 2)

    def save(i0, i1, dat):
        for i in range(i0, i1):
            h5dat[i * nvir:(i + 1) * nvir] = dat[i - i0].reshape(
                nvir, nocc * nvir)

    orbv = numpy.asarray(orbv, order='F')
    buf_prefecth = numpy.empty((occblk, nocc, nao, nao))
    buf = numpy.empty_like(buf_prefecth)
    bufw = numpy.empty((occblk * nocc, nvir**2))
    bufw1 = numpy.empty_like(bufw)
    with lib.call_in_background(load) as prefetch:
        with lib.call_in_background(save) as bsave:
            load(0, buf_prefecth)
            for i0, i1 in lib.prange(0, nocc, occblk):
                buf, buf_prefecth = buf_prefecth, buf
                prefetch(i1, buf_prefecth)
                eri = buf[:i1 - i0].reshape((i1 - i0) * nocc, nao, nao)

                dat = _ao2mo.nr_e2(eri,
                                   orbv, (0, nvir, 0, nvir),
                                   's1',
                                   's1',
                                   out=bufw)
                bsave(
                    i0, i1,
                    dat.reshape(i1 - i0, nocc, nvir,
                                nvir).transpose(0, 2, 1, 3))
                bufw, bufw1 = bufw1, bufw
                time1 = log.timer_debug1('pass2 ao2mo [%d:%d]' % (i0, i1),
                                         *time1)

    time0 = log.timer('mp2 ao2mo_ovov pass2', *time0)
    return h5dat
Beispiel #51
0
def general(mydf, mo_coeffs, kpts=None,
            compact=getattr(__config__, 'pbc_df_ao2mo_general_compact', True)):
    warn_pbc2d_eri(mydf)
    if mydf._cderi is None:
        mydf.build()

    cell = mydf.cell
    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    if isinstance(mo_coeffs, numpy.ndarray) and mo_coeffs.ndim == 2:
        mo_coeffs = (mo_coeffs,) * 4
    if not _iskconserv(cell, kptijkl):
        lib.logger.warn(cell, 'df_ao2mo: momentum conservation not found in '
                        'the given k-points %s', kptijkl)
        return numpy.zeros([mo.shape[1] for mo in mo_coeffs])

    all_real = not any(numpy.iscomplexobj(mo) for mo in mo_coeffs)
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]))

####################
# gamma point, the integral is real and with s4 symmetry
    if gamma_point(kptijkl) and all_real:
        ijmosym, nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1], compact)
        klmosym, nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3], compact)
        eri_mo = numpy.zeros((nij_pair,nkl_pair))
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[3]))
        ijR = klR = None
        for LpqR, LpqI, sign in mydf.sr_loop(kptijkl[:2], max_memory, True):
            ijR, klR = _dtrans(LpqR, ijR, ijmosym, moij, ijslice,
                               LpqR, klR, klmosym, mokl, klslice, sym)
            lib.ddot(ijR.T, klR, sign, eri_mo, 1)
            LpqR = LpqI = None
        return eri_mo

    elif is_zero(kpti-kptk) and is_zero(kptj-kptl):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        eri_mo = numpy.zeros((nij_pair,nkl_pair), dtype=numpy.complex)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[3]))

        zij = zkl = None
        for LpqR, LpqI, sign in mydf.sr_loop(kptijkl[:2], max_memory, False):
            buf = LpqR+LpqI*1j
            zij, zkl = _ztrans(buf, zij, moij, ijslice,
                               buf, zkl, mokl, klslice, sym)
            lib.dot(zij.T, zkl, sign, eri_mo, 1)
            LpqR = LpqI = buf = None
        return eri_mo

####################
# (kpt) i == j == k == l != 0
# (kpt) i == l && j == k && i != j && j != k  =>
#
    elif is_zero(kpti-kptl) and is_zero(kptj-kptk):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nlk_pair, molk, lkslice = _conc_mos(mo_coeffs[3], mo_coeffs[2])[1:]
        eri_mo = numpy.zeros((nij_pair,nlk_pair), dtype=numpy.complex)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[3]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[2]))

        zij = zlk = None
        for LpqR, LpqI, sign in mydf.sr_loop(kptijkl[:2], max_memory, False):
            buf = LpqR+LpqI*1j
            zij, zlk = _ztrans(buf, zij, moij, ijslice,
                               buf, zlk, molk, lkslice, sym)
            lib.dot(zij.T, zlk.conj(), sign, eri_mo, 1)
            LpqR = LpqI = buf = None
        nmok = mo_coeffs[2].shape[1]
        nmol = mo_coeffs[3].shape[1]
        eri_mo = lib.transpose(eri_mo.reshape(-1,nmol,nmok), axes=(0,2,1))
        return eri_mo.reshape(nij_pair,nlk_pair)

####################
# aosym = s1, complex integrals
#
# If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
# vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
# So  kptl/b - kptk/b  must be -1 < k/b < 1.  =>  kptl == kptk
#
    else:
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        nao = mo_coeffs[0].shape[0]
        eri_mo = numpy.zeros((nij_pair,nkl_pair), dtype=numpy.complex)

        blksize = int(min(max_memory*.3e6/16/nij_pair,
                          max_memory*.3e6/16/nkl_pair,
                          max_memory*.3e6/16/nao**2))
        zij = zkl = None
        for (LpqR, LpqI, sign), (LrsR, LrsI, sign1) in \
                lib.izip(mydf.sr_loop(kptijkl[:2], max_memory, False, blksize),
                         mydf.sr_loop(kptijkl[2:], max_memory, False, blksize)):
            zij, zkl = _ztrans(LpqR+LpqI*1j, zij, moij, ijslice,
                               LrsR+LrsI*1j, zkl, mokl, klslice, False)
            lib.dot(zij.T, zkl, sign, eri_mo, 1)
            LpqR = LpqI = LrsR = LrsI = None
        return eri_mo
Beispiel #52
0
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    t1 = (time.clock(), time.time())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, auxcell)

    # The ideal way to hold the temporary integrals is to store them in the
    # cderi_file and overwrite them inplace in the second pass.  The current
    # HDF5 library does not have an efficient way to manage free space in
    # overwriting.  It often leads to the cderi_file ~2 times larger than the
    # necessary size.  For now, dumping the DF integral intermediates to a
    # separated temporary file can avoid this issue.  The DF intermediates may
    # be terribly huge. The temporary file should be placed in the same disk
    # as cderi_file.
    swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file))
    fswap = lib.H5TmpFile(swapfile.name)
    # Unlink swapfile to avoid trash
    swapfile = None

    outcore._aux_e2(cell, fused_cell, fswap, 'int3c2e', aosym='s2',
                    kptij_lst=kptij_lst, dataname='j3c-junk', max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    mesh = mydf.mesh
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]

    kptis = kptij_lst[:,0]
    kptjs = kptij_lst[:,1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)

    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts)

    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    blksize = max(2048, int(max_memory*.5e6/16/fused_cell.nao_nr()))
    log.debug2('max_memory %s (MB)  blocksize %s', max_memory, blksize)
    for k, kpt in enumerate(uniq_kpts):
        coulG = mydf.weighted_coulG(kpt, False, mesh)
        for p0, p1 in lib.prange(0, ngrids, blksize):
            aoaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], None, b, gxyz[p0:p1], Gvbase, kpt).T
            LkR = numpy.asarray(aoaux.real, order='C')
            LkI = numpy.asarray(aoaux.imag, order='C')
            aoaux = None

            if is_zero(kpt):  # kpti == kptj
                j2c[k][naux:] -= lib.ddot(LkR[naux:]*coulG[p0:p1], LkR.T)
                j2c[k][naux:] -= lib.ddot(LkI[naux:]*coulG[p0:p1], LkI.T)
                j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T
            else:
                j2cR, j2cI = zdotCN(LkR[naux:]*coulG[p0:p1],
                                    LkI[naux:]*coulG[p0:p1], LkR.T, LkI.T)
                j2c[k][naux:] -= j2cR + j2cI * 1j
                j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T.conj()
            LkR = LkI = None
        fswap['j2c/%d'%k] = fuse(fuse(j2c[k]).T).T
    j2c = coulG = None

    def cholesky_decomposed_metric(uniq_kptji_id):
        j2c = numpy.asarray(fswap['j2c/%d'%uniq_kptji_id])
        j2c_negative = None
        try:
            j2c = scipy.linalg.cholesky(j2c, lower=True)
            j2ctag = 'CD'
        except scipy.linalg.LinAlgError as e:
            #msg =('===================================\n'
            #      'J-metric not positive definite.\n'
            #      'It is likely that mesh is not enough.\n'
            #      '===================================')
            #log.error(msg)
            #raise scipy.linalg.LinAlgError('\n'.join([str(e), msg]))
            w, v = scipy.linalg.eigh(j2c)
            log.debug('DF metric linear dependency for kpt %s', uniq_kptji_id)
            log.debug('cond = %.4g, drop %d bfns',
                      w[-1]/w[0], numpy.count_nonzero(w<mydf.linear_dep_threshold))
            v1 = v[:,w>mydf.linear_dep_threshold].conj().T
            v1 /= numpy.sqrt(w[w>mydf.linear_dep_threshold]).reshape(-1,1)
            j2c = v1
            if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum':
                idx = numpy.where(w < -mydf.linear_dep_threshold)[0]
                if len(idx) > 0:
                    j2c_negative = (v[:,idx]/numpy.sqrt(-w[idx])).conj().T
            w = v = None
            j2ctag = 'eig'
        return j2c, j2c_negative, j2ctag

    feri = h5py.File(cderi_file, 'w')
    feri['j3c-kptij'] = kptij_lst
    nsegs = len(fswap['j3c-junk/0'])
    def make_kpt(uniq_kptji_id, cholesky_j2c):
        kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

        j2c, j2c_negative, j2ctag = cholesky_j2c

        shls_slice = (auxcell.nbas, fused_cell.nbas)
        Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
        wcoulG = mydf.weighted_coulG(kpt, False, mesh)
        Gaux *= wcoulG.reshape(-1,1)
        kLR = Gaux.real.copy('C')
        kLI = Gaux.imag.copy('C')
        Gaux = None

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao*(nao+1)//2

            if cell.dimension == 3:
                vbar = fuse(mydf.auxbar(fused_cell))
                ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
                ovlp = [lib.pack_tril(s) for s in ovlp]
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory-mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory*.38e6/16/naux/(nkptj+1)), 1), nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16, int(max_memory*.1e6/16/buflen/(nkptj+1)))
        else:
            Gblksize = max(16, int(max_memory*.2e6/16/buflen/(nkptj+1)))
        Gblksize = min(Gblksize, ngrids, 16384)
        pqkRbuf = numpy.empty(buflen*Gblksize)
        pqkIbuf = numpy.empty(buflen*Gblksize)
        # buf for ft_aopair
        buf = numpy.empty(nkptj*buflen*Gblksize, dtype=numpy.complex128)
        def pw_contract(istep, sh_range, j3cR, j3cI):
            bstart, bend, ncol = sh_range
            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)

            for p0, p1 in lib.prange(0, ngrids, Gblksize):
                dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                            b, gxyz[p0:p1], Gvbase, kpt,
                                            adapted_kptjs, out=buf)
                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = dat[k].reshape(nG,ncol)
                    pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T

                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkR.T,  1, j3cI[k][naux:], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)
                if j2ctag == 'CD':
                    v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True)
                    feri['j3c/%d/%d'%(ji,istep)] = v
                else:
                    feri['j3c/%d/%d'%(ji,istep)] = lib.dot(j2c, v)

                # low-dimension systems
                if j2c_negative is not None:
                    feri['j3c-/%d/%d'%(ji,istep)] = lib.dot(j2c_negative, v)

        with lib.call_in_background(pw_contract) as compute:
            col1 = 0
            for istep, sh_range in enumerate(shranges):
                log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', \
                           istep+1, len(shranges), *sh_range)
                bstart, bend, ncol = sh_range
                col0, col1 = col1, col1+ncol
                j3cR = []
                j3cI = []
                for k, idx in enumerate(adapted_ji_idx):
                    v = numpy.vstack([fswap['j3c-junk/%d/%d'%(idx,i)][0,col0:col1].T
                                      for i in range(nsegs)])
                    # vbar is the interaction between the background charge
                    # and the auxiliary basis.  0D, 1D, 2D do not have vbar.
                    if is_zero(kpt) and cell.dimension == 3:
                        for i in numpy.where(vbar != 0)[0]:
                            v[i] -= vbar[i] * ovlp[k][col0:col1]
                    j3cR.append(numpy.asarray(v.real, order='C'))
                    if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                        j3cI.append(None)
                    else:
                        j3cI.append(numpy.asarray(v.imag, order='C'))
                v = None
                compute(istep, sh_range, j3cR, j3cI)
        for ji in adapted_ji_idx:
            del(fswap['j3c-junk/%d'%ji])

    # Wrapped around boundary and symmetry between k and -k can be used
    # explicitly for the metric integrals.  We consider this symmetry
    # because it is used in the df_ao2mo module when contracting two 3-index
    # integral tensors to the 4-index 2e integral tensor. If the symmetry
    # related k-points are treated separately, the resultant 3-index tensors
    # may have inconsistent dimension due to the numerial noise when handling
    # linear dependency of j2c.
    def conj_j2c(cholesky_j2c):
        j2c, j2c_negative, j2ctag = cholesky_j2c
        if j2c_negative is None:
            return j2c.conj(), None, j2ctag
        else:
            return j2c.conj(), j2c_negative.conj(), j2ctag

    a = cell.lattice_vectors() / (2*numpy.pi)
    def kconserve_indices(kpt):
        '''search which (kpts+kpt) satisfies momentum conservation'''
        kdif = numpy.einsum('wx,ix->wi', a, uniq_kpts + kpt)
        kdif_int = numpy.rint(kdif)
        mask = numpy.einsum('wi->i', abs(kdif - kdif_int)) < KPT_DIFF_TOL
        uniq_kptji_ids = numpy.where(mask)[0]
        return uniq_kptji_ids

    done = numpy.zeros(len(uniq_kpts), dtype=bool)
    for k, kpt in enumerate(uniq_kpts):
        if done[k]:
            continue

        log.debug1('Cholesky decomposition for j2c at kpt %s', k)
        cholesky_j2c = cholesky_decomposed_metric(k)

        # The k-point k' which has (k - k') * a = 2n pi. Metric integrals have the
        # symmetry S = S
        uniq_kptji_ids = kconserve_indices(-kpt)
        log.debug1("Symmetry pattern (k - %s)*a= 2n pi", kpt)
        log.debug1("    make_kpt for uniq_kptji_ids %s", uniq_kptji_ids)
        for uniq_kptji_id in uniq_kptji_ids:
            if not done[uniq_kptji_id]:
                make_kpt(uniq_kptji_id, cholesky_j2c)
        done[uniq_kptji_ids] = True

        # The k-point k' which has (k + k') * a = 2n pi. Metric integrals have the
        # symmetry S = S*
        uniq_kptji_ids = kconserve_indices(kpt)
        log.debug1("Symmetry pattern (k + %s)*a= 2n pi", kpt)
        log.debug1("    make_kpt for %s", uniq_kptji_ids)
        cholesky_j2c = conj_j2c(cholesky_j2c)
        for uniq_kptji_id in uniq_kptji_ids:
            if not done[uniq_kptji_id]:
                make_kpt(uniq_kptji_id, cholesky_j2c)
        done[uniq_kptji_ids] = True

    feri.close()
Beispiel #53
0
def _make_df_eris(cc, mo_coeff=None):
    cput0 = (time.clock(), time.time())
    eris = _ChemistsERIs()
    eris._common_init_(cc, mo_coeff)
    nocc = eris.nocc
    nmo = eris.fock.shape[0]
    nvir = nmo - nocc
    nocc_pair = nocc*(nocc+1)//2
    nvir_pair = nvir*(nvir+1)//2
    with_df = cc.with_df
    naux = eris.naux = with_df.get_naoaux()

    eris.feri = lib.H5TmpFile()
    eris.oooo = eris.feri.create_dataset('oooo', (nocc,nocc,nocc,nocc), 'f8')
    eris.ovoo = eris.feri.create_dataset('ovoo', (nocc,nvir,nocc,nocc), 'f8', chunks=(nocc,1,nocc,nocc))
    eris.ovov = eris.feri.create_dataset('ovov', (nocc,nvir,nocc,nvir), 'f8', chunks=(nocc,1,nocc,nvir))
    eris.ovvo = eris.feri.create_dataset('ovvo', (nocc,nvir,nvir,nocc), 'f8', chunks=(nocc,1,nvir,nocc))
    eris.oovv = eris.feri.create_dataset('oovv', (nocc,nocc,nvir,nvir), 'f8', chunks=(nocc,nocc,1,nvir))
    # nrow ~ 4e9/8/blockdim to ensure hdf5 chunk < 4GB
    chunks = (min(nvir_pair,int(4e8/with_df.blockdim)), min(naux,with_df.blockdim))
    eris.vvL = eris.feri.create_dataset('vvL', (nvir_pair,naux), 'f8', chunks=chunks)

    Loo = numpy.empty((naux,nocc,nocc))
    Lov = numpy.empty((naux,nocc,nvir))
    fswap = lib.H5TmpFile()
    mo = numpy.asarray(eris.mo_coeff, order='F')
    ijslice = (0, nmo, 0, nmo)
    p1 = 0
    Lpq = None
    for k, eri1 in enumerate(with_df.loop()):
        Lpq = _ao2mo.nr_e2(eri1, mo, ijslice, aosym='s2', mosym='s1', out=Lpq)
        p0, p1 = p1, p1 + Lpq.shape[0]
        Lpq = Lpq.reshape(p1-p0,nmo,nmo)
        Loo[p0:p1] = Lpq[:,:nocc,:nocc]
        Lov[p0:p1] = Lpq[:,:nocc,nocc:]
        Lvv = lib.pack_tril(Lpq[:,nocc:,nocc:])
        eris.vvL[:,p0:p1] = Lvv.T
    Lpq = Lvv = None
    Loo = Loo.reshape(naux,nocc**2)
    Lvo = Lov.transpose(0,2,1).reshape(naux,nvir*nocc)
    Lov = Lov.reshape(naux,nocc*nvir)
    eris.oooo[:] = lib.ddot(Loo.T, Loo).reshape(nocc,nocc,nocc,nocc)
    eris.ovoo[:] = lib.ddot(Lov.T, Loo).reshape(nocc,nvir,nocc,nocc)
    ovov = lib.ddot(Lov.T, Lov).reshape(nocc,nvir,nocc,nvir)
    eris.ovov[:] = ovov
    eris.ovvo[:] = ovov.transpose(0,1,3,2)
    ovov = None

    mem_now = lib.current_memory()[0]
    max_memory = max(0, cc.max_memory - mem_now)
    blksize = max(ccsd.BLKMIN, int((max_memory*.9e6/8-nocc**2*nvir_pair)/(nocc**2+naux)))
    oovv_tril = numpy.empty((nocc*nocc,nvir_pair))
    for p0, p1 in lib.prange(0, nvir_pair, blksize):
        oovv_tril[:,p0:p1] = lib.ddot(Loo.T, _cp(eris.vvL[p0:p1]).T)
    eris.oovv[:] = lib.unpack_tril(oovv_tril).reshape(nocc,nocc,nvir,nvir)
    oovv_tril = Loo = None

    Lov = Lov.reshape(naux,nocc,nvir)
    vblk = max(nocc, int((max_memory*.15e6/8)/(nocc*nvir_pair)))
    vvblk = int(min(nvir_pair, 4e8/nocc, max(4, (max_memory*.8e6/8)/(vblk*nocc+naux))))
    eris.ovvv = eris.feri.create_dataset('ovvv', (nocc,nvir,nvir_pair), 'f8',
                                         chunks=(nocc,1,vvblk))
    for q0, q1 in lib.prange(0, nvir_pair, vvblk):
        vvL = _cp(eris.vvL[q0:q1])
        for p0, p1 in lib.prange(0, nvir, vblk):
            tmpLov = _cp(Lov[:,:,p0:p1]).reshape(naux,-1)
            eris.ovvv[:,p0:p1,q0:q1] = lib.ddot(tmpLov.T, vvL.T).reshape(nocc,p1-p0,q1-q0)
        vvL = None
    return eris
Beispiel #54
0
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    t1 = (time.clock(), time.time())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, auxcell)

    # The ideal way to hold the temporary integrals is to store them in the
    # cderi_file and overwrite them inplace in the second pass.  The current
    # HDF5 library does not have an efficient way to manage free space in
    # overwriting.  It often leads to the cderi_file ~2 times larger than the
    # necessary size.  For now, dumping the DF integral intermediates to a
    # separated temporary file can avoid this issue.  The DF intermediates may
    # be terribly huge. The temporary file should be placed in the same disk
    # as cderi_file.
    swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file))
    fswap = lib.H5TmpFile(swapfile.name)
    # Unlink swapfile to avoid trash
    swapfile = None

    outcore._aux_e2(cell,
                    fused_cell,
                    fswap,
                    'int3c2e',
                    aosym='s2',
                    kptij_lst=kptij_lst,
                    dataname='j3c-junk',
                    max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    mesh = mydf.mesh
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]

    kptis = kptij_lst[:, 0]
    kptjs = kptij_lst[:, 1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)

    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts)

    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    blksize = max(2048, int(max_memory * .5e6 / 16 / fused_cell.nao_nr()))
    log.debug2('max_memory %s (MB)  blocksize %s', max_memory, blksize)
    for k, kpt in enumerate(uniq_kpts):
        coulG = mydf.weighted_coulG(kpt, False, mesh)
        for p0, p1 in lib.prange(0, ngrids, blksize):
            aoaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], None, b, gxyz[p0:p1],
                                Gvbase, kpt).T
            LkR = numpy.asarray(aoaux.real, order='C')
            LkI = numpy.asarray(aoaux.imag, order='C')
            aoaux = None

            if is_zero(kpt):  # kpti == kptj
                j2c[k][naux:] -= lib.ddot(LkR[naux:] * coulG[p0:p1], LkR.T)
                j2c[k][naux:] -= lib.ddot(LkI[naux:] * coulG[p0:p1], LkI.T)
                j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T
            else:
                j2cR, j2cI = zdotCN(LkR[naux:] * coulG[p0:p1],
                                    LkI[naux:] * coulG[p0:p1], LkR.T, LkI.T)
                j2c[k][naux:] -= j2cR + j2cI * 1j
                j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T.conj()
            LkR = LkI = None
        fswap['j2c/%d' % k] = fuse(fuse(j2c[k]).T).T
    j2c = coulG = None

    def cholesky_decomposed_metric(uniq_kptji_id):
        j2c = numpy.asarray(fswap['j2c/%d' % uniq_kptji_id])
        j2c_negative = None
        try:
            j2c = scipy.linalg.cholesky(j2c, lower=True)
            j2ctag = 'CD'
        except scipy.linalg.LinAlgError:
            #msg =('===================================\n'
            #      'J-metric not positive definite.\n'
            #      'It is likely that mesh is not enough.\n'
            #      '===================================')
            #log.error(msg)
            #raise scipy.linalg.LinAlgError('\n'.join([str(e), msg]))
            w, v = scipy.linalg.eigh(j2c)
            log.debug('DF metric linear dependency for kpt %s', uniq_kptji_id)
            log.debug('cond = %.4g, drop %d bfns', w[-1] / w[0],
                      numpy.count_nonzero(w < mydf.linear_dep_threshold))
            v1 = v[:, w > mydf.linear_dep_threshold].conj().T
            v1 /= numpy.sqrt(w[w > mydf.linear_dep_threshold]).reshape(-1, 1)
            j2c = v1
            if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum':
                idx = numpy.where(w < -mydf.linear_dep_threshold)[0]
                if len(idx) > 0:
                    j2c_negative = (v[:, idx] / numpy.sqrt(-w[idx])).conj().T
            w = v = None
            j2ctag = 'eig'
        return j2c, j2c_negative, j2ctag

    feri = h5py.File(cderi_file, 'w')
    feri['j3c-kptij'] = kptij_lst
    nsegs = len(fswap['j3c-junk/0'])

    def make_kpt(uniq_kptji_id, cholesky_j2c):
        kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

        j2c, j2c_negative, j2ctag = cholesky_j2c

        shls_slice = (auxcell.nbas, fused_cell.nbas)
        Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
        wcoulG = mydf.weighted_coulG(kpt, False, mesh)
        Gaux *= wcoulG.reshape(-1, 1)
        kLR = Gaux.real.copy('C')
        kLI = Gaux.imag.copy('C')
        Gaux = None

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao * (nao + 1) // 2

            if cell.dimension == 3:
                vbar = fuse(mydf.auxbar(fused_cell))
                ovlp = cell.pbc_intor('int1e_ovlp',
                                      hermi=1,
                                      kpts=adapted_kptjs)
                ovlp = [lib.pack_tril(s) for s in ovlp]
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory - mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory * .38e6 / 16 / naux / (nkptj + 1)), 1),
                     nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16,
                           int(max_memory * .1e6 / 16 / buflen / (nkptj + 1)))
        else:
            Gblksize = max(16,
                           int(max_memory * .2e6 / 16 / buflen / (nkptj + 1)))
        Gblksize = min(Gblksize, ngrids, 16384)

        def load(aux_slice):
            col0, col1 = aux_slice
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                v = numpy.vstack([
                    fswap['j3c-junk/%d/%d' % (idx, i)][0, col0:col1].T
                    for i in range(nsegs)
                ])
                # vbar is the interaction between the background charge
                # and the auxiliary basis.  0D, 1D, 2D do not have vbar.
                if is_zero(kpt) and cell.dimension == 3:
                    for i in numpy.where(vbar != 0)[0]:
                        v[i] -= vbar[i] * ovlp[k][col0:col1]
                j3cR.append(numpy.asarray(v.real, order='C'))
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order='C'))
                v = None
            return j3cR, j3cI

        pqkRbuf = numpy.empty(buflen * Gblksize)
        pqkIbuf = numpy.empty(buflen * Gblksize)
        # buf for ft_aopair
        buf = numpy.empty(nkptj * buflen * Gblksize, dtype=numpy.complex128)
        cols = [sh_range[2] for sh_range in shranges]
        locs = numpy.append(0, numpy.cumsum(cols))
        tasks = zip(locs[:-1], locs[1:])
        for istep, (j3cR,
                    j3cI) in enumerate(lib.map_with_prefetch(load, tasks)):
            bstart, bend, ncol = shranges[istep]
            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', istep + 1,
                       len(shranges), bstart, bend, ncol)
            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)

            for p0, p1 in lib.prange(0, ngrids, Gblksize):
                dat = ft_ao._ft_aopair_kpts(cell,
                                            Gv[p0:p1],
                                            shls_slice,
                                            aosym,
                                            b,
                                            gxyz[p0:p1],
                                            Gvbase,
                                            kpt,
                                            adapted_kptjs,
                                            out=buf)
                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = dat[k].reshape(nG, ncol)
                    pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T

                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)
                if j2ctag == 'CD':
                    v = scipy.linalg.solve_triangular(j2c,
                                                      v,
                                                      lower=True,
                                                      overwrite_b=True)
                    feri['j3c/%d/%d' % (ji, istep)] = v
                else:
                    feri['j3c/%d/%d' % (ji, istep)] = lib.dot(j2c, v)

                # low-dimension systems
                if j2c_negative is not None:
                    feri['j3c-/%d/%d' % (ji, istep)] = lib.dot(j2c_negative, v)
            j3cR = j3cI = None

        for ji in adapted_ji_idx:
            del (fswap['j3c-junk/%d' % ji])

    # Wrapped around boundary and symmetry between k and -k can be used
    # explicitly for the metric integrals.  We consider this symmetry
    # because it is used in the df_ao2mo module when contracting two 3-index
    # integral tensors to the 4-index 2e integral tensor. If the symmetry
    # related k-points are treated separately, the resultant 3-index tensors
    # may have inconsistent dimension due to the numerial noise when handling
    # linear dependency of j2c.
    def conj_j2c(cholesky_j2c):
        j2c, j2c_negative, j2ctag = cholesky_j2c
        if j2c_negative is None:
            return j2c.conj(), None, j2ctag
        else:
            return j2c.conj(), j2c_negative.conj(), j2ctag

    a = cell.lattice_vectors() / (2 * numpy.pi)

    def kconserve_indices(kpt):
        '''search which (kpts+kpt) satisfies momentum conservation'''
        kdif = numpy.einsum('wx,ix->wi', a, uniq_kpts + kpt)
        kdif_int = numpy.rint(kdif)
        mask = numpy.einsum('wi->i', abs(kdif - kdif_int)) < KPT_DIFF_TOL
        uniq_kptji_ids = numpy.where(mask)[0]
        return uniq_kptji_ids

    done = numpy.zeros(len(uniq_kpts), dtype=bool)
    for k, kpt in enumerate(uniq_kpts):
        if done[k]:
            continue

        log.debug1('Cholesky decomposition for j2c at kpt %s', k)
        cholesky_j2c = cholesky_decomposed_metric(k)

        # The k-point k' which has (k - k') * a = 2n pi. Metric integrals have the
        # symmetry S = S
        uniq_kptji_ids = kconserve_indices(-kpt)
        log.debug1("Symmetry pattern (k - %s)*a= 2n pi", kpt)
        log.debug1("    make_kpt for uniq_kptji_ids %s", uniq_kptji_ids)
        for uniq_kptji_id in uniq_kptji_ids:
            if not done[uniq_kptji_id]:
                make_kpt(uniq_kptji_id, cholesky_j2c)
        done[uniq_kptji_ids] = True

        # The k-point k' which has (k + k') * a = 2n pi. Metric integrals have the
        # symmetry S = S*
        uniq_kptji_ids = kconserve_indices(kpt)
        log.debug1("Symmetry pattern (k + %s)*a= 2n pi", kpt)
        log.debug1("    make_kpt for %s", uniq_kptji_ids)
        cholesky_j2c = conj_j2c(cholesky_j2c)
        for uniq_kptji_id in uniq_kptji_ids:
            if not done[uniq_kptji_id]:
                make_kpt(uniq_kptji_id, cholesky_j2c)
        done[uniq_kptji_ids] = True

    feri.close()
Beispiel #55
0
def get_eri(mydf, kpts=None, compact=True):
    cell = mydf.cell
    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    q = kptj - kpti
    coulG = mydf.weighted_coulG(q, False, mydf.gs)
    nao = cell.nao_nr()
    nao_pair = nao * (nao + 1) // 2
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]) * .8)

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if gamma_point(kptijkl):
        eriR = numpy.zeros((nao_pair, nao_pair))
        for pqkR, pqkI, p0, p1 \
                in mydf.pw_loop(mydf.gs, kptijkl[:2], q, max_memory=max_memory,
                                aosym='s2'):
            vG = numpy.sqrt(coulG[p0:p1])
            pqkR *= vG
            pqkI *= vG
            lib.ddot(pqkR, pqkR.T, 1, eriR, 1)
            lib.ddot(pqkI, pqkI.T, 1, eriR, 1)
            pqkR = pqkI = None
        if not compact:
            eriR = ao2mo.restore(1, eriR, nao).reshape(nao**2, -1)
        return eriR

####################
# (kpt) i == j == k == l != 0
# (kpt) i == l && j == k && i != j && j != k  =>
#
# complex integrals, N^4 elements
    elif is_zero(kpti - kptl) and is_zero(kptj - kptk):
        eriR = numpy.zeros((nao**2, nao**2))
        eriI = numpy.zeros((nao**2, nao**2))
        for pqkR, pqkI, p0, p1 \
                in mydf.pw_loop(mydf.gs, kptijkl[:2], q, max_memory=max_memory):
            vG = numpy.sqrt(coulG[p0:p1])
            pqkR *= vG
            pqkI *= vG
            # rho_pq(G+k_pq) * conj(rho_rs(G-k_rs))
            zdotNC(pqkR, pqkI, pqkR.T, pqkI.T, 1, eriR, eriI, 1)
            pqkR = pqkI = None
        pqkR = pqkI = coulG = None
        # transpose(0,1,3,2) because
        # j == k && i == l  =>
        # (L|ij).transpose(0,2,1).conj() = (L^*|ji) = (L^*|kl)  =>  (M|kl)
        # rho_rs(-G+k_rs) = conj(transpose(rho_sr(G+k_sr), (0,2,1)))
        eri = lib.transpose((eriR + eriI * 1j).reshape(-1, nao, nao),
                            axes=(0, 2, 1))
        return eri.reshape(nao**2, -1)


####################
# aosym = s1, complex integrals
#
# If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
# vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
# So  kptl/b - kptk/b  must be -1 < k/b < 1.  =>  kptl == kptk
#
    else:
        eriR = numpy.zeros((nao**2, nao**2))
        eriI = numpy.zeros((nao**2, nao**2))
        # rho_rs(-G-k) = rho_rs(conj(G+k)) = conj(rho_sr(G+k))
        for (pqkR, pqkI, p0, p1), (rskR, rskI, q0, q1) in \
                lib.izip(mydf.pw_loop(mydf.gs, kptijkl[:2], q, max_memory=max_memory*.5),
                         mydf.pw_loop(mydf.gs,-kptijkl[2:], q, max_memory=max_memory*.5)):
            pqkR *= coulG[p0:p1]
            pqkI *= coulG[p0:p1]
            # rho_pq(G+k_pq) * conj(rho_sr(G+k_pq))
            zdotNC(pqkR, pqkI, rskR.T, rskI.T, 1, eriR, eriI, 1)
            pqkR = pqkI = rskR = rskI = None
        return (eriR + eriI * 1j)
Beispiel #56
0
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    log = logger.Logger(mydf.stdout, mydf.verbose)
    t1 = t0 = (time.clock(), time.time())

    fused_cell, fuse = fuse_auxcell(mydf, mydf.auxcell)
    ao_loc = cell.ao_loc_nr()
    nao = ao_loc[-1]
    naux = auxcell.nao_nr()
    nkptij = len(kptij_lst)
    mesh = mydf.mesh
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]

    kptis = kptij_lst[:, 0]
    kptjs = kptij_lst[:, 1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts)
    j2ctags = []
    t1 = log.timer_debug1('2c2e', *t1)

    swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file))
    fswap = lib.H5TmpFile(swapfile.name)
    # Unlink swapfile to avoid trash
    swapfile = None

    mem_now = max(comm.allgather(lib.current_memory()[0]))
    max_memory = max(2000, mydf.max_memory - mem_now)
    blksize = max(2048, int(max_memory * .5e6 / 16 / fused_cell.nao_nr()))
    log.debug2('max_memory %s (MB)  blocksize %s', max_memory, blksize)
    for k, kpt in enumerate(uniq_kpts):
        coulG = mydf.weighted_coulG(kpt, False, mesh)
        j2c_k = numpy.zeros_like(j2c[k])
        for p0, p1 in mydf.prange(0, ngrids, blksize):
            aoaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], None, b, gxyz[p0:p1],
                                Gvbase, kpt).T
            LkR = numpy.asarray(aoaux.real, order='C')
            LkI = numpy.asarray(aoaux.imag, order='C')
            aoaux = None

            if is_zero(kpt):  # kpti == kptj
                j2c_k[naux:] += lib.ddot(LkR[naux:] * coulG[p0:p1], LkR.T)
                j2c_k[naux:] += lib.ddot(LkI[naux:] * coulG[p0:p1], LkI.T)
            else:
                j2cR, j2cI = zdotCN(LkR[naux:] * coulG[p0:p1],
                                    LkI[naux:] * coulG[p0:p1], LkR.T, LkI.T)
                j2c_k[naux:] += j2cR + j2cI * 1j
            kLR = kLI = None

        j2c_k[:naux, naux:] = j2c_k[naux:, :naux].conj().T
        j2c[k] -= mpi.allreduce(j2c_k)
        j2c[k] = fuse(fuse(j2c[k]).T).T
        try:
            fswap['j2c/%d' % k] = scipy.linalg.cholesky(j2c[k], lower=True)
            j2ctags.append('CD')
        except scipy.linalg.LinAlgError as e:
            #msg =('===================================\n'
            #      'J-metric not positive definite.\n'
            #      'It is likely that mesh is not enough.\n'
            #      '===================================')
            #log.error(msg)
            #raise scipy.linalg.LinAlgError('\n'.join([str(e), msg]))
            w, v = scipy.linalg.eigh(j2c[k])
            log.debug2('metric linear dependency for kpt %s', k)
            log.debug2('cond = %.4g, drop %d bfns', w[0] / w[-1],
                       numpy.count_nonzero(w < mydf.linear_dep_threshold))
            v1 = v[:, w > mydf.linear_dep_threshold].T.conj()
            v1 /= numpy.sqrt(w[w > mydf.linear_dep_threshold]).reshape(-1, 1)
            fswap['j2c/%d' % k] = v1
            if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum':
                idx = numpy.where(w < -mydf.linear_dep_threshold)[0]
                if len(idx) > 0:
                    fswap['j2c-/%d' % k] = (v[:, idx] /
                                            numpy.sqrt(-w[idx])).conj().T
            w = v = v1 = None
            j2ctags.append('eig')
    j2c = coulG = None

    aosym_s2 = numpy.einsum('ix->i', abs(kptis - kptjs)) < 1e-9
    j_only = numpy.all(aosym_s2)
    if gamma_point(kptij_lst):
        dtype = 'f8'
    else:
        dtype = 'c16'
    t1 = log.timer_debug1('aoaux and int2c', *t1)

    # Estimates the buffer size based on the last contraction in G-space.
    # This contraction requires to hold nkptj copies of (naux,?) array
    # simultaneously in memory.
    mem_now = max(comm.allgather(lib.current_memory()[0]))
    max_memory = max(2000, mydf.max_memory - mem_now)
    nkptj_max = max((uniq_inverse == x).sum() for x in set(uniq_inverse))
    buflen = max(
        int(
            min(max_memory * .5e6 / 16 / naux / (nkptj_max + 2) / nao,
                nao / 3 / mpi.pool.size)), 1)
    chunks = (buflen, nao)

    j3c_jobs = grids2d_int3c_jobs(cell, auxcell, kptij_lst, chunks, j_only)
    log.debug1('max_memory = %d MB (%d in use)  chunks %s', max_memory,
               mem_now, chunks)
    log.debug2('j3c_jobs %s', j3c_jobs)

    if j_only:
        int3c = wrap_int3c(cell, fused_cell, 'int3c2e', 's2', 1, kptij_lst)
    else:
        int3c = wrap_int3c(cell, fused_cell, 'int3c2e', 's1', 1, kptij_lst)
        idxb = numpy.tril_indices(nao)
        idxb = (idxb[0] * nao + idxb[1]).astype('i')
    aux_loc = fused_cell.ao_loc_nr('ssc' in 'int3c2e')

    def gen_int3c(job_id, ish0, ish1):
        dataname = 'j3c-chunks/%d' % job_id
        i0 = ao_loc[ish0]
        i1 = ao_loc[ish1]
        dii = i1 * (i1 + 1) // 2 - i0 * (i0 + 1) // 2
        if j_only:
            dij = dii
            buflen = max(8, int(max_memory * 1e6 / 16 / (nkptij * dii + dii)))
        else:
            dij = (i1 - i0) * nao
            buflen = max(8, int(max_memory * 1e6 / 16 / (nkptij * dij + dij)))
        auxranges = balance_segs(aux_loc[1:] - aux_loc[:-1], buflen)
        buflen = max([x[2] for x in auxranges])
        buf = numpy.empty(nkptij * dij * buflen, dtype=dtype)
        buf1 = numpy.empty(dij * buflen, dtype=dtype)

        naux = aux_loc[-1]
        for kpt_id, kptij in enumerate(kptij_lst):
            key = '%s/%d' % (dataname, kpt_id)
            if aosym_s2[kpt_id]:
                shape = (naux, dii)
            else:
                shape = (naux, dij)
            if gamma_point(kptij):
                fswap.create_dataset(key, shape, 'f8')
            else:
                fswap.create_dataset(key, shape, 'c16')

        naux0 = 0
        for istep, auxrange in enumerate(auxranges):
            log.alldebug2("aux_e1 job_id %d step %d", job_id, istep)
            sh0, sh1, nrow = auxrange
            sub_slice = (ish0, ish1, 0, cell.nbas, sh0, sh1)
            mat = numpy.ndarray((nkptij, dij, nrow), dtype=dtype, buffer=buf)
            mat = int3c(sub_slice, mat)

            for k, kptij in enumerate(kptij_lst):
                h5dat = fswap['%s/%d' % (dataname, k)]
                v = lib.transpose(mat[k], out=buf1)
                if not j_only and aosym_s2[k]:
                    idy = idxb[i0 * (i0 + 1) // 2:i1 *
                               (i1 + 1) // 2] - i0 * nao
                    out = numpy.ndarray((nrow, dii),
                                        dtype=v.dtype,
                                        buffer=mat[k])
                    v = numpy.take(v, idy, axis=1, out=out)
                if gamma_point(kptij):
                    h5dat[naux0:naux0 + nrow] = v.real
                else:
                    h5dat[naux0:naux0 + nrow] = v
            naux0 += nrow

    def ft_fuse(job_id, uniq_kptji_id, sh0, sh1):
        kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)

        j2c = numpy.asarray(fswap['j2c/%d' % uniq_kptji_id])
        j2ctag = j2ctags[uniq_kptji_id]
        naux0 = j2c.shape[0]
        if ('j2c-/%d' % uniq_kptji_id) in fswap:
            j2c_negative = numpy.asarray(fswap['j2c-/%d' % uniq_kptji_id])
        else:
            j2c_negative = None

        if is_zero(kpt):
            aosym = 's2'
        else:
            aosym = 's1'

        if aosym == 's2' and cell.dimension == 3:
            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
            ovlp = [lib.pack_tril(s) for s in ovlp]

        j3cR = [None] * nkptj
        j3cI = [None] * nkptj
        i0 = ao_loc[sh0]
        i1 = ao_loc[sh1]
        for k, idx in enumerate(adapted_ji_idx):
            key = 'j3c-chunks/%d/%d' % (job_id, idx)
            v = numpy.asarray(fswap[key])
            if aosym == 's2' and cell.dimension == 3:
                for i in numpy.where(vbar != 0)[0]:
                    v[i] -= vbar[i] * ovlp[k][i0 * (i0 + 1) // 2:i1 *
                                              (i1 + 1) // 2].ravel()
            j3cR[k] = numpy.asarray(v.real, order='C')
            if v.dtype == numpy.complex128:
                j3cI[k] = numpy.asarray(v.imag, order='C')
            v = None

        ncol = j3cR[0].shape[1]
        Gblksize = max(16, int(max_memory * 1e6 / 16 / ncol /
                               (nkptj + 1)))  # +1 for pqkRbuf/pqkIbuf
        Gblksize = min(Gblksize, ngrids, 16384)
        pqkRbuf = numpy.empty(ncol * Gblksize)
        pqkIbuf = numpy.empty(ncol * Gblksize)
        buf = numpy.empty(nkptj * ncol * Gblksize, dtype=numpy.complex128)
        log.alldebug2('job_id %d  blksize (%d,%d)', job_id, Gblksize, ncol)

        wcoulG = mydf.weighted_coulG(kpt, False, mesh)
        fused_cell_slice = (auxcell.nbas, fused_cell.nbas)
        if aosym == 's2':
            shls_slice = (sh0, sh1, 0, sh1)
        else:
            shls_slice = (sh0, sh1, 0, cell.nbas)
        for p0, p1 in lib.prange(0, ngrids, Gblksize):
            Gaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], fused_cell_slice, b,
                               gxyz[p0:p1], Gvbase, kpt)
            Gaux *= wcoulG[p0:p1, None]
            kLR = Gaux.real.copy('C')
            kLI = Gaux.imag.copy('C')
            Gaux = None

            dat = ft_ao._ft_aopair_kpts(cell,
                                        Gv[p0:p1],
                                        shls_slice,
                                        aosym,
                                        b,
                                        gxyz[p0:p1],
                                        Gvbase,
                                        kpt,
                                        adapted_kptjs,
                                        out=buf)
            nG = p1 - p0
            for k, ji in enumerate(adapted_ji_idx):
                aoao = dat[k].reshape(nG, ncol)
                pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                pqkR[:] = aoao.real.T
                pqkI[:] = aoao.imag.T

                lib.dot(kLR.T, pqkR.T, -1, j3cR[k][naux:], 1)
                lib.dot(kLI.T, pqkI.T, -1, j3cR[k][naux:], 1)
                if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                    lib.dot(kLR.T, pqkI.T, -1, j3cI[k][naux:], 1)
                    lib.dot(kLI.T, pqkR.T, 1, j3cI[k][naux:], 1)
            kLR = kLI = None

        for k, idx in enumerate(adapted_ji_idx):
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                v = fuse(j3cR[k])
            else:
                v = fuse(j3cR[k] + j3cI[k] * 1j)
            if j2ctag == 'CD':
                v = scipy.linalg.solve_triangular(j2c,
                                                  v,
                                                  lower=True,
                                                  overwrite_b=True)
                fswap['j3c-chunks/%d/%d' % (job_id, idx)][:naux0] = v
            else:
                fswap['j3c-chunks/%d/%d' % (job_id, idx)][:naux0] = lib.dot(
                    j2c, v)

            # low-dimension systems
            if j2c_negative is not None:
                fswap['j3c-/%d/%d' % (job_id, idx)] = lib.dot(j2c_negative, v)

    _assemble(mydf, kptij_lst, j3c_jobs, gen_int3c, ft_fuse, cderi_file, fswap,
              log)
Beispiel #57
0
def general(mydf, mo_coeffs, kpts=None,
            compact=getattr(__config__, 'pbc_df_ao2mo_general_compact', True)):
    warn_pbc2d_eri(mydf)
    cell = mydf.cell
    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    if isinstance(mo_coeffs, numpy.ndarray) and mo_coeffs.ndim == 2:
        mo_coeffs = (mo_coeffs,) * 4
    if not _iskconserv(cell, kptijkl):
        lib.logger.warn(cell, 'aft_ao2mo: momentum conservation not found in '
                        'the given k-points %s', kptijkl)
        return numpy.zeros([mo.shape[1] for mo in mo_coeffs])

    q = kptj - kpti
    mesh = mydf.mesh
    coulG = mydf.weighted_coulG(q, False, mesh)
    all_real = not any(numpy.iscomplexobj(mo) for mo in mo_coeffs)
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]) * .5)

####################
# gamma point, the integral is real and with s4 symmetry
    if gamma_point(kptijkl) and all_real:
        ijmosym, nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1], compact)
        klmosym, nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3], compact)
        eri_mo = numpy.zeros((nij_pair,nkl_pair))
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[3]))

        ijR = ijI = klR = klI = buf = None
        for pqkR, pqkI, p0, p1 \
                in mydf.pw_loop(mesh, kptijkl[:2], q, max_memory=max_memory,
                                aosym='s2'):
            buf = lib.transpose(pqkR, out=buf)
            ijR, klR = _dtrans(buf, ijR, ijmosym, moij, ijslice,
                               buf, klR, klmosym, mokl, klslice, sym)
            lib.ddot(ijR.T, klR*coulG[p0:p1,None], 1, eri_mo, 1)
            buf = lib.transpose(pqkI, out=buf)
            ijI, klI = _dtrans(buf, ijI, ijmosym, moij, ijslice,
                               buf, klI, klmosym, mokl, klslice, sym)
            lib.ddot(ijI.T, klI*coulG[p0:p1,None], 1, eri_mo, 1)
            pqkR = pqkI = None
        return eri_mo

####################
# (kpt) i == j == k == l != 0
# (kpt) i == l && j == k && i != j && j != k  =>
#
    elif is_zero(kpti-kptl) and is_zero(kptj-kptk):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nlk_pair, molk, lkslice = _conc_mos(mo_coeffs[3], mo_coeffs[2])[1:]
        eri_mo = numpy.zeros((nij_pair,nlk_pair), dtype=numpy.complex)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[3]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[2]))

        zij = zlk = buf = None
        for pqkR, pqkI, p0, p1 \
                in mydf.pw_loop(mesh, kptijkl[:2], q, max_memory=max_memory):
            buf = lib.transpose(pqkR+pqkI*1j, out=buf)
            zij, zlk = _ztrans(buf, zij, moij, ijslice,
                               buf, zlk, molk, lkslice, sym)
            lib.dot(zij.T, zlk.conj()*coulG[p0:p1,None], 1, eri_mo, 1)
            pqkR = pqkI = None
        nmok = mo_coeffs[2].shape[1]
        nmol = mo_coeffs[3].shape[1]
        eri_mo = lib.transpose(eri_mo.reshape(-1,nmol,nmok), axes=(0,2,1))
        return eri_mo.reshape(nij_pair,nlk_pair)

####################
# aosym = s1, complex integrals
#
# If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
# vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
# So  kptl/b - kptk/b  must be -1 < k/b < 1.  =>  kptl == kptk
#
    else:
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        eri_mo = numpy.zeros((nij_pair,nkl_pair), dtype=numpy.complex)

        tao = []
        ao_loc = None
        zij = zkl = buf = None
        for (pqkR, pqkI, p0, p1), (rskR, rskI, q0, q1) in \
                lib.izip(mydf.pw_loop(mesh, kptijkl[:2], q, max_memory=max_memory*.5),
                         mydf.pw_loop(mesh,-kptijkl[2:], q, max_memory=max_memory*.5)):
            buf = lib.transpose(pqkR+pqkI*1j, out=buf)
            zij = _ao2mo.r_e2(buf, moij, ijslice, tao, ao_loc, out=zij)
            buf = lib.transpose(rskR-rskI*1j, out=buf)
            zkl = _ao2mo.r_e2(buf, mokl, klslice, tao, ao_loc, out=zkl)
            zij *= coulG[p0:p1,None]
            lib.dot(zij.T, zkl, 1, eri_mo, 1)
            pqkR = pqkI = rskR = rskI = None
        return eri_mo