def _dm1_mo2ao(dm1, ket, bra):
    '''Transform a stack of MO-basis matrices into the AO basis.

    For each matrix ``dm1[i]`` the product ``ket . dm1[i] . bra^T`` is formed.
    The whole stack is handled with two ``lib.ddot`` calls (used here as a
    plain matrix product) by folding the set index into one matrix dimension.

    Args:
        dm1 : array of shape (nset, nket, nbra)
        ket : array of shape (nao, nket)
        bra : array with nbra columns; the final reshape requires nao rows

    Returns:
        array of shape (nset, nao, nao)
    '''
    nao, nket = ket.shape
    nbra = bra.shape[1]
    nset = len(dm1)

    # First half-transformation: put the ket index in front so that every
    # matrix of the stack is contracted with ``ket`` in a single product.
    stacked = dm1.transpose(1, 0, 2).reshape(nket, nset * nbra)
    half = lib.ddot(ket, stacked)

    # Second half-transformation: bring the set index back to the front and
    # contract the remaining MO index with ``bra``.
    half = half.reshape(nao, nset, nbra).transpose(1, 0, 2)
    half = half.reshape(nset * nao, nbra)
    return lib.ddot(half, bra.T).reshape(nset, nao, nao)
def zdotNC(aR, aI, bR, bI, alpha=1, cR=None, cI=None, beta=0):
    '''c = a*b.conj(), with real and imaginary parts stored separately.

    (aR + i aI)(bR - i bI) = (aR bR + aI bI) + i (aI bR - aR bI)

    ``alpha``, ``cR``/``cI`` and ``beta`` are forwarded to ``lib.ddot``
    (presumably GEMM-like: out = alpha*a.b + beta*out).

    Returns:
        (cR, cI) : real and imaginary parts of the product
    '''
    # Real part: the first call applies beta to the incoming cR, the second
    # accumulates on top of it.
    real = lib.ddot(aR, bR, alpha, cR, beta)
    real = lib.ddot(aI, bI, alpha, real, 1)

    # Imaginary part, built the same way with the sign carried by alpha.
    imag = lib.ddot(aR, bI, -alpha, cI, beta)
    imag = lib.ddot(aI, bR, alpha, imag, 1)
    return real, imag
def zdotNC(aR, aI, bR, bI, alpha=1, cR=None, cI=None, beta=0):
    '''c = a*b.conj(), with real and imaginary parts stored separately.

    (aR + i aI)(bR - i bI) = (aR bR + aI bI) + i (aI bR - aR bI)

    ``alpha``, ``cR``/``cI`` and ``beta`` are forwarded to ``lib.ddot``
    (presumably GEMM-like: out = alpha*a.b + beta*out).

    Returns:
        (cR, cI) : real and imaginary parts of the product
    '''
    # Real part: the first call applies beta to the incoming cR, the second
    # accumulates on top of it.
    real = lib.ddot(aR, bR, alpha, cR, beta)
    real = lib.ddot(aI, bI, alpha, real, 1)

    # Imaginary part, built the same way with the sign carried by alpha.
    imag = lib.ddot(aR, bI, -alpha, cI, beta)
    imag = lib.ddot(aI, bR, alpha, imag, 1)
    return real, imag
def zdotCN(aR, aI, bR, bI, alpha=1, cR=None, cI=None, beta=0):
    """c = a.conj()*b, with real and imaginary parts stored separately.

    (aR - i aI)(bR + i bI) = (aR bR + aI bI) + i (aR bI - aI bR)

    ``alpha``, ``cR``/``cI`` and ``beta`` are forwarded to ``lib.ddot``
    (presumably GEMM-like: out = alpha*a.b + beta*out).

    Returns:
        (cR, cI) : real and imaginary parts of the product
    """
    # Real part: the first call applies beta to the incoming cR, the second
    # accumulates on top of it.
    real = lib.ddot(aR, bR, alpha, cR, beta)
    real = lib.ddot(aI, bI, alpha, real, 1)

    # Imaginary part, built the same way with the sign carried by alpha.
    imag = lib.ddot(aR, bI, alpha, cI, beta)
    imag = lib.ddot(aI, bR, -alpha, imag, 1)
    return real, imag
def make_rdm1(ci, nmo, nocc):
    '''Build the one-particle density matrix from a flat CI vector.

    ``ci`` is unpacked as: ``ci[0]`` (reference coefficient), then nocc*nvir
    singles coefficients, then nocc*nocc*nvir*nvir doubles coefficients.
    NOTE(review): the +2 on the occupied diagonal suggests a closed-shell,
    spin-summed density matrix -- confirm against the caller.

    Args:
        ci : 1D coefficient vector laid out as described above
        nmo : number of orbitals
        nocc : number of occupied orbitals

    Returns:
        (nmo, nmo) array
    '''
    nvir = nmo - nocc
    nov = nocc * nvir
    c0 = ci[0]
    c1 = ci[1:nov + 1].reshape(nocc, nvir)
    c2 = ci[nov + 1:].reshape(nocc, nocc, nvir, nvir)

    # occupied-virtual block
    dov = c0 * c1 * 2
    dov += numpy.einsum('jb,ijab->ia', c1, c2) * 4
    dov -= numpy.einsum('jb,ijba->ia', c1, c2) * 2

    # 2*c2 - c2^T(a<->b) turns the two doubles terms of each block into one
    # matrix product.
    theta = c2 * 2 - c2.transpose(0, 1, 3, 2)

    # occupied-occupied block; the ddot call is equivalent to
    #:doo -= numpy.einsum('ijab,ikab->jk', c2, c2) * 4
    #:doo += numpy.einsum('ijab,kiab->jk', c2, c2) * 2
    doo = numpy.einsum('ia,ka->ik', c1, c1) * -2
    lib.ddot(c2.reshape(nocc, -1), theta.reshape(nocc, -1).T, -2, doo, 1)

    # virtual-virtual block; the ddot call is equivalent to
    #:dvv += numpy.einsum('ijab,ijac->cb', c2, c2) * 4
    #:dvv -= numpy.einsum('ijab,jiac->cb', c2, c2) * 2
    dvv = numpy.einsum('ia,ic->ca', c1, c1) * 2
    lib.ddot(c2.reshape(-1, nvir).T, theta.reshape(-1, nvir), 2, dvv, 1)

    occ = slice(None, nocc)
    vir = slice(nocc, None)
    rdm1 = numpy.empty((nmo, nmo))
    rdm1[occ, vir] = dov
    rdm1[vir, occ] = dov.T
    rdm1[occ, occ] = doo
    rdm1[vir, vir] = dvv

    # Add 2 to every occupied diagonal element.
    diag = numpy.arange(nocc)
    rdm1[diag, diag] += 2
    return rdm1
def contract_k(pLqR, pLqI):
    # Accumulate the exchange contribution of one block into vkR / vkI
    # (in place).  nao, nset, k_real, dmsR, dmsI, vkR, vkI, buf2R and buf2I
    # are taken from the enclosing scope.
    #
    # K ~ 'iLj,lLk*,li->kj' + 'lLk*,iLj,li->kj'
    # Reference implementation:
    #:pLq = (LpqR + LpqI.reshape(-1,nao,nao)*1j).transpose(1,0,2)
    #:tmp = numpy.dot(dm, pLq.reshape(nao,-1))
    #:vk += numpy.dot(pLq.reshape(-1,nao).conj().T, tmp.reshape(-1,nao))
    nrow = pLqR.shape[1]
    tmpR = numpy.ndarray((nao, nrow * nao), buffer=buf2R)

    # The two reshapes of the input do not depend on the density matrix.
    colsR = pLqR.reshape(nao, -1)
    rowsR_T = pLqR.reshape(-1, nao).T

    if k_real:
        # Real-only path: the imaginary input is never touched.
        for iset in range(nset):
            lib.ddot(dmsR[iset], colsR, 1, tmpR)
            lib.ddot(rowsR_T, tmpR.reshape(-1, nao), 1, vkR[iset], 1)
        return

    tmpI = numpy.ndarray((nao, nrow * nao), buffer=buf2I)
    colsI = pLqI.reshape(nao, -1)
    rowsI_T = pLqI.reshape(-1, nao).T
    for iset in range(nset):
        # tmp = dm . pLq  (overwrites the scratch buffers, beta=0)
        zdotNN(dmsR[iset], dmsI[iset], colsR, colsI, 1, tmpR, tmpI, 0)
        # vk += pLq^H . tmp  (accumulates, beta=1)
        zdotCN(rowsR_T, rowsI_T,
               tmpR.reshape(-1, nao), tmpI.reshape(-1, nao),
               1, vkR[iset], vkI[iset], 1)
def make_rdm1(ci, nmo, nocc):
    '''Build the one-particle density matrix from a flat CI vector.

    ``ci`` is unpacked as: ``ci[0]`` (reference coefficient), then nocc*nvir
    singles coefficients, then nocc*nocc*nvir*nvir doubles coefficients.
    NOTE(review): the +2 on the occupied diagonal suggests a closed-shell,
    spin-summed density matrix -- confirm against the caller.

    Args:
        ci : 1D coefficient vector laid out as described above
        nmo : number of orbitals
        nocc : number of occupied orbitals

    Returns:
        (nmo, nmo) array
    '''
    nvir = nmo - nocc
    nov = nocc * nvir
    c0 = ci[0]
    c1 = ci[1:nov + 1].reshape(nocc, nvir)
    c2 = ci[nov + 1:].reshape(nocc, nocc, nvir, nvir)

    # occupied-virtual block
    dov = c0 * c1 * 2
    dov += numpy.einsum('jb,ijab->ia', c1, c2) * 4
    dov -= numpy.einsum('jb,ijba->ia', c1, c2) * 2

    # 2*c2 - c2^T(a<->b) turns the two doubles terms of each block into one
    # matrix product.
    theta = c2 * 2 - c2.transpose(0, 1, 3, 2)

    # occupied-occupied block; the ddot call is equivalent to
    #:doo -= numpy.einsum('ijab,ikab->jk', c2, c2) * 4
    #:doo += numpy.einsum('ijab,kiab->jk', c2, c2) * 2
    doo = numpy.einsum('ia,ka->ik', c1, c1) * -2
    lib.ddot(c2.reshape(nocc, -1), theta.reshape(nocc, -1).T, -2, doo, 1)

    # virtual-virtual block; the ddot call is equivalent to
    #:dvv += numpy.einsum('ijab,ijac->cb', c2, c2) * 4
    #:dvv -= numpy.einsum('ijab,jiac->cb', c2, c2) * 2
    dvv = numpy.einsum('ia,ic->ca', c1, c1) * 2
    lib.ddot(c2.reshape(-1, nvir).T, theta.reshape(-1, nvir), 2, dvv, 1)

    occ = slice(None, nocc)
    vir = slice(nocc, None)
    rdm1 = numpy.empty((nmo, nmo))
    rdm1[occ, vir] = dov
    rdm1[vir, occ] = dov.T
    rdm1[occ, occ] = doo
    rdm1[vir, vir] = dvv

    # Add 2 to every occupied diagonal element.
    diag = numpy.arange(nocc)
    rdm1[diag, diag] += 2
    return rdm1
def transform_integrals_df(myadc):
    '''Build MO-basis two-electron integrals from density-fitting tensors.

    The 3-index tensors yielded by ``myadc.with_df.loop()`` are transformed to
    the MO basis and split into occupied/virtual blocks.  The oooo, ovoo, oovv
    and ovvo integrals are then formed as products of those blocks and written
    to datasets of a temporary HDF5 file.

    Args:
        myadc : object providing ``mo_coeff``, ``_nocc``, ``_nmo``,
            ``with_df``, ``stdout`` and ``verbose``

    Returns:
        eris : attribute container holding ``Lov``, ``Lvv`` (reshaped to 2D),
            the HDF5 handle ``feri1`` and the datasets ``oooo``, ``oovv``,
            ``ovoo``, ``ovvo``.  ``vvvv`` and ``ovvv`` are set to None.
    '''
    # time.clock() was removed in Python 3.8.  Keep using it where it still
    # exists (unchanged behaviour) and fall back to process_time otherwise.
    cpu_clock = getattr(time, 'clock', None) or time.process_time
    cput0 = (cpu_clock(), time.time())
    log = logger.Logger(myadc.stdout, myadc.verbose)

    mo_coeff = np.asarray(myadc.mo_coeff, order='F')
    nocc = myadc._nocc
    nmo = mo_coeff.shape[1]
    nvir = myadc._nmo - myadc._nocc

    with_df = myadc.with_df
    naux = with_df.get_naoaux()

    # A bare function object is used as a simple attribute container.
    eris = lambda:None
    eris.vvvv = None
    eris.ovvv = None

    Loo = np.empty((naux,nocc,nocc))
    Lvo = np.empty((naux,nvir,nocc))
    eris.Lvv = np.empty((naux,nvir,nvir))
    eris.Lov = np.empty((naux,nocc,nvir))

    ijslice = (0, nmo, 0, nmo)
    Lpq = None
    p1 = 0
    for eri1 in with_df.loop():
        # Lpq is passed back as ``out`` so the transformation can reuse it.
        Lpq = ao2mo._ao2mo.nr_e2(eri1, mo_coeff, ijslice, aosym='s2', out=Lpq).reshape(-1,nmo,nmo)
        p0, p1 = p1, p1 + Lpq.shape[0]
        Loo[p0:p1] = Lpq[:,:nocc,:nocc]
        eris.Lov[p0:p1] = Lpq[:,:nocc,nocc:]
        Lvo[p0:p1] = Lpq[:,nocc:,:nocc]
        eris.Lvv[p0:p1] = Lpq[:,nocc:,nocc:]

    # Flatten each orbital pair so the ERIs become plain matrix products.
    Loo = Loo.reshape(naux,nocc*nocc)
    eris.Lov = eris.Lov.reshape(naux,nocc*nvir)
    Lvo = Lvo.reshape(naux,nocc*nvir)
    eris.Lvv = eris.Lvv.reshape(naux,nvir*nvir)

    eris.feri1 = lib.H5TmpFile()
    eris.oooo = eris.feri1.create_dataset('oooo', (nocc,nocc,nocc,nocc), 'f8')
    eris.oovv = eris.feri1.create_dataset('oovv', (nocc,nocc,nvir,nvir), 'f8', chunks=(nocc,nocc,1,nvir))
    eris.ovoo = eris.feri1.create_dataset('ovoo', (nocc,nvir,nocc,nocc), 'f8', chunks=(nocc,1,nocc,nocc))
    eris.ovvo = eris.feri1.create_dataset('ovvo', (nocc,nvir,nvir,nocc), 'f8', chunks=(nocc,1,nvir,nocc))

    # (pq|rs) = sum_L L_pq L_rs
    eris.oooo[:] = lib.ddot(Loo.T, Loo).reshape(nocc,nocc,nocc,nocc)
    eris.ovoo[:] = lib.ddot(eris.Lov.T, Loo).reshape(nocc,nvir,nocc,nocc)
    eris.oovv[:] = lib.ddot(Loo.T, eris.Lvv).reshape(nocc,nocc,nvir,nvir)
    eris.ovvo[:] = lib.ddot(eris.Lov.T, Lvo).reshape(nocc,nvir,nvir,nocc)

    log.timer('DF-ADC integral transformation', *cput0)
    return eris
def get_eri(mydf):
    '''Accumulate real two-electron integrals with all k-points set to zero.

    AO-pair blocks are requested from ``mydf.pw_loop`` with ``aosym='s2'``
    and contracted with the weighted Coulomb kernel block by block.

    Returns:
        The accumulated product sum; the integer 0 if ``pw_loop`` yields
        nothing.
    '''
    zero_kpts = numpy.zeros((4,3))
    q = numpy.zeros(3)
    coulG = mydf.weighted_coulG(q, False, mydf.gs)

    eriR = 0
    for pqkR, pqkI, p0, p1 in mydf.pw_loop(mydf.gs, zero_kpts[:2], q,
                                           aosym='s2'):
        weights = coulG[p0:p1]
        # Real part of rho(G) v(G) [rho(G)]^*
        eriR += lib.ddot(pqkR * weights, pqkR.T)
        eriR += lib.ddot(pqkI * weights, pqkI.T)
        # Drop the references to the block before the next one is fetched.
        pqkR = pqkI = None
    return eriR
def get_eri(mydf):
    '''Accumulate real two-electron integrals with all k-points set to zero.

    AO-pair blocks are requested from ``mydf.pw_loop`` with ``aosym='s2'``
    and contracted with the weighted Coulomb kernel block by block.

    Returns:
        The accumulated product sum; the integer 0 if ``pw_loop`` yields
        nothing.
    '''
    zero_kpts = numpy.zeros((4,3))
    q = numpy.zeros(3)
    coulG = mydf.weighted_coulG(q, False, mydf.mesh)

    eriR = 0
    for pqkR, pqkI, p0, p1 in mydf.pw_loop(mydf.mesh, zero_kpts[:2], q,
                                           aosym='s2'):
        weights = coulG[p0:p1]
        # rho(G) v(G) rho(-G) = rho(G) v(G) [rho(G)]^*; only the real part
        # is accumulated.
        eriR += lib.ddot(pqkR * weights, pqkR.T)
        eriR += lib.ddot(pqkI * weights, pqkI.T)
        # Drop the references to the block before the next one is fetched.
        pqkR = pqkI = None
    return eriR
def _contract_compact(mydf, mos, coulG, max_memory):
    '''Evaluate pair-packed two-electron integrals on the real-space grid.

    Orbital-pair products (lower triangle only) of the first orbital set are
    Fourier transformed, multiplied by the weighted Coulomb kernel and
    transformed back; the real part of the result is contracted with the
    pair products of the second orbital set.

    Args:
        mydf : object providing ``cell`` and ``mesh``
        mos : pair (moiT, mokT) of orbital arrays of shape (nmo, ngrids)
        coulG : Coulomb kernel, scaled here by cell.vol/ngrids
        max_memory : memory budget used to size the pair blocks
            (presumably in MB, given the 1e6/8 conversion -- confirm)

    Returns:
        array of shape (nmoi*(nmoi+1)//2, nmok*(nmok+1)//2)
    '''
    cell = mydf.cell
    moiT, mokT = mos
    nmoi, ngrids = moiT.shape
    nmok = mokT.shape[0]
    npair_i = nmoi*(nmoi+1)//2
    npair_k = nmok*(nmok+1)//2
    wcoulG = coulG * (cell.vol/ngrids)

    def fill_orbital_pair(moT, i0, i1, workspace):
        # Fill rows with moT[i]*moT[j] for i0 <= i < i1 and j <= i, using
        # ``workspace`` as backing storage.
        npair = i1*(i1+1)//2 - i0*(i0+1)//2
        pairs = numpy.ndarray((npair,ngrids), dtype=workspace.dtype,
                              buffer=workspace)
        row = 0
        for i in range(i0, i1):
            nrow = i + 1
            numpy.einsum('p,jp->jp', moT[i], moT[:nrow],
                         out=pairs[row:row+nrow])
            row += nrow
        return pairs

    eri = numpy.empty((npair_i, npair_k))
    blksize = int(min(max(npair_i, npair_k),
                      (max_memory*1e6/8 - eri.size)/2/ngrids+1))
    # One scratch buffer serves both orbital sets: the FFT output is a
    # separate array, so the buffer is free again for the inner loop.
    buf = numpy.empty((blksize,ngrids))
    for p0, p1 in lib.prange_tril(0, nmoi, blksize):
        row0 = p0*(p0+1)//2
        row1 = p1*(p1+1)//2
        mo_pairs_G = tools.fft(fill_orbital_pair(moiT, p0, p1, buf), mydf.mesh)
        mo_pairs_G *= wcoulG
        v = tools.ifft(mo_pairs_G, mydf.mesh)
        vR = numpy.asarray(v.real, order='C')
        for q0, q1 in lib.prange_tril(0, nmok, blksize):
            col0 = q0*(q0+1)//2
            col1 = q1*(q1+1)//2
            mo_pairs = numpy.asarray(fill_orbital_pair(mokT, q0, q1, buf),
                                     order='C')
            eri[row0:row1, col0:col1] = lib.ddot(vR, mo_pairs.T)
        v = None
    return eri
def get_eri_laplacian(mydf):
    '''Accumulate integrals of the Laplacian-weighted kernel at zero k-point.

    Same contraction as the plain Coulomb case, except that the kernel of
    each plane wave is multiplied by -|G|^2.  AO pairs are requested without
    permutational symmetry (``aosym='s1'``).

    Returns:
        array reshaped to (nao, nao, nao, nao)
    '''
    cell = mydf.cell
    # ``nao`` was previously used without being defined in this function.
    nao = cell.nao_nr()

    eriR = 0
    kptijkl = numpy.zeros((4,3))
    q = numpy.zeros(3)
    coulG = mydf.weighted_coulG(q, False, mydf.mesh)
    Gv = cell.get_Gv_weights(mydf.mesh)[0]
    G2 = numpy.einsum('gx,gx->g', Gv, Gv)  # |G|^2 for every plane wave

    for pqkR, pqkI, p0, p1 \
            in mydf.pw_loop(mydf.mesh, kptijkl[:2], q, aosym='s1'):
        vG = coulG[p0:p1]
        G2_vG = -G2[p0:p1] * vG  # = \nabla^2 * f12(G)
        pqkRv = pqkR * G2_vG
        pqkIv = pqkI * G2_vG
        # Real part of rho(G) [\nabla^2 f12(G)] [rho(G)]^*
        eriR += lib.ddot(pqkRv, pqkR.T)
        eriR += lib.ddot(pqkIv, pqkI.T)
        pqkR = pqkI = None
    return eriR.reshape(nao,nao,nao,nao)
def get_eri_ip1(mydf):
    '''Accumulate integrals of the gradient-weighted kernel at zero k-point.

    The kernel of each plane wave is multiplied by the three Cartesian
    components of G, giving one integral set per component.  AO pairs are
    requested without permutational symmetry (``aosym='s1'``).

    Returns:
        array reshaped to (3, nao, nao, nao, nao)
    '''
    cell = mydf.cell
    # ``nao`` was previously used without being defined in this function.
    nao = cell.nao_nr()

    eriR = 0
    kptijkl = numpy.zeros((4,3))
    q = numpy.zeros(3)
    coulG = mydf.weighted_coulG(q, False, mydf.mesh)
    Gv = cell.get_Gv_weights(mydf.mesh)[0]

    for pqkR, pqkI, p0, p1 \
            in mydf.pw_loop(mydf.mesh, kptijkl[:2], q, aosym='s1'):
        nG = p1 - p0
        vG = coulG[p0:p1]
        G_vG = Gv[p0:p1].T * vG  # = -i\nabla * f12(G)
        # Broadcast over the Cartesian component, then fold it into the
        # pair index: (3, npair, nG) -> (3*npair, nG).
        pqkRv = (pqkR * G_vG[:,None,:]).reshape(-1,nG)
        pqkIv = (pqkI * G_vG[:,None,:]).reshape(-1,nG)
        # Imaginary part of rho(G) [-i\nabla*f12(G)] [rho(G)]^*
        eriR += lib.ddot(pqkIv, pqkR.reshape(-1,nG).T)
        eriR -= lib.ddot(pqkRv, pqkI.reshape(-1,nG).T)
        pqkR = pqkI = None
    return eriR.reshape(3,nao,nao,nao,nao)
def get_eri_ip1(mydf):
    '''Accumulate integrals of the gradient-weighted kernel at zero k-point.

    The kernel of each plane wave is multiplied by the three Cartesian
    components of G, giving one integral set per component.  AO pairs are
    requested without permutational symmetry (``aosym='s1'``).

    Returns:
        array reshaped to (3, nao, nao, nao, nao)
    '''
    cell = mydf.cell
    # ``nao`` was previously used without being defined in this function.
    nao = cell.nao_nr()

    eriR = 0
    kptijkl = numpy.zeros((4, 3))
    q = numpy.zeros(3)
    coulG = mydf.weighted_coulG(q, False, mydf.mesh)
    Gv = cell.get_Gv_weights(mydf.mesh)[0]

    for pqkR, pqkI, p0, p1 \
            in mydf.pw_loop(mydf.mesh, kptijkl[:2], q, aosym='s1'):
        nG = p1 - p0
        vG = coulG[p0:p1]
        G_vG = Gv[p0:p1].T * vG  # = -i\nabla * f12(G)
        # Broadcast over the Cartesian component, then fold it into the
        # pair index: (3, npair, nG) -> (3*npair, nG).
        pqkRv = (pqkR * G_vG[:, None, :]).reshape(-1, nG)
        pqkIv = (pqkI * G_vG[:, None, :]).reshape(-1, nG)
        # Imaginary part of rho(G) [-i\nabla*f12(G)] [rho(G)]^*
        eriR += lib.ddot(pqkIv, pqkR.reshape(-1, nG).T)
        eriR -= lib.ddot(pqkRv, pqkI.reshape(-1, nG).T)
        pqkR = pqkI = None
    return eriR.reshape(3, nao, nao, nao, nao)
def get_eri_laplacian(mydf):
    '''Accumulate integrals of the Laplacian-weighted kernel at zero k-point.

    Same contraction as the plain Coulomb case, except that the kernel of
    each plane wave is multiplied by -|G|^2.  AO pairs are requested without
    permutational symmetry (``aosym='s1'``).

    Returns:
        array reshaped to (nao, nao, nao, nao)
    '''
    cell = mydf.cell
    # ``nao`` was previously used without being defined in this function.
    nao = cell.nao_nr()

    eriR = 0
    kptijkl = numpy.zeros((4, 3))
    q = numpy.zeros(3)
    coulG = mydf.weighted_coulG(q, False, mydf.mesh)
    Gv = cell.get_Gv_weights(mydf.mesh)[0]
    G2 = numpy.einsum('gx,gx->g', Gv, Gv)  # |G|^2 for every plane wave

    for pqkR, pqkI, p0, p1 \
            in mydf.pw_loop(mydf.mesh, kptijkl[:2], q, aosym='s1'):
        vG = coulG[p0:p1]
        G2_vG = -G2[p0:p1] * vG  # = \nabla^2 * f12(G)
        pqkRv = pqkR * G2_vG
        pqkIv = pqkI * G2_vG
        # Real part of rho(G) [\nabla^2 f12(G)] [rho(G)]^*
        eriR += lib.ddot(pqkRv, pqkR.T)
        eriR += lib.ddot(pqkIv, pqkI.T)
        pqkR = pqkI = None
    return eriR.reshape(nao, nao, nao, nao)
def get_eri_3c2e(mydf):
    '''Accumulate three-index integrals between AO pairs and single AOs.

    AO-pair blocks from ``mydf.pw_loop`` (``aosym='s2'``, all k-points zero)
    are weighted by the Coulomb kernel and contracted with the Fourier
    transform of the AOs of ``mydf.cell`` on the same plane waves.

    Returns:
        The accumulated product sum; the integer 0 if ``pw_loop`` yields
        nothing.
    '''
    from pyscf.gto.ft_ao import ft_ao
    # ``cell`` was previously used without being defined in this function.
    cell = mydf.cell

    eriR = 0
    kptijkl = numpy.zeros((4,3))
    q = numpy.zeros(3)
    coulG = mydf.weighted_coulG(q, False, mydf.mesh)
    Gv = cell.get_Gv_weights(mydf.mesh)[0]
    ao = ft_ao(cell, Gv)

    for pqkR, pqkI, p0, p1 \
            in mydf.pw_loop(mydf.mesh, kptijkl[:2], q, aosym='s2'):
        vG = coulG[p0:p1]
        pqkRv = pqkR * vG
        pqkIv = pqkI * vG
        # Real part of rho(G) v(G) [ao(G)]^*
        eriR += lib.ddot(pqkRv, ao[p0:p1].real)
        eriR += lib.ddot(pqkIv, ao[p0:p1].imag)
        pqkR = pqkI = None
    return eriR
def contract_k(pLqR, pLqI):
    # Accumulate the exchange contribution of one block into vkR / vkI
    # (in place).  nao, nset, k_real, dmsR, dmsI, vkR, vkI, buf2R and buf2I
    # are taken from the enclosing scope.
    #
    # K ~ 'iLj,lLk*,li->kj' + 'lLk*,iLj,li->kj'
    # Reference implementation:
    #:pLq = (LpqR + LpqI.reshape(-1,nao,nao)*1j).transpose(1,0,2)
    #:tmp = numpy.dot(dm, pLq.reshape(nao,-1))
    #:vk += numpy.dot(pLq.reshape(-1,nao).conj().T, tmp.reshape(-1,nao))
    nrow = pLqR.shape[1]
    tmpR = numpy.ndarray((nao,nrow*nao), buffer=buf2R)

    # The two reshapes of the input do not depend on the density matrix.
    colsR = pLqR.reshape(nao,-1)
    rowsR_T = pLqR.reshape(-1,nao).T

    if k_real:
        # Real-only path: the imaginary input is never touched.
        for iset in range(nset):
            lib.ddot(dmsR[iset], colsR, 1, tmpR)
            lib.ddot(rowsR_T, tmpR.reshape(-1,nao), 1, vkR[iset], 1)
        return

    tmpI = numpy.ndarray((nao,nrow*nao), buffer=buf2I)
    colsI = pLqI.reshape(nao,-1)
    rowsI_T = pLqI.reshape(-1,nao).T
    for iset in range(nset):
        # tmp = dm . pLq  (overwrites the scratch buffers, beta=0)
        zdotNN(dmsR[iset], dmsI[iset], colsR, colsI, 1, tmpR, tmpI, 0)
        # vk += pLq^H . tmp  (accumulates, beta=1)
        zdotCN(rowsR_T, rowsI_T,
               tmpR.reshape(-1,nao), tmpI.reshape(-1,nao),
               1, vkR[iset], vkI[iset], 1)
def get_eri_3c2e(mydf):
    '''Accumulate three-index integrals between AO pairs and single AOs.

    AO-pair blocks from ``mydf.pw_loop`` (``aosym='s2'``, all k-points zero)
    are weighted by the Coulomb kernel and contracted with the Fourier
    transform of the AOs of ``mydf.cell`` on the same plane waves.

    Returns:
        The accumulated product sum; the integer 0 if ``pw_loop`` yields
        nothing.
    '''
    from pyscf.gto.ft_ao import ft_ao
    # ``cell`` was previously used without being defined in this function.
    cell = mydf.cell

    eriR = 0
    kptijkl = numpy.zeros((4, 3))
    q = numpy.zeros(3)
    coulG = mydf.weighted_coulG(q, False, mydf.mesh)
    Gv = cell.get_Gv_weights(mydf.mesh)[0]
    ao = ft_ao(cell, Gv)

    for pqkR, pqkI, p0, p1 \
            in mydf.pw_loop(mydf.mesh, kptijkl[:2], q, aosym='s2'):
        vG = coulG[p0:p1]
        pqkRv = pqkR * vG
        pqkIv = pqkI * vG
        # Real part of rho(G) v(G) [ao(G)]^*
        eriR += lib.ddot(pqkRv, ao[p0:p1].real)
        eriR += lib.ddot(pqkIv, ao[p0:p1].imag)
        pqkR = pqkI = None
    return eriR
def contract_k(pLqR, pLqI, pjqR, pjqI):
    # Accumulate the mixed (Lpq, j3c) exchange contribution of one block into
    # vkR / vkI (in place).  nao, nset, k_real, hermi, dmsR, dmsI, vkR, vkI,
    # buf3R and buf3I are taken from the enclosing scope.
    #
    # K ~ 'iLj,lLk*,li->kj' + 'lLk*,iLj,li->kj'
    # Reference implementation:
    #:Lpq = LpqR + LpqI*1j
    #:j3c = j3cR + j3cI*1j
    #:for i in range(nset):
    #:    tmp = numpy.dot(dms[i], j3c.reshape(nao,-1))
    #:    vk1 = numpy.dot(Lpq.reshape(-1,nao).conj().T, tmp.reshape(-1,nao))
    #:    tmp = numpy.dot(dms[i], Lpq.reshape(nao,-1))
    #:    vk1+= numpy.dot(j3c.reshape(-1,nao).conj().T, tmp.reshape(-1,nao))
    #:    vkR[i] += vk1.real
    #:    vkI[i] += vk1.imag
    nrow = pLqR.shape[1]
    tmpR = numpy.ndarray((nao,nrow*nao), buffer=buf3R)

    # Reshaped views of the real inputs, shared by both paths.
    LcolsR = pLqR.reshape(nao,-1)
    LrowsR_T = pLqR.reshape(-1,nao).T
    jcolsR = pjqR.reshape(nao,-1)
    jrowsR_T = pjqR.reshape(-1,nao).T

    if k_real:
        for iset in range(nset):
            tmpR = lib.ddot(dmsR[iset], jcolsR, 1, tmpR)
            vk1R = lib.ddot(LrowsR_T, tmpR.reshape(-1,nao))
            vkR[iset] += vk1R
            if hermi:
                # The second term is the transpose of the first.
                vkR[iset] += vk1R.T
                continue
            tmpR = lib.ddot(dmsR[iset], LcolsR, 1, tmpR)
            lib.ddot(jrowsR_T, tmpR.reshape(-1,nao), 1, vkR[iset], 1)
        return

    tmpI = numpy.ndarray((nao,nrow*nao), buffer=buf3I)
    LcolsI = pLqI.reshape(nao,-1)
    LrowsI_T = pLqI.reshape(-1,nao).T
    jcolsI = pjqI.reshape(nao,-1)
    jrowsI_T = pjqI.reshape(-1,nao).T
    for iset in range(nset):
        tmpR, tmpI = zdotNN(dmsR[iset], dmsI[iset], jcolsR, jcolsI,
                            1, tmpR, tmpI, 0)
        vk1R, vk1I = zdotCN(LrowsR_T, LrowsI_T,
                            tmpR.reshape(-1,nao), tmpI.reshape(-1,nao))
        vkR[iset] += vk1R
        vkI[iset] += vk1I
        if hermi:
            # The second term is the conjugate transpose of the first.
            vkR[iset] += vk1R.T
            vkI[iset] -= vk1I.T
            continue
        tmpR, tmpI = zdotNN(dmsR[iset], dmsI[iset], LcolsR, LcolsI,
                            1, tmpR, tmpI, 0)
        zdotCN(jrowsR_T, jrowsI_T,
               tmpR.reshape(-1,nao), tmpI.reshape(-1,nao),
               1, vkR[iset], vkI[iset], 1)
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    '''Generate the 3-index density-fitting tensors and write them to disk.

    Pass 1 dumps the raw 'int3c2e' integrals over the fused auxiliary cell
    into a temporary HDF5 swap file and builds the 2-index metric (j2c) for
    every unique k-point difference.  Pass 2 (``make_kpt``) subtracts the
    plane-wave contributions from the 3-index integrals, applies the
    decomposed metric and stores the result in ``cderi_file``.

    Args:
        mydf : DF object providing ``stdout``, ``verbose``, ``max_memory``,
            ``mesh``, ``linear_dep_threshold``, ``weighted_coulG`` and
            ``auxbar``
        cell : the AO cell
        auxcell : the auxiliary-basis cell
        kptij_lst : array of k-point pairs, indexed as [:,0] (kpti) and
            [:,1] (kptj)
        cderi_file : path of the HDF5 file to create.  The swap file is
            created in the same directory.

    Returns:
        None.  Datasets 'j3c-kptij' and 'j3c/<pair>/<step>' are written to
        ``cderi_file``.
    '''
    # time.clock() was removed in Python 3.8.  Keep using it where it still
    # exists (unchanged behaviour) and fall back to process_time otherwise.
    cpu_clock = getattr(time, 'clock', None) or time.process_time
    t1 = (cpu_clock(), time.time())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, auxcell)

    # The ideal way to hold the temporary integrals is to store them in the
    # cderi_file and overwrite them inplace in the second pass.  The current
    # HDF5 library does not have an efficient way to manage free space in
    # overwriting.  It often leads to the cderi_file ~2 times larger than the
    # necessary size.  For now, dumping the DF integral intermediates to a
    # separated temporary file can avoid this issue.  The DF intermediates may
    # be terribly huge. The temporary file should be placed in the same disk
    # as cderi_file.
    swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file))
    fswap = lib.H5TmpFile(swapfile.name)
    # Unlink swapfile to avoid trash
    swapfile = None

    outcore._aux_e2(cell, fused_cell, fswap, 'int3c2e', aosym='s2',
                    kptij_lst=kptij_lst, dataname='j3c-junk',
                    max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    mesh = mydf.mesh
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]

    kptis = kptij_lst[:,0]
    kptjs = kptij_lst[:,1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)

    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts)

    # NOTE: an alternative evaluation of j2c through a modified charge basis
    # (make_modchg_basis) used to be kept here as commented-out code; it was
    # annotated as possibly having a larger numerical error.

    if cell.dimension == 1 or cell.dimension == 2:
        plain_ints = _gaussian_int(fused_cell)

    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    blksize = max(2048, int(max_memory*.5e6/16/fused_cell.nao_nr()))
    log.debug2('max_memory %s (MB) blocksize %s', max_memory, blksize)
    for k, kpt in enumerate(uniq_kpts):
        coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, mesh))
        for p0, p1 in lib.prange(0, ngrids, blksize):
            aoaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], None, b, gxyz[p0:p1], Gvbase, kpt)
            if (cell.dimension == 1 or cell.dimension == 2) and is_zero(kpt):
                G0idx, SI_on_z = pbcgto.cell._SI_for_uniform_model_charge(cell, Gv[p0:p1])
                aoaux[G0idx] -= numpy.einsum('g,i->gi', SI_on_z, plain_ints)
            aoaux = aoaux.T
            LkR = aoaux.real * coulG[p0:p1]
            LkI = aoaux.imag * coulG[p0:p1]
            aoaux = None

            # Only the rows from ``naux`` on are corrected; the matching
            # columns are then restored from them by (conjugate) transpose.
            if is_zero(kpt):  # kpti == kptj
                j2c[k][naux:] -= lib.ddot(LkR[naux:], LkR.T)
                j2c[k][naux:] -= lib.ddot(LkI[naux:], LkI.T)
                j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T
            else:
                j2cR, j2cI = zdotCN(LkR[naux:], LkI[naux:], LkR.T, LkI.T)
                j2c[k][naux:] -= j2cR + j2cI * 1j
                j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T.conj()
            LkR = LkI = None
        fswap['j2c/%d'%k] = fuse(fuse(j2c[k]).T).T
    j2c = coulG = None

    def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
        # Process every k-point pair whose difference equals
        # uniq_kpts[uniq_kptji_id].  ``feri`` and ``nsegs`` are bound in the
        # enclosing scope before this function is first called.
        kpt = uniq_kpts[uniq_kptji_id]
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

        shls_slice = (auxcell.nbas, fused_cell.nbas)
        Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
        if (cell.dimension == 1 or cell.dimension == 2) and is_zero(kpt):
            G0idx, SI_on_z = pbcgto.cell._SI_for_uniform_model_charge(cell, Gv)
            s = plain_ints[-Gaux.shape[1]:]  # Only compensated Gaussians
            Gaux[G0idx] -= numpy.einsum('g,i->gi', SI_on_z, s)

        wcoulG = mydf.weighted_coulG(kpt, False, mesh)
        Gaux *= wcoulG.reshape(-1,1)
        kLR = Gaux.real.copy('C')
        kLI = Gaux.imag.copy('C')
        Gaux = None

        j2c = numpy.asarray(fswap['j2c/%d'%uniq_kptji_id])
        try:
            j2c = scipy.linalg.cholesky(j2c, lower=True)
            j2ctag = 'CD'
        except scipy.linalg.LinAlgError:
            # The metric is not positive definite (a disabled upstream message
            # attributed this to an insufficient mesh).  Fall back to an
            # eigen-decomposition and drop the eigenvectors whose eigenvalue
            # is not above the linear-dependency threshold.
            w, v = scipy.linalg.eigh(j2c)
            log.debug('DF metric linear dependency for kpt %s', uniq_kptji_id)
            log.debug('cond = %.4g, drop %d bfns',
                      w[-1]/w[0], numpy.count_nonzero(w<mydf.linear_dep_threshold))
            v = v[:,w>mydf.linear_dep_threshold].T.conj()
            v /= numpy.sqrt(w[w>mydf.linear_dep_threshold]).reshape(-1,1)
            j2c = v
            j2ctag = 'eig'

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao*(nao+1)//2

            vbar = mydf.auxbar(fused_cell)
            ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
            ovlp = [lib.pack_tril(s) for s in ovlp]
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory-mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory*.38e6/16/naux/(nkptj+1)), 1), nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16, int(max_memory*.1e6/16/buflen/(nkptj+1)))
        else:
            Gblksize = max(16, int(max_memory*.2e6/16/buflen/(nkptj+1)))
        Gblksize = min(Gblksize, ngrids, 16384)
        pqkRbuf = numpy.empty(buflen*Gblksize)
        pqkIbuf = numpy.empty(buflen*Gblksize)
        # buf for ft_aopair
        buf = numpy.empty(nkptj*buflen*Gblksize, dtype=numpy.complex128)

        def pw_contract(istep, sh_range, j3cR, j3cI):
            # Subtract the plane-wave part from j3cR/j3cI (in place) for one
            # AO shell range, then apply the metric and write to ``feri``.
            bstart, bend, ncol = sh_range
            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)

            for p0, p1 in lib.prange(0, ngrids, Gblksize):
                dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                            b, gxyz[p0:p1], Gvbase, kpt,
                                            adapted_kptjs, out=buf)

                if (cell.dimension == 1 or cell.dimension == 2) and is_zero(kpt):
                    G0idx, SI_on_z = pbcgto.cell._SI_for_uniform_model_charge(cell, Gv[p0:p1])
                    if SI_on_z.size > 0:
                        for k, aoao in enumerate(dat):
                            aoao[G0idx] -= numpy.einsum('g,i->gi', SI_on_z, ovlp[k])
                            aux = fuse(ft_ao.ft_ao(fused_cell, Gv[p0:p1][G0idx]).T)
                            vG_mod = numpy.einsum('ig,g,g->i', aux.conj(),
                                                  wcoulG[p0:p1][G0idx], SI_on_z)
                            if gamma_point(adapted_kptjs[k]):
                                j3cR[k][:naux] -= vG_mod[:,None].real * ovlp[k]
                            else:
                                tmp = vG_mod[:,None] * ovlp[k]
                                j3cR[k][:naux] -= tmp.real
                                j3cI[k][:naux] -= tmp.imag
                            # Release the temporaries (the original statement
                            # was missing the trailing "= None").
                            tmp = aux = vG_mod = None

                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = dat[k].reshape(nG,ncol)
                    pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T

                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                    # The imaginary part is only stored when it was allocated
                    # by the caller (see the j3cI.append(None) case below).
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkR.T,  1, j3cI[k][naux:], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)
                if j2ctag == 'CD':
                    v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True)
                else:
                    v = lib.dot(j2c, v)
                feri['j3c/%d/%d'%(ji,istep)] = v

        # pw_contract runs in the background while the next shell range is
        # being read from the swap file.
        with lib.call_in_background(pw_contract) as compute:
            col1 = 0
            for istep, sh_range in enumerate(shranges):
                log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d',
                           istep+1, len(shranges), *sh_range)
                bstart, bend, ncol = sh_range
                col0, col1 = col1, col1+ncol
                j3cR = []
                j3cI = []
                for k, idx in enumerate(adapted_ji_idx):
                    v = numpy.vstack([fswap['j3c-junk/%d/%d'%(idx,i)][0,col0:col1].T
                                      for i in range(nsegs)])
                    if is_zero(kpt) and cell.dimension == 3:
                        for i in numpy.where(vbar != 0)[0]:
                            v[i] -= vbar[i] * ovlp[k][col0:col1]
                    j3cR.append(numpy.asarray(v.real, order='C'))
                    if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                        j3cI.append(None)
                    else:
                        j3cI.append(numpy.asarray(v.imag, order='C'))
                v = None
                compute(istep, sh_range, j3cR, j3cI)

        for ji in adapted_ji_idx:
            del(fswap['j3c-junk/%d'%ji])

    # The context manager closes the output file even if a k-point fails.
    with h5py.File(cderi_file, 'w') as feri:
        feri['j3c-kptij'] = kptij_lst
        nsegs = len(fswap['j3c-junk/0'])
        for k, kpt in enumerate(uniq_kpts):
            make_kpt(k)
def _contract_vvvv_t2(mycc, mol, vvL, t2, out=None, verbose=None):
    '''Ht2 = numpy.einsum('ijcd,acdb->ijab', t2, vvvv)

    The vvvv integrals are never stored: blocks are assembled on the fly as
    products of rows of ``vvL`` and contracted with ``t2`` immediately.

    Args:
        mycc : object providing ``max_memory``
        mol : passed to ``logger.new_logger``
        vvL : 2D array-like of shape (nvir_pair, naux), rows indexed by the
            lower-triangular virtual pair
        t2 : amplitudes; the last two dimensions are (nvira, nvirb)
        out : optional buffer backing the result
        verbose : passed to ``logger.new_logger``

    Returns:
        array with the same shape as ``t2``
    '''
    _dgemm = lib.numpy_helper._dgemm
    # time.clock() was removed in Python 3.8.  Keep using it where it still
    # exists (unchanged behaviour) and fall back to process_time otherwise.
    cpu_clock = getattr(time, 'clock', None) or time.process_time
    time0 = cpu_clock(), time.time()
    log = logger.new_logger(mol, verbose)

    naux = vvL.shape[-1]
    nvira, nvirb = t2.shape[-2:]
    x2 = t2.reshape(-1, nvira, nvirb)
    nocc2 = x2.shape[0]
    nvir2 = nvira * nvirb
    Ht2 = numpy.ndarray(x2.shape, buffer=out)
    Ht2[:] = 0

    max_memory = max(MEMORYMIN, mycc.max_memory - lib.current_memory()[0])

    def contract_blk_(eri, i0, i1, j0, j1):
        # Add the contribution of the integral block [i0:i1, j0:j1] to Ht2.
        ic = i1 - i0
        jc = j1 - j0
        #:Ht2[:,j0:j1] += numpy.einsum('xef,efab->xab', x2[:,i0:i1], eri)
        _dgemm('N', 'N', nocc2, jc * nvirb, ic * nvirb,
               x2.reshape(-1, nvir2), eri.reshape(-1, jc * nvirb),
               Ht2.reshape(-1, nvir2), 1, 1, i0 * nvirb, 0, j0 * nvirb)

        if i0 > j0:
            # Off-diagonal block: also add the transposed contribution.
            #:Ht2[:,i0:i1] += numpy.einsum('xef,abef->xab', x2[:,j0:j1], eri)
            _dgemm('N', 'T', nocc2, ic * nvirb, jc * nvirb,
                   x2.reshape(-1, nvir2), eri.reshape(-1, jc * nvirb),
                   Ht2.reshape(-1, nvir2), 1, 1, j0 * nvirb, 0, i0 * nvirb)

    #TODO: check if vvL can be entirely loaded into memory
    nvir_pair = nvirb * (nvirb + 1) // 2
    dmax = numpy.sqrt(max_memory * .7e6 / 8 / nvirb**2 / 2)
    dmax = int(min((nvira + 3) // 4, max(ccsd.BLKMIN, dmax)))
    # NOTE(review): vvblk is divided by naux twice (here and on the next
    # line) -- confirm that this is intended.
    vvblk = (max_memory * 1e6 / 8 - dmax**2 * (nvirb**2 * 1.5 + naux)) / naux
    vvblk = int(min((nvira + 3) // 4, max(ccsd.BLKMIN, vvblk / naux)))
    eribuf = numpy.empty((dmax, dmax, nvir_pair))
    loadbuf = numpy.empty((dmax, dmax, nvirb, nvirb))
    tril2sq = lib.square_mat_in_trilu_indices(nvira)

    for i0, i1 in lib.prange(0, nvira, dmax):
        # Rows of vvL covering the pairs whose larger index lies in [i0, i1).
        off0 = i0 * (i0 + 1) // 2
        off1 = i1 * (i1 + 1) // 2
        vvL0 = _cp(vvL[off0:off1])
        for j0, j1 in lib.prange(0, i1, dmax):
            ijL = vvL0[tril2sq[i0:i1, j0:j1] - off0].reshape(-1, naux)
            eri = numpy.ndarray(((i1 - i0) * (j1 - j0), nvir_pair), buffer=eribuf)
            for p0, p1 in lib.prange(0, nvir_pair, vvblk):
                vvL1 = _cp(vvL[p0:p1])
                eri[:, p0:p1] = lib.ddot(ijL, vvL1.T)
                vvL1 = None

            # Load the pair-packed block into a 4-index scratch array.
            tmp = numpy.ndarray((i1 - i0, nvirb, j1 - j0, nvirb), buffer=loadbuf)
            _ccsd.libcc.CCload_eri(tmp.ctypes.data_as(ctypes.c_void_p),
                                   eri.ctypes.data_as(ctypes.c_void_p),
                                   (ctypes.c_int * 4)(i0, i1, j0, j1),
                                   ctypes.c_int(nvirb))
            contract_blk_(tmp, i0, i1, j0, j1)
            time0 = log.timer_debug1('vvvv [%d:%d,%d:%d]' % (i0, i1, j0, j1), *time0)
    return Ht2.reshape(t2.shape)
def _make_df_eris(cc, mo_coeff=None):
    '''Build the MO integrals for CC from density-fitting tensors.

    The 3-index tensors from ``cc.with_df.loop()`` are transformed to the MO
    basis.  The occupied blocks are kept in memory, the virtual-virtual block
    is stored pair-packed in the 'vvL' dataset, and the oooo, ovoo, ovov,
    ovvo, oovv and ovvv integrals are written to datasets of a temporary
    HDF5 file.

    Args:
        cc : object providing ``with_df`` and ``max_memory``
        mo_coeff : forwarded to ``_ChemistsERIs._common_init_``

    Returns:
        the populated ``_ChemistsERIs`` object
    '''
    eris = _ChemistsERIs()
    eris._common_init_(cc, mo_coeff)
    nocc = eris.nocc
    nmo = eris.fock.shape[0]
    nvir = nmo - nocc
    nvir_pair = nvir * (nvir + 1) // 2
    with_df = cc.with_df
    naux = eris.naux = with_df.get_naoaux()

    eris.feri = lib.H5TmpFile()
    create = eris.feri.create_dataset
    eris.oooo = create('oooo', (nocc, nocc, nocc, nocc), 'f8')
    eris.ovoo = create('ovoo', (nocc, nvir, nocc, nocc), 'f8',
                       chunks=(nocc, 1, nocc, nocc))
    eris.ovov = create('ovov', (nocc, nvir, nocc, nvir), 'f8',
                       chunks=(nocc, 1, nocc, nvir))
    eris.ovvo = create('ovvo', (nocc, nvir, nvir, nocc), 'f8',
                       chunks=(nocc, 1, nvir, nocc))
    eris.oovv = create('oovv', (nocc, nocc, nvir, nvir), 'f8',
                       chunks=(nocc, nocc, 1, nvir))
    # nrow ~ 4e9/8/blockdim to ensure hdf5 chunk < 4GB
    vvL_chunks = (min(nvir_pair, int(4e8 / with_df.blockdim)),
                  min(naux, with_df.blockdim))
    eris.vvL = create('vvL', (nvir_pair, naux), 'f8', chunks=vvL_chunks)

    Loo = numpy.empty((naux, nocc, nocc))
    Lov = numpy.empty((naux, nocc, nvir))
    mo = numpy.asarray(eris.mo_coeff, order='F')
    ijslice = (0, nmo, 0, nmo)
    stop = 0
    Lpq = None
    for eri1 in with_df.loop():
        # Lpq is passed back as ``out`` so the transformation can reuse it.
        Lpq = _ao2mo.nr_e2(eri1, mo, ijslice, aosym='s2', mosym='s1', out=Lpq)
        start, stop = stop, stop + Lpq.shape[0]
        Lpq = Lpq.reshape(stop - start, nmo, nmo)
        Loo[start:stop] = Lpq[:, :nocc, :nocc]
        Lov[start:stop] = Lpq[:, :nocc, nocc:]
        Lvv = lib.pack_tril(Lpq[:, nocc:, nocc:])
        eris.vvL[:, start:stop] = Lvv.T
    Lpq = Lvv = None

    Loo = Loo.reshape(naux, nocc**2)
    Lov = Lov.reshape(naux, nocc * nvir)
    # (pq|rs) = sum_L L_pq L_rs
    eris.oooo[:] = lib.ddot(Loo.T, Loo).reshape(nocc, nocc, nocc, nocc)
    eris.ovoo[:] = lib.ddot(Lov.T, Loo).reshape(nocc, nvir, nocc, nocc)
    ovov = lib.ddot(Lov.T, Lov).reshape(nocc, nvir, nocc, nvir)
    eris.ovov[:] = ovov
    # ovvo is ovov with its last two indices swapped.
    eris.ovvo[:] = ovov.transpose(0, 1, 3, 2)
    ovov = None

    mem_now = lib.current_memory()[0]
    max_memory = max(0, cc.max_memory - mem_now)
    blksize = max(ccsd.BLKMIN,
                  int((max_memory * .9e6 / 8 - nocc**2 * nvir_pair)
                      / (nocc**2 + naux)))
    # oovv is first built with the virtual pair packed, then unpacked.
    oovv_tril = numpy.empty((nocc * nocc, nvir_pair))
    for p0, p1 in lib.prange(0, nvir_pair, blksize):
        oovv_tril[:, p0:p1] = lib.ddot(Loo.T, _cp(eris.vvL[p0:p1]).T)
    eris.oovv[:] = lib.unpack_tril(oovv_tril).reshape(nocc, nocc, nvir, nvir)
    oovv_tril = Loo = None

    Lov = Lov.reshape(naux, nocc, nvir)
    vblk = max(nocc, int((max_memory * .15e6 / 8) / (nocc * nvir_pair)))
    vvblk = int(min(nvir_pair, 4e8 / nocc,
                    max(4, (max_memory * .8e6 / 8) / (vblk * nocc + naux))))
    eris.ovvv = create('ovvv', (nocc, nvir, nvir_pair), 'f8',
                       chunks=(nocc, 1, vvblk))
    for q0, q1 in lib.prange(0, nvir_pair, vvblk):
        vvL = _cp(eris.vvL[q0:q1])
        for p0, p1 in lib.prange(0, nvir, vblk):
            tmpLov = _cp(Lov[:, :, p0:p1]).reshape(naux, -1)
            block = lib.ddot(tmpLov.T, vvL.T)
            eris.ovvv[:, p0:p1, q0:q1] = block.reshape(nocc, p1 - p0, q1 - q0)
        vvL = None
    return eris
def general(mydf, mo_coeffs, kpts=None, compact=True):
    '''Transform two-electron integrals to the MO basis for four k-points.

    Three cases are handled, in this order:

    1. all k-points at gamma and real orbitals: real integrals, AO pairs
       requested with ``aosym='s2'``;
    2. kpti == kptl and kptj == kptk: complex integrals from a single
       plane-wave loop;
    3. anything else: complex integrals from two plane-wave loops run in
       lockstep.

    Args:
        mydf : DF object providing ``gs``, ``max_memory``, ``weighted_coulG``
            and ``pw_loop``
        mo_coeffs : one 2D coefficient array (used for all four indices) or a
            sequence of four such arrays
        kpts : k-points, normalised by ``_format_kpts``
        compact : forwarded to ``_conc_mos`` in the gamma-point case only

    Returns:
        2D array of shape (nij_pair, nkl_pair); real in case 1, complex128
        otherwise.
    '''
    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    if isinstance(mo_coeffs, numpy.ndarray) and mo_coeffs.ndim == 2:
        mo_coeffs = (mo_coeffs,) * 4
    q = kptj - kpti
    coulG = mydf.weighted_coulG(q, False, mydf.gs)
    all_real = not any(numpy.iscomplexobj(mo) for mo in mo_coeffs)
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]) * .5)

####################
# gamma point, the integral is real and with s4 symmetry
    if gamma_point(kptijkl) and all_real:
        ijmosym, nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1], compact)
        klmosym, nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3], compact)
        eri_mo = numpy.zeros((nij_pair, nkl_pair))
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[3]))

        ijR = ijI = klR = klI = buf = None
        for pqkR, pqkI, p0, p1 \
                in mydf.pw_loop(mydf.gs, kptijkl[:2], q, max_memory=max_memory,
                                aosym='s2'):
            # sqrt of the kernel is applied to both sides of the product.
            vG = numpy.sqrt(coulG[p0:p1])
            pqkR *= vG
            pqkI *= vG
            buf = lib.transpose(pqkR, out=buf)
            ijR, klR = _dtrans(buf, ijR, ijmosym, moij, ijslice,
                               buf, klR, klmosym, mokl, klslice, sym)
            lib.ddot(ijR.T, klR, 1, eri_mo, 1)
            buf = lib.transpose(pqkI, out=buf)
            ijI, klI = _dtrans(buf, ijI, ijmosym, moij, ijslice,
                               buf, klI, klmosym, mokl, klslice, sym)
            lib.ddot(ijI.T, klI, 1, eri_mo, 1)
            pqkR = pqkI = None
        return eri_mo

####################
# (kpt) i == j == k == l != 0
# (kpt) i == l && j == k && i != j && j != k  =>
#
    elif is_zero(kpti - kptl) and is_zero(kptj - kptk):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nlk_pair, molk, lkslice = _conc_mos(mo_coeffs[3], mo_coeffs[2])[1:]
        # numpy.complex was removed in NumPy 1.24; complex128 is the dtype it
        # used to resolve to.
        eri_mo = numpy.zeros((nij_pair, nlk_pair), dtype=numpy.complex128)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[3]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[2]))

        zij = zlk = buf = None
        for pqkR, pqkI, p0, p1 \
                in mydf.pw_loop(mydf.gs, kptijkl[:2], q, max_memory=max_memory):
            buf = lib.transpose(pqkR + pqkI * 1j, out=buf)
            buf *= numpy.sqrt(coulG[p0:p1]).reshape(-1, 1)
            zij, zlk = _ztrans(buf, zij, moij, ijslice,
                               buf, zlk, molk, lkslice, sym)
            lib.dot(zij.T, zlk.conj(), 1, eri_mo, 1)
            pqkR = pqkI = None
        # The pairs were built in (l,k) order; swap them back to (k,l).
        nmok = mo_coeffs[2].shape[1]
        nmol = mo_coeffs[3].shape[1]
        eri_mo = lib.transpose(eri_mo.reshape(-1, nmol, nmok), axes=(0, 2, 1))
        return eri_mo.reshape(nij_pair, nlk_pair)

####################
# aosym = s1, complex integrals
#
# If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
# vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
# So  kptl/b - kptk/b  must be -1 < k/b < 1.  =>  kptl == kptk
#
    else:
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        eri_mo = numpy.zeros((nij_pair, nkl_pair), dtype=numpy.complex128)

        tao = []
        ao_loc = None
        zij = zkl = buf = None
        # Each of the two loops gets half of the memory budget.
        for (pqkR, pqkI, p0, p1), (rskR, rskI, q0, q1) in \
                lib.izip(mydf.pw_loop(mydf.gs, kptijkl[:2], q, max_memory=max_memory*.5),
                         mydf.pw_loop(mydf.gs,-kptijkl[2:], q, max_memory=max_memory*.5)):
            buf = lib.transpose(pqkR + pqkI * 1j, out=buf)
            zij = _ao2mo.r_e2(buf, moij, ijslice, tao, ao_loc, out=zij)
            # The second pair density enters complex conjugated.
            buf = lib.transpose(rskR - rskI * 1j, out=buf)
            zkl = _ao2mo.r_e2(buf, mokl, klslice, tao, ao_loc, out=zkl)
            zij *= coulG[p0:p1].reshape(-1, 1)
            lib.dot(zij.T, zkl, 1, eri_mo, 1)
            pqkR = pqkI = rskR = rskI = None
        return eri_mo
def general(mydf, mo_coeffs, kpts=None, compact=True):
    """Transform the density-fitted two-electron integrals to the MO basis.

    The three-index tensors are streamed from ``mydf.sr_loop`` and
    contracted block by block into ``eri_mo``.

    Args:
        mydf: object providing ``_cderi``, ``build``, ``sr_loop`` and
            ``max_memory``; ``build()`` is called when ``_cderi`` is None.
        mo_coeffs: a single 2D ndarray (reused for all four indices) or a
            sequence of four coefficient arrays.
        kpts: forwarded to ``_format_kpts``, which yields the four k-points.
        compact: forwarded to ``_conc_mos``; only used in the real
            gamma-point branch.

    Returns:
        2D array of shape ``(nij_pair, nkl_pair)``; real in the gamma-point
        branch and complex128 otherwise.
    """
    if mydf._cderi is None:
        mydf.build()

    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    if isinstance(mo_coeffs, numpy.ndarray) and mo_coeffs.ndim == 2:
        mo_coeffs = (mo_coeffs,) * 4
    all_real = not any(numpy.iscomplexobj(mo) for mo in mo_coeffs)
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]) * .5)

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if abs(kptijkl).sum() < KPT_DIFF_TOL and all_real:
        ijmosym, nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1], compact)
        klmosym, nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3], compact)
        eri_mo = numpy.zeros((nij_pair, nkl_pair))
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[3]))
        ijR = klR = None
        for LpqR, LpqI in mydf.sr_loop(kptijkl[:2], max_memory, True):
            ijR, klR = _dtrans(LpqR, ijR, ijmosym, moij, ijslice,
                               LpqR, klR, klmosym, mokl, klslice, sym)
            lib.ddot(ijR.T, klR, 1, eri_mo, 1)
            LpqR = LpqI = None
        return eri_mo

    # kpti == kptk and kptj == kptl: one loop serves both pairs.
    if (abs(kpti - kptk).sum() < KPT_DIFF_TOL) and (abs(kptj - kptl).sum() < KPT_DIFF_TOL):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        # numpy.complex was removed in NumPy 1.24; complex128 is the same dtype.
        eri_mo = numpy.zeros((nij_pair, nkl_pair), dtype=numpy.complex128)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[3]))
        zij = zkl = None
        for LpqR, LpqI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            buf = LpqR + LpqI * 1j
            zij, zkl = _ztrans(buf, zij, moij, ijslice,
                               buf, zkl, mokl, klslice, sym)
            lib.dot(zij.T, zkl, 1, eri_mo, 1)
            LpqR = LpqI = buf = None
        return eri_mo

    ####################
    # (kpt) i == j == k == l != 0
    # (kpt) i == l && j == k && i != j && j != k  =>
    #
    if (abs(kpti - kptl).sum() < KPT_DIFF_TOL) and (abs(kptj - kptk).sum() < KPT_DIFF_TOL):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nlk_pair, molk, lkslice = _conc_mos(mo_coeffs[3], mo_coeffs[2])[1:]
        eri_mo = numpy.zeros((nij_pair, nlk_pair), dtype=numpy.complex128)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[3]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[2]))
        zij = zlk = None
        for LpqR, LpqI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            buf = LpqR + LpqI * 1j
            zij, zlk = _ztrans(buf, zij, moij, ijslice,
                               buf, zlk, molk, lkslice, sym)
            lib.dot(zij.T, zlk.conj(), 1, eri_mo, 1)
            LpqR = LpqI = buf = None
        # The kl pair was built in (l, k) order; swap it back to (k, l).
        nmok = mo_coeffs[2].shape[1]
        nmol = mo_coeffs[3].shape[1]
        eri_mo = lib.transpose(eri_mo.reshape(-1, nmol, nmok), axes=(0, 2, 1))
        return eri_mo.reshape(nij_pair, nlk_pair)

    ####################
    # aosym = s1, complex integrals
    #
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
    # vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
    # So  kptl/b - kptk/b  must be -1 < k/b < 1.  =>  kptl == kptk
    #
    mo_coeffs = _mo_as_complex(mo_coeffs)
    nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
    nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
    eri_mo = numpy.zeros((nij_pair, nkl_pair), dtype=numpy.complex128)

    zij = zkl = None
    ij_blocks = mydf.sr_loop(kptijkl[:2], max_memory, False)
    kl_blocks = mydf.sr_loop(kptijkl[2:], max_memory, False)
    for (LpqR, LpqI), (LrsR, LrsI) in lib.izip(ij_blocks, kl_blocks):
        zij, zkl = _ztrans(LpqR + LpqI * 1j, zij, moij, ijslice,
                           LrsR + LrsI * 1j, zkl, mokl, klslice, False)
        lib.dot(zij.T, zkl, 1, eri_mo, 1)
        LpqR = LpqI = LrsR = LrsI = None
    return eri_mo
def _ao2mo_ovov(mp, orbs, feri, max_memory=2000, verbose=None):
    """Two-pass out-of-core AO->MO transformation writing 'ovov'-type blocks.

    Pass 1 computes AO integral blocks shell-range by shell-range, contracts
    two AO indices with the first and third orbital sets and stores the
    half-transformed blocks in a temporary HDF5 file.  Pass 2 reloads them in
    batches of ``occblk`` rows, contracts the remaining AO indices with the
    second and fourth orbital sets, and writes the result to ``feri``.

    Args:
        mp: object providing ``mol``; also passed to ``logger.new_logger``.
        orbs: sequence of four coefficient matrices.  Judging by the local
            names these are (occ-alpha, vir-alpha, occ-beta, vir-beta) --
            NOTE(review): confirm against the caller.
        feri: HDF5-like container; datasets 'ovov', 'ovOV' and 'OVOV' are
            created in it.
        max_memory: memory budget in MB used to size the working buffers.
        verbose: forwarded to ``logger.new_logger``.

    Returns:
        None.  Results are written into ``feri``.
    """
    # time.clock() was removed in Python 3.8; only fall back to it on
    # interpreters that lack time.process_time().
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time0 = (cpu_clock(), time.time())
    log = logger.new_logger(mp, verbose)
    orboa = numpy.asarray(orbs[0], order='F')
    orbva = numpy.asarray(orbs[1], order='F')
    orbob = numpy.asarray(orbs[2], order='F')
    orbvb = numpy.asarray(orbs[3], order='F')
    nao, nocca = orboa.shape
    noccb = orbob.shape[1]
    nvira = orbva.shape[1]
    nvirb = orbvb.shape[1]

    mol = mp.mol
    int2e = mol._add_suffix('int2e')
    ao2mopt = _ao2mo.AO2MOpt(mol, int2e, 'CVHFnr_schwarz_cond',
                             'CVHFsetnr_direct_scf')
    nbas = mol.nbas
    assert(nvira <= nao)
    assert(nvirb <= nao)

    ao_loc = mol.ao_loc_nr()
    dmax = max(4, min(nao/3, numpy.sqrt(max_memory*.95e6/8/(nao+nocca)**2)))
    sh_ranges = ao2mo.outcore.balance_partition(ao_loc, dmax)
    # Actual largest block width produced by the partitioning.
    dmax = max(x[2] for x in sh_ranges)
    eribuf = numpy.empty((nao,dmax,dmax,nao))
    ftmp = lib.H5TmpFile()
    disk = (nocca**2*(nao*(nao+dmax)/2+nvira**2) +
            noccb**2*(nao*(nao+dmax)/2+nvirb**2) +
            nocca*noccb*(nao**2+nvira*nvirb))
    log.debug('max_memory %s MB (dmax = %s) required disk space %g MB',
              max_memory, dmax, disk*8/1e6)

    fint = gto.moleintor.getints4c
    aa_blk_slices = []
    ab_blk_slices = []
    count_ab = 0
    count_aa = 0
    time1 = time0
    # ---- pass 1: half transformation, written to ftmp in the background ----
    with lib.call_in_background(ftmp.__setitem__) as save_tmp:
        for ish0, ish1, ni in sh_ranges:
            for jsh0, jsh1, nj in sh_ranges:
                i0, i1 = ao_loc[ish0], ao_loc[ish1]
                j0, j1 = ao_loc[jsh0], ao_loc[jsh1]
                di = i1 - i0
                dj = j1 - j0

                eri = fint(int2e, mol._atm, mol._bas, mol._env,
                           shls_slice=(0,nbas,ish0,ish1, jsh0,jsh1,0,nbas),
                           aosym='s1', ao_loc=ao_loc, cintopt=ao2mopt._cintopt,
                           out=eribuf)
                tmp_i = lib.ddot(orboa.T, eri.reshape(nao,di*dj*nao))
                tmp_li = lib.ddot(orbob.T, tmp_i.reshape(nocca*di*dj,nao).T)
                tmp_li = tmp_li.reshape(noccb,nocca,di,dj)
                save_tmp('ab/%d'%count_ab, tmp_li.transpose(1,0,2,3))
                ab_blk_slices.append((i0,i1,j0,j1))
                count_ab += 1

                # Same-spin blocks are stored for ish0 >= jsh0 only; the other
                # triangle is rebuilt by transposition in load_aa.
                if ish0 >= jsh0:
                    tmp_li = lib.ddot(orboa.T, tmp_i.reshape(nocca*di*dj,nao).T)
                    tmp_li = tmp_li.reshape(nocca,nocca,di,dj)
                    save_tmp('aa/%d'%count_aa, tmp_li.transpose(1,0,2,3))

                    tmp_i = lib.ddot(orbob.T, eri.reshape(nao,di*dj*nao))
                    tmp_li = lib.ddot(orbob.T, tmp_i.reshape(noccb*di*dj,nao).T)
                    tmp_li = tmp_li.reshape(noccb,noccb,di,dj)
                    save_tmp('bb/%d'%count_aa, tmp_li.transpose(1,0,2,3))
                    aa_blk_slices.append((i0,i1,j0,j1))
                    count_aa += 1

                time1 = log.timer_debug1('partial ao2mo [%d:%d,%d:%d]' %
                                         (ish0,ish1,jsh0,jsh1), *time1)
    time1 = time0 = log.timer('mp2 ao2mo_ovov pass1', *time0)
    eri = eribuf = tmp_i = tmp_li = None

    fovov = feri.create_dataset('ovov', (nocca*nvira,nocca*nvira), 'f8',
                                chunks=(nvira,nvira))
    fovOV = feri.create_dataset('ovOV', (nocca*nvira,noccb*nvirb), 'f8',
                                chunks=(nvira,nvirb))
    fOVOV = feri.create_dataset('OVOV', (noccb*nvirb,noccb*nvirb), 'f8',
                                chunks=(nvirb,nvirb))
    occblk = int(min(max(nocca,noccb),
                     max(4, 250/nocca, max_memory*.9e6/8/(nao**2*nocca)/5)))

    def load_aa(h5g, nocc, i0, eri):
        # Fill `eri` with rows [i0, i0+occblk) of the same-spin blocks.
        if i0 < nocc:
            i1 = min(i0+occblk, nocc)
            for k, (p0,p1,q0,q1) in enumerate(aa_blk_slices):
                eri[:i1-i0,:,p0:p1,q0:q1] = h5g[str(k)][i0:i1]
                if p0 != q0:
                    dat = numpy.asarray(h5g[str(k)][:,i0:i1])
                    eri[:i1-i0,:,q0:q1,p0:p1] = dat.transpose(1,0,3,2)

    def load_ab(h5g, nocca, i0, eri):
        # Fill `eri` with rows [i0, i0+occblk) of the mixed-spin blocks.
        if i0 < nocca:
            i1 = min(i0+occblk, nocca)
            for k, (p0,p1,q0,q1) in enumerate(ab_blk_slices):
                eri[:i1-i0,:,p0:p1,q0:q1] = h5g[str(k)][i0:i1]

    def save_rows(h5dat, nvir, i0, i1, dat):
        # Write one (nvir, -1) slab per row index in [i0, i1).
        for i in range(i0, i1):
            h5dat[i*nvir:(i+1)*nvir] = dat[i-i0].reshape(nvir,-1)

    # ---- pass 2: finish the transformation with double-buffered prefetch ----
    with lib.call_in_background(save_rows) as bsave:
        with lib.call_in_background(load_aa) as prefetch:
            buf_prefetch = numpy.empty((occblk,nocca,nao,nao))
            buf = numpy.empty_like(buf_prefetch)
            load_aa(ftmp['aa'], nocca, 0, buf_prefetch)
            for i0, i1 in lib.prange(0, nocca, occblk):
                buf, buf_prefetch = buf_prefetch, buf
                prefetch(ftmp['aa'], nocca, i1, buf_prefetch)
                eri = buf[:i1-i0].reshape((i1-i0)*nocca,nao,nao)
                dat = _ao2mo.nr_e2(eri, orbva, (0,nvira,0,nvira), 's1', 's1')
                bsave(fovov, nvira, i0, i1,
                      dat.reshape(i1-i0,nocca,nvira,nvira).transpose(0,2,1,3))
                time1 = log.timer_debug1('pass2 ao2mo for aa [%d:%d]' % (i0,i1), *time1)

            buf_prefetch = numpy.empty((occblk,noccb,nao,nao))
            buf = numpy.empty_like(buf_prefetch)
            load_aa(ftmp['bb'], noccb, 0, buf_prefetch)
            for i0, i1 in lib.prange(0, noccb, occblk):
                buf, buf_prefetch = buf_prefetch, buf
                prefetch(ftmp['bb'], noccb, i1, buf_prefetch)
                eri = buf[:i1-i0].reshape((i1-i0)*noccb,nao,nao)
                dat = _ao2mo.nr_e2(eri, orbvb, (0,nvirb,0,nvirb), 's1', 's1')
                bsave(fOVOV, nvirb, i0, i1,
                      dat.reshape(i1-i0,noccb,nvirb,nvirb).transpose(0,2,1,3))
                time1 = log.timer_debug1('pass2 ao2mo for bb [%d:%d]' % (i0,i1), *time1)

        # The mixed-spin pass reuses the (occblk, noccb, nao, nao) buffers
        # allocated for the 'bb' pass above.
        orbvab = numpy.asarray(numpy.hstack((orbva, orbvb)), order='F')
        with lib.call_in_background(load_ab) as prefetch:
            load_ab(ftmp['ab'], nocca, 0, buf_prefetch)
            for i0, i1 in lib.prange(0, nocca, occblk):
                buf, buf_prefetch = buf_prefetch, buf
                prefetch(ftmp['ab'], nocca, i1, buf_prefetch)
                eri = buf[:i1-i0].reshape((i1-i0)*noccb,nao,nao)
                dat = _ao2mo.nr_e2(eri, orbvab, (0,nvira,nvira,nvira+nvirb), 's1', 's1')
                bsave(fovOV, nvira, i0, i1,
                      dat.reshape(i1-i0,noccb,nvira,nvirb).transpose(0,2,1,3))
                time1 = log.timer_debug1('pass2 ao2mo for ab [%d:%d]' % (i0,i1), *time1)

    time0 = log.timer('mp2 ao2mo_ovov pass2', *time0)
def contract(myci, civec, eris):
    """Contract the integrals in ``eris`` with a CI vector.

    ``civec`` is read as ``[c0, c1(nocc,nvir), c2(nocc,nocc,nvir,nvir)]``
    and the result has the same layout.  Presumably this is the sigma
    vector H|c> used by an iterative eigen-solver -- NOTE(review): confirm
    against the caller.

    Args:
        myci: object providing ``nocc``, ``nmo``, ``max_memory``, ``stdout``,
            ``verbose`` and ``add_wvvVV_``.
        civec: 1D coefficient vector of length ``1 + nov + nocc**2*nvir**2``.
        eris: integral container; ``fock``, ``oooo``, ``vvoo``, ``voov``,
            ``vooo`` and ``vovv`` are read.

    Returns:
        New 1D array with the same shape as ``civec``.
    """
    # time.clock() was removed in Python 3.8; only fall back to it on
    # interpreters that lack time.process_time().
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time0 = cpu_clock(), time.time()
    log = logger.Logger(myci.stdout, myci.verbose)
    nocc = myci.nocc
    nmo = myci.nmo
    nvir = nmo - nocc
    nov = nocc * nvir
    noo = nocc**2
    c0 = civec[0]
    c1 = civec[1:nov+1].reshape(nocc,nvir)
    c2 = civec[nov+1:].reshape(nocc,nocc,nvir,nvir)

    # t1 and t2 are views into cinew, so the in-place updates below fill the
    # returned vector directly.
    cinew = numpy.zeros_like(civec)
    t1 = cinew[1:nov+1].reshape(nocc,nvir)
    t2 = cinew[nov+1:].reshape(nocc,nocc,nvir,nvir)
    t2new_tril = numpy.zeros((nocc*(nocc+1)//2,nvir,nvir))
    myci.add_wvvVV_(c2, eris, t2new_tril)
    for i in range(nocc):
        for j in range(i+1):
            t2[i,j] = t2new_tril[i*(i+1)//2+j]
        # Diagonal blocks are halved because the final symmetrization
        # (t2[i,j] += t2[j,i].T) counts them twice.
        t2[i,i] *= .5
    t2new_tril = None
    log.timer_debug1('vvvv', *time0)
    #:t2 += numpy.einsum('iklj,klab->ijab', _cp(eris.oooo)*.5, c2)
    oooo = lib.transpose(_cp(eris.oooo).reshape(nocc,noo,nocc), axes=(0,2,1))
    lib.ddot(oooo.reshape(noo,noo), c2.reshape(noo,-1), .5, t2.reshape(noo,-1), 1)

    foo = eris.fock[:nocc,:nocc].copy()
    fov = eris.fock[:nocc,nocc:].copy()
    fvv = eris.fock[nocc:,nocc:].copy()

    t1 += fov * c0
    t1 += numpy.einsum('ib,ab->ia', c1, fvv)
    t1 -= numpy.einsum('ja,ji->ia', c1, foo)

    #:t2 += numpy.einsum('bc,ijac->ijab', fvv, c2)
    #:t2 -= numpy.einsum('kj,kiba->ijab', foo, c2)
    #:t2 += numpy.einsum('ia,jb->ijab', c1, fov)
    lib.ddot(c2.reshape(-1,nvir), fvv, 1, t2.reshape(-1,nvir), 1)
    lib.ddot(foo, c2.reshape(nocc,-1),-1, t2.reshape(nocc,-1), 1)
    for j in range(nocc):
        t2[:,j] += numpy.einsum('ia,b->iab', c1, fov[j])

    # vovv is unpacked block by block inside the loop only; unpacking the
    # whole tensor up front would defeat the blocking.
    unit = _memory_usage_inloop(nocc, nvir)
    max_memory = max(2000, myci.max_memory - lib.current_memory()[0])
    blksize = min(nvir, max(ccsd.BLKMIN, int(max_memory/unit)))
    log.debug1('max_memory %d MB, nocc,nvir = %d,%d blksize = %d',
               max_memory, nocc, nvir, blksize)
    nvir_pair = nvir * (nvir+1) // 2
    for p0, p1 in lib.prange(0, nvir, blksize):
        eris_vvoo = _cp(eris.vvoo[p0:p1])
        oovv = lib.transpose(eris_vvoo.reshape(-1,nocc**2))
        #:eris_oVoV = eris_vvoo.transpose(2,0,3,1)
        eris_oVoV = numpy.ndarray((nocc,p1-p0,nocc,nvir))
        eris_oVoV[:] = oovv.reshape(nocc,nocc,p1-p0,nvir).transpose(0,2,1,3)
        eris_vvoo = oovv = None
        #:tmp = numpy.einsum('ikca,jbkc->jiba', c2, eris_oVoV)
        #:t2[:,:,p0:p1] -= tmp*.5
        #:t2[:,:,p0:p1] -= tmp.transpose(1,0,2,3)
        for i in range(nocc):
            tmp = lib.ddot(eris_oVoV.reshape(-1,nov), c2[i].reshape(nov,nvir))
            tmp = tmp.reshape(nocc,p1-p0,nvir)
            t2[:,i,p0:p1] -= tmp*.5
            t2[i,:,p0:p1] -= tmp

        eris_voov = _cp(eris.voov[p0:p1])
        for i in range(p0, p1):
            t2[:,:,i] += eris_voov[i-p0] * (c0 * .5)
        t1[:,p0:p1] += numpy.einsum('jb,aijb->ia', c1, eris_voov) * 2
        t1[:,p0:p1] -= numpy.einsum('jb,iajb->ia', c1, eris_oVoV)

        #:ovov = eris_voov.transpose(2,0,1,3) - eris_vvoo.transpose(2,0,3,1)
        # Reuse the eris_oVoV storage in place for ovov.
        ovov = eris_oVoV
        ovov *= -.5
        for i in range(nocc):
            ovov[i] += eris_voov[:,:,i]
        eris_oVoV = eris_vvoo = None
        #:theta = c2[:,:,p0:p1]
        #:theta = theta * 2 - theta.transpose(1,0,2,3)
        #:theta = theta.transpose(2,0,1,3)
        theta = numpy.ndarray((p1-p0,nocc,nocc,nvir))
        for i in range(p0, p1):
            theta[i-p0] = c2[:,:,i] * 2
            theta[i-p0] -= c2[:,:,i].transpose(1,0,2)
        #:t2 += numpy.einsum('ckia,jckb->ijab', theta, ovov)
        for j in range(nocc):
            tmp = lib.ddot(theta.reshape(-1,nov).T, ovov[j].reshape(-1,nvir))
            t2[:,j] += tmp.reshape(nocc,nvir,nvir)
        tmp = ovov = None

        t1[:,p0:p1] += numpy.einsum('aijb,jb->ia', theta, fov)

        eris_vooo = _cp(eris.vooo[p0:p1])
        #:t1 -= numpy.einsum('bjka,bjki->ia', theta, eris_vooo)
        #:t2[:,:,p0:p1] -= numpy.einsum('ka,bjik->jiba', c1, eris_vooo)
        lib.ddot(eris_vooo.reshape(-1,nocc).T, theta.reshape(-1,nvir), -1, t1, 1)
        for i in range(p0, p1):
            t2[:,:,i] -= lib.ddot(eris_vooo[i-p0].reshape(noo,-1), c1).reshape(nocc,nocc,-1)
        eris_vooo = None

        eris_vovv = _cp(eris.vovv[p0:p1]).reshape(-1,nvir_pair)
        eris_vovv = lib.unpack_tril(eris_vovv).reshape(p1-p0,nocc,nvir,nvir)
        #:t1 += numpy.einsum('cjib,cjba->ia', theta, eris_vovv)
        #:t2[:,:,p0:p1] += numpy.einsum('jc,aibc->ijab', c1, eris_vovv)
        theta = lib.transpose(theta.reshape(-1,nocc,nvir), axes=(0,2,1))
        lib.ddot(theta.reshape(-1,nocc).T, eris_vovv.reshape(-1,nvir), 1, t1, 1)
        for i in range(p0, p1):
            tmp = lib.ddot(c1, eris_vovv[i-p0].reshape(-1,nvir).T)
            t2[:,:,i] += tmp.reshape(nocc,nocc,nvir).transpose(1,0,2)
        tmp = eris_vovv = None

    # Symmetrize: t2[i,j,a,b] <- t2[i,j,a,b] + t2[j,i,b,a]
    for i in range(nocc):
        for j in range(i+1):
            t2[i,j] += t2[j,i].T
            t2[j,i] = t2[i,j].T

    cinew[0] += numpy.einsum('ia,ia->', fov, c1) * 2
    cinew[0] += numpy.einsum('aijb,ijab->', eris.voov, c2) * 2
    cinew[0] -= numpy.einsum('aijb,jiab->', eris.voov, c2)
    return cinew
def general(mydf, mo_coeffs, kpts=None,
            compact=getattr(__config__, 'pbc_df_ao2mo_general_compact', True)):
    """Transform the density-fitted two-electron integrals to the MO basis.

    The three-index tensors and their sign are streamed from
    ``mydf.sr_loop`` and contracted block by block into ``eri_mo``.

    Args:
        mydf: object providing ``_cderi``, ``build``, ``cell``, ``sr_loop``
            and ``max_memory``; ``build()`` is called when ``_cderi`` is None.
        mo_coeffs: a single 2D ndarray (reused for all four indices) or a
            sequence of four coefficient arrays.
        kpts: forwarded to ``_format_kpts``, which yields the four k-points.
        compact: forwarded to ``_conc_mos``; only used in the real
            gamma-point branch.

    Returns:
        2D array of shape ``(nij_pair, nkl_pair)``; real in the gamma-point
        branch and complex128 otherwise.  When ``_iskconserv`` rejects the
        k-points, a warning is logged and a zero array with one axis per MO
        set (four axes) is returned instead.
    """
    warn_pbc2d_eri(mydf)
    if mydf._cderi is None:
        mydf.build()

    cell = mydf.cell
    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    if isinstance(mo_coeffs, numpy.ndarray) and mo_coeffs.ndim == 2:
        mo_coeffs = (mo_coeffs,) * 4
    if not _iskconserv(cell, kptijkl):
        lib.logger.warn(cell, 'df_ao2mo: momentum conservation not found in '
                        'the given k-points %s', kptijkl)
        return numpy.zeros([mo.shape[1] for mo in mo_coeffs])

    all_real = not any(numpy.iscomplexobj(mo) for mo in mo_coeffs)
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]))

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if gamma_point(kptijkl) and all_real:
        ijmosym, nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1], compact)
        klmosym, nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3], compact)
        eri_mo = numpy.zeros((nij_pair,nkl_pair))
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[3]))
        ijR = klR = None
        for LpqR, LpqI, sign in mydf.sr_loop(kptijkl[:2], max_memory, True):
            ijR, klR = _dtrans(LpqR, ijR, ijmosym, moij, ijslice,
                               LpqR, klR, klmosym, mokl, klslice, sym)
            lib.ddot(ijR.T, klR, sign, eri_mo, 1)
            LpqR = LpqI = None
        return eri_mo

    # kpti == kptk and kptj == kptl: one loop serves both pairs.
    if is_zero(kpti-kptk) and is_zero(kptj-kptl):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        # numpy.complex was removed in NumPy 1.24; complex128 is the same dtype.
        eri_mo = numpy.zeros((nij_pair,nkl_pair), dtype=numpy.complex128)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[3]))
        zij = zkl = None
        for LpqR, LpqI, sign in mydf.sr_loop(kptijkl[:2], max_memory, False):
            buf = LpqR+LpqI*1j
            zij, zkl = _ztrans(buf, zij, moij, ijslice,
                               buf, zkl, mokl, klslice, sym)
            lib.dot(zij.T, zkl, sign, eri_mo, 1)
            LpqR = LpqI = buf = None
        return eri_mo

    ####################
    # (kpt) i == j == k == l != 0
    # (kpt) i == l && j == k && i != j && j != k  =>
    #
    if is_zero(kpti-kptl) and is_zero(kptj-kptk):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nlk_pair, molk, lkslice = _conc_mos(mo_coeffs[3], mo_coeffs[2])[1:]
        eri_mo = numpy.zeros((nij_pair,nlk_pair), dtype=numpy.complex128)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[3]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[2]))
        zij = zlk = None
        for LpqR, LpqI, sign in mydf.sr_loop(kptijkl[:2], max_memory, False):
            buf = LpqR+LpqI*1j
            zij, zlk = _ztrans(buf, zij, moij, ijslice,
                               buf, zlk, molk, lkslice, sym)
            lib.dot(zij.T, zlk.conj(), sign, eri_mo, 1)
            LpqR = LpqI = buf = None
        # The kl pair was built in (l, k) order; swap it back to (k, l).
        nmok = mo_coeffs[2].shape[1]
        nmol = mo_coeffs[3].shape[1]
        eri_mo = lib.transpose(eri_mo.reshape(-1,nmol,nmok), axes=(0,2,1))
        return eri_mo.reshape(nij_pair,nlk_pair)

    ####################
    # aosym = s1, complex integrals
    #
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
    # vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
    # So  kptl/b - kptk/b  must be -1 < k/b < 1.  =>  kptl == kptk
    #
    mo_coeffs = _mo_as_complex(mo_coeffs)
    nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
    nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
    nao = mo_coeffs[0].shape[0]
    eri_mo = numpy.zeros((nij_pair,nkl_pair), dtype=numpy.complex128)

    # Both loops must yield blocks of identical size so they can be zipped.
    blksize = int(min(max_memory*.3e6/16/nij_pair,
                      max_memory*.3e6/16/nkl_pair,
                      max_memory*.3e6/16/nao**2))
    zij = zkl = None
    ij_blocks = mydf.sr_loop(kptijkl[:2], max_memory, False, blksize)
    kl_blocks = mydf.sr_loop(kptijkl[2:], max_memory, False, blksize)
    for (LpqR, LpqI, sign), (LrsR, LrsI, sign1) in lib.izip(ij_blocks, kl_blocks):
        zij, zkl = _ztrans(LpqR+LpqI*1j, zij, moij, ijslice,
                           LrsR+LrsI*1j, zkl, mokl, klslice, False)
        lib.dot(zij.T, zkl, sign, eri_mo, 1)
        LpqR = LpqI = LrsR = LrsI = None
    return eri_mo
def _make_j3c(mydf, cell, auxcell, kptij_lst):
    """Build the fitted three-index tensors and store them in ``mydf._cderi``.

    Steps, as performed by the code below:
      1. ``outcore.aux_e2`` writes the raw three-centre integrals to the
         HDF5 file ``mydf._cderi`` under 'j3c/<index>'.
      2. For every unique ``kptj - kpti`` the two-centre metric is corrected
         with the Fourier-space contribution and Cholesky-factorized.
      3. For every unique k-point difference the three-centre integrals are
         corrected in Fourier space, fused, and solved against the Cholesky
         factor; the first ``naux`` rows are written back.
      4. Each dataset is finally truncated to its first ``naux`` rows.

    Args:
        mydf: object providing ``_cderi`` (HDF5 file name), ``gs``,
            ``max_memory``, ``weighted_coulG``, ``auxbar``, ``stdout`` and
            ``verbose``.
        cell: cell object for the orbital basis.
        auxcell: cell object for the auxiliary basis.
        kptij_lst: array whose ``[:, 0]`` / ``[:, 1]`` slices are the i / j
            k-points of every pair.

    Raises:
        scipy.linalg.LinAlgError: when the metric is not positive definite.
    """
    # time.clock() was removed in Python 3.8; only fall back to it on
    # interpreters that lack time.process_time().
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    t1 = (cpu_clock(), time.time())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, auxcell)
    outcore.aux_e2(cell, fused_cell, mydf._cderi, 'cint3c2e_sph',
                   kptij_lst=kptij_lst, dataname='j3c', max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    gs = mydf.gs
    Gv, Gvbase, kws = cell.get_Gv_weights(gs)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngs = gxyz.shape[0]

    kptis = kptij_lst[:,0]
    kptjs = kptij_lst[:,1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('cint2c2e_sph', hermi=1, kpts=uniq_kpts)
    kLRs = []
    kLIs = []
    for k, kpt in enumerate(uniq_kpts):
        aoaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
        coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, gs))
        LkR = aoaux.real * coulG
        LkI = aoaux.imag * coulG

        if is_zero(kpt):  # kpti == kptj
            j2c[k][naux:] -= lib.ddot(LkR[naux:], LkR.T)
            j2c[k][naux:] -= lib.ddot(LkI[naux:], LkI.T)
            j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T
        else:
            j2cR, j2cI = zdotCN(LkR[naux:], LkI[naux:], LkR.T, LkI.T)
            j2c[k][naux:] -= j2cR + j2cI * 1j
            j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T.conj()

        try:
            j2c[k] = scipy.linalg.cholesky(fuse(fuse(j2c[k]).T).T, lower=True)
        except scipy.linalg.LinAlgError as e:
            msg =('===================================\n'
                  'J-metric not positive definite.\n'
                  'It is likely that gs is not enough.\n'
                  '===================================')
            log.error(msg)
            # Exceptions have no `.message` attribute on Python 3.
            raise scipy.linalg.LinAlgError('\n'.join([str(e), msg]))

        kLR = LkR[naux:].T
        kLI = LkI[naux:].T
        if not kLR.flags.c_contiguous:
            kLR = lib.transpose(LkR[naux:])
        if not kLI.flags.c_contiguous:
            kLI = lib.transpose(LkI[naux:])
        # LkR/LkI already carry sqrt(coulG); this second factor completes
        # the full Coulomb weight.
        kLR *= coulG.reshape(-1,1)
        kLI *= coulG.reshape(-1,1)
        kLRs.append(kLR)
        kLIs.append(kLI)
        aoaux = LkR = LkI = kLR = kLI = coulG = None

    # Open read/write explicitly: h5py >= 3 defaults to read-only, whereas
    # older releases defaulted to 'a'.
    feri = h5py.File(mydf._cderi, 'a')

    def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
        kpt = uniq_kpts[uniq_kptji_id]
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)
        kLR = kLRs[uniq_kptji_id]
        kLI = kLIs[uniq_kptji_id]
        kpt_is_zero = is_zero(kpt)

        if kpt_is_zero:  # kpti == kptj
            aosym = 's2'
            nao_pair = nao*(nao+1)//2

            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('cint1e_ovlp_sph', hermi=1, kpts=adapted_kptjs)
            for k, ji in enumerate(adapted_ji_idx):
                ovlp[k] = lib.pack_tril(ovlp[k])
        else:
            aosym = 's1'
            nao_pair = nao**2

        max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory*.6*1e6/16/naux/(nkptj+1)), 1), nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16, int(max_memory*.2*1e6/16/buflen/(nkptj+1)))
        else:
            Gblksize = max(16, int(max_memory*.4*1e6/16/buflen/(nkptj+1)))
        Gblksize = min(Gblksize, ngs, 16384)
        pqkRbuf = numpy.empty(buflen*Gblksize)
        pqkIbuf = numpy.empty(buflen*Gblksize)
        # buf for ft_aopair
        buf = numpy.zeros((nkptj,buflen*Gblksize), dtype=numpy.complex128)

        col1 = 0
        for istep, sh_range in enumerate(shranges):
            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d',
                       istep+1, len(shranges), *sh_range)
            bstart, bend, ncol = sh_range
            col0, col1 = col1, col1+ncol
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                v = numpy.asarray(feri['j3c/%d'%idx][:,col0:col1])
                if kpt_is_zero:
                    for i, c in enumerate(vbar):
                        if c != 0:
                            v[i] -= c * ovlp[k][col0:col1]
                j3cR.append(numpy.asarray(v.real, order='C'))
                if kpt_is_zero and gamma_point(adapted_kptjs[k]):
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order='C'))

            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
                for p0, p1 in lib.prange(0, ngs, Gblksize):
                    ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                          b, gxyz[p0:p1], Gvbase, kpt,
                                          adapted_kptjs, out=buf)
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = numpy.ndarray((nG,ncol), dtype=numpy.complex128,
                                             order='F', buffer=buf[k])
                        pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                        pqkR[:] = aoao.real.T
                        pqkI[:] = aoao.imag.T
                        # Reset the shared buffer for the next call.
                        aoao[:] = 0
                        lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                        if not (kpt_is_zero and gamma_point(adapted_kptjs[k])):
                            lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                            lib.dot(kLI[p0:p1].T, pqkR.T,  1, j3cI[k][naux:], 1)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)
                ni = ncol // nao
                for p0, p1 in lib.prange(0, ngs, Gblksize):
                    ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                          b, gxyz[p0:p1], Gvbase, kpt,
                                          adapted_kptjs, out=buf)
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = numpy.ndarray((nG,ni,nao), dtype=numpy.complex128,
                                             order='F', buffer=buf[k])
                        pqkR = numpy.ndarray((ni,nao,nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ni,nao,nG), buffer=pqkIbuf)
                        pqkR[:] = aoao.real.transpose(1,2,0)
                        pqkI[:] = aoao.imag.transpose(1,2,0)
                        # Reset the shared buffer for the next call.
                        aoao[:] = 0
                        pqkR = pqkR.reshape(-1,nG)
                        pqkI = pqkI.reshape(-1,nG)
                        zdotCN(kLR[p0:p1].T, kLI[p0:p1].T, pqkR.T, pqkI.T,
                               -1, j3cR[k][naux:], j3cI[k][naux:], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if kpt_is_zero and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)
                v = scipy.linalg.solve_triangular(j2c[uniq_kptji_id], v,
                                                  lower=True, overwrite_b=True)
                feri['j3c/%d'%ji][:naux,col0:col1] = v

    try:
        for k, kpt in enumerate(uniq_kpts):
            make_kpt(k)

        # Shrink every dataset to its first naux rows.
        for k, kptij in enumerate(kptij_lst):
            v = feri['j3c/%d'%k][:naux]
            del(feri['j3c/%d'%k])
            feri['j3c/%d'%k] = v
    finally:
        # Release the HDF5 handle even when a step above fails.
        feri.close()
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    """Build the fitted three-index tensors with MPI-distributed jobs.

    Steps, as performed by the code below:
      1. The two-centre metric for every unique ``kptj - kpti`` is corrected
         with a Fourier-space contribution (summed over ranks with
         ``mpi.allreduce``), fused, and stored in a swap HDF5 file.  It is
         stored as a Cholesky factor (tag 'CD'), or, when Cholesky fails, as
         an eigen-decomposition-based transform (tag 'eig').
      2. Two closures are defined: ``gen_int3c`` writes raw three-centre
         integrals for one job into the swap file, and ``ft_fuse`` applies
         the Fourier-space correction and the metric transform.
      3. Both closures are handed to ``_assemble`` together with the job
         list and ``cderi_file``.

    Args:
        mydf: object providing ``auxcell``, ``mesh``, ``max_memory``,
            ``linear_dep_threshold``, ``weighted_coulG``, ``auxbar``,
            ``prange``, ``stdout`` and ``verbose``.
        cell: cell object for the orbital basis.
        auxcell: cell object for the auxiliary basis.
        kptij_lst: array whose ``[:, 0]`` / ``[:, 1]`` slices are the i / j
            k-points of every pair.
        cderi_file: output file name; the swap file is created in the same
            directory.
    """
    log = logger.Logger(mydf.stdout, mydf.verbose)
    # time.clock() was removed in Python 3.8; only fall back to it on
    # interpreters that lack time.process_time().
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    t1 = (cpu_clock(), time.time())

    fused_cell, fuse = fuse_auxcell(mydf, mydf.auxcell)
    ao_loc = cell.ao_loc_nr()
    nao = ao_loc[-1]
    naux = auxcell.nao_nr()
    nkptij = len(kptij_lst)
    mesh = mydf.mesh
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]

    kptis = kptij_lst[:,0]
    kptjs = kptij_lst[:,1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts)
    j2ctags = []
    t1 = log.timer_debug1('2c2e', *t1)

    swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file))
    fswap = lib.H5TmpFile(swapfile.name)
    # Unlink swapfile to avoid trash
    swapfile = None

    mem_now = max(comm.allgather(lib.current_memory()[0]))
    max_memory = max(2000, mydf.max_memory - mem_now)
    blksize = max(2048, int(max_memory*.5e6/16/fused_cell.nao_nr()))
    log.debug2('max_memory %s (MB) blocksize %s', max_memory, blksize)
    for k, kpt in enumerate(uniq_kpts):
        coulG = mydf.weighted_coulG(kpt, False, mesh)
        j2c_k = numpy.zeros_like(j2c[k])
        for p0, p1 in mydf.prange(0, ngrids, blksize):
            aoaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], None, b, gxyz[p0:p1], Gvbase, kpt).T
            LkR = numpy.asarray(aoaux.real, order='C')
            LkI = numpy.asarray(aoaux.imag, order='C')
            aoaux = None

            if is_zero(kpt):  # kpti == kptj
                j2c_k[naux:] += lib.ddot(LkR[naux:]*coulG[p0:p1], LkR.T)
                j2c_k[naux:] += lib.ddot(LkI[naux:]*coulG[p0:p1], LkI.T)
            else:
                j2cR, j2cI = zdotCN(LkR[naux:]*coulG[p0:p1],
                                    LkI[naux:]*coulG[p0:p1], LkR.T, LkI.T)
                j2c_k[naux:] += j2cR + j2cI * 1j
            # Release this block before the next one is allocated.
            LkR = LkI = None

        j2c_k[:naux,naux:] = j2c_k[naux:,:naux].conj().T
        j2c[k] -= mpi.allreduce(j2c_k)
        j2c[k] = fuse(fuse(j2c[k]).T).T
        try:
            fswap['j2c/%d'%k] = scipy.linalg.cholesky(j2c[k], lower=True)
            j2ctags.append('CD')
        except scipy.linalg.LinAlgError:
            # Metric is not positive definite: fall back to an
            # eigen-decomposition and drop the near-singular directions.
            w, v = scipy.linalg.eigh(j2c[k])
            log.debug2('metric linear dependency for kpt %s', k)
            log.debug2('cond = %.4g, drop %d bfns',
                       w[0]/w[-1], numpy.count_nonzero(w<mydf.linear_dep_threshold))
            v1 = v[:,w>mydf.linear_dep_threshold].T.conj()
            v1 /= numpy.sqrt(w[w>mydf.linear_dep_threshold]).reshape(-1,1)
            fswap['j2c/%d'%k] = v1
            if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum':
                idx = numpy.where(w < -mydf.linear_dep_threshold)[0]
                if len(idx) > 0:
                    fswap['j2c-/%d'%k] = (v[:,idx]/numpy.sqrt(-w[idx])).conj().T
            w = v = v1 = None
            j2ctags.append('eig')
    j2c = coulG = None

    aosym_s2 = numpy.einsum('ix->i', abs(kptis-kptjs)) < 1e-9
    j_only = numpy.all(aosym_s2)
    if gamma_point(kptij_lst):
        dtype = 'f8'
    else:
        dtype = 'c16'
    t1 = log.timer_debug1('aoaux and int2c', *t1)

    # Estimates the buffer size based on the last contraction in G-space.
    # This contraction requires to hold nkptj copies of (naux,?) array
    # simultaneously in memory.
    mem_now = max(comm.allgather(lib.current_memory()[0]))
    max_memory = max(2000, mydf.max_memory - mem_now)
    nkptj_max = max((uniq_inverse==x).sum() for x in set(uniq_inverse))
    buflen = max(int(min(max_memory*.5e6/16/naux/(nkptj_max+2)/nao,
                         nao/3/mpi.pool.size)), 1)
    chunks = (buflen, nao)

    j3c_jobs = grids2d_int3c_jobs(cell, auxcell, kptij_lst, chunks, j_only)
    log.debug1('max_memory = %d MB (%d in use) chunks %s',
               max_memory, mem_now, chunks)
    log.debug2('j3c_jobs %s', j3c_jobs)

    if j_only:
        int3c = wrap_int3c(cell, fused_cell, 'int3c2e', 's2', 1, kptij_lst)
    else:
        int3c = wrap_int3c(cell, fused_cell, 'int3c2e', 's1', 1, kptij_lst)
        # Flat indices of the lower triangle; only read in gen_int3c when
        # `not j_only`.
        idxb = numpy.tril_indices(nao)
        idxb = (idxb[0] * nao + idxb[1]).astype('i')
    aux_loc = fused_cell.ao_loc_nr('ssc' in 'int3c2e')

    def gen_int3c(job_id, ish0, ish1):
        # Write the raw 3-centre integrals of shells [ish0, ish1) to fswap.
        dataname = 'j3c-chunks/%d' % job_id
        i0 = ao_loc[ish0]
        i1 = ao_loc[ish1]
        dii = i1*(i1+1)//2 - i0*(i0+1)//2
        if j_only:
            dij = dii
            buflen = max(8, int(max_memory*1e6/16/(nkptij*dii+dii)))
        else:
            dij = (i1 - i0) * nao
            buflen = max(8, int(max_memory*1e6/16/(nkptij*dij+dij)))
        auxranges = balance_segs(aux_loc[1:]-aux_loc[:-1], buflen)
        buflen = max([x[2] for x in auxranges])
        buf = numpy.empty(nkptij*dij*buflen, dtype=dtype)
        buf1 = numpy.empty(dij*buflen, dtype=dtype)

        # Local naux: size of the fused auxiliary basis, not auxcell's.
        naux = aux_loc[-1]
        for kpt_id, kptij in enumerate(kptij_lst):
            key = '%s/%d' % (dataname, kpt_id)
            if aosym_s2[kpt_id]:
                shape = (naux, dii)
            else:
                shape = (naux, dij)
            if gamma_point(kptij):
                fswap.create_dataset(key, shape, 'f8')
            else:
                fswap.create_dataset(key, shape, 'c16')

        naux0 = 0
        for istep, auxrange in enumerate(auxranges):
            log.alldebug2("aux_e1 job_id %d step %d", job_id, istep)
            sh0, sh1, nrow = auxrange
            sub_slice = (ish0, ish1, 0, cell.nbas, sh0, sh1)
            mat = numpy.ndarray((nkptij,dij,nrow), dtype=dtype, buffer=buf)
            mat = int3c(sub_slice, mat)

            for k, kptij in enumerate(kptij_lst):
                h5dat = fswap['%s/%d'%(dataname,k)]
                v = lib.transpose(mat[k], out=buf1)
                if not j_only and aosym_s2[k]:
                    # Keep only the lower-triangular columns of this pair.
                    idy = idxb[i0*(i0+1)//2:i1*(i1+1)//2] - i0 * nao
                    out = numpy.ndarray((nrow,dii), dtype=v.dtype, buffer=mat[k])
                    v = numpy.take(v, idy, axis=1, out=out)
                if gamma_point(kptij):
                    h5dat[naux0:naux0+nrow] = v.real
                else:
                    h5dat[naux0:naux0+nrow] = v
            naux0 += nrow

    def ft_fuse(job_id, uniq_kptji_id, sh0, sh1):
        # Apply the G-space correction and the metric transform to one job.
        kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)

        j2c = numpy.asarray(fswap['j2c/%d'%uniq_kptji_id])
        j2ctag = j2ctags[uniq_kptji_id]
        naux0 = j2c.shape[0]
        if ('j2c-/%d' % uniq_kptji_id) in fswap:
            j2c_negative = numpy.asarray(fswap['j2c-/%d'%uniq_kptji_id])
        else:
            j2c_negative = None

        if is_zero(kpt):
            aosym = 's2'
        else:
            aosym = 's1'

        if aosym == 's2' and cell.dimension == 3:
            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
            ovlp = [lib.pack_tril(s) for s in ovlp]

        j3cR = [None] * nkptj
        j3cI = [None] * nkptj
        i0 = ao_loc[sh0]
        i1 = ao_loc[sh1]
        for k, idx in enumerate(adapted_ji_idx):
            key = 'j3c-chunks/%d/%d' % (job_id, idx)
            v = numpy.asarray(fswap[key])
            if aosym == 's2' and cell.dimension == 3:
                for i in numpy.where(vbar != 0)[0]:
                    v[i] -= vbar[i] * ovlp[k][i0*(i0+1)//2:i1*(i1+1)//2].ravel()
            j3cR[k] = numpy.asarray(v.real, order='C')
            if v.dtype == numpy.complex128:
                j3cI[k] = numpy.asarray(v.imag, order='C')
            v = None

        ncol = j3cR[0].shape[1]
        Gblksize = max(16, int(max_memory*1e6/16/ncol/(nkptj+1)))  # +1 for pqkRbuf/pqkIbuf
        Gblksize = min(Gblksize, ngrids, 16384)
        pqkRbuf = numpy.empty(ncol*Gblksize)
        pqkIbuf = numpy.empty(ncol*Gblksize)
        buf = numpy.empty(nkptj*ncol*Gblksize, dtype=numpy.complex128)
        log.alldebug2('job_id %d blksize (%d,%d)', job_id, Gblksize, ncol)

        wcoulG = mydf.weighted_coulG(kpt, False, mesh)
        fused_cell_slice = (auxcell.nbas, fused_cell.nbas)
        if aosym == 's2':
            shls_slice = (sh0, sh1, 0, sh1)
        else:
            shls_slice = (sh0, sh1, 0, cell.nbas)
        for p0, p1 in lib.prange(0, ngrids, Gblksize):
            Gaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], fused_cell_slice, b,
                               gxyz[p0:p1], Gvbase, kpt)
            Gaux *= wcoulG[p0:p1,None]
            kLR = Gaux.real.copy('C')
            kLI = Gaux.imag.copy('C')
            Gaux = None

            dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                        b, gxyz[p0:p1], Gvbase, kpt,
                                        adapted_kptjs, out=buf)
            nG = p1 - p0
            for k, ji in enumerate(adapted_ji_idx):
                aoao = dat[k].reshape(nG,ncol)
                pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                pqkR[:] = aoao.real.T
                pqkI[:] = aoao.imag.T

                lib.dot(kLR.T, pqkR.T, -1, j3cR[k][naux:], 1)
                lib.dot(kLI.T, pqkI.T, -1, j3cR[k][naux:], 1)
                if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                    lib.dot(kLR.T, pqkI.T, -1, j3cI[k][naux:], 1)
                    lib.dot(kLI.T, pqkR.T,  1, j3cI[k][naux:], 1)
            kLR = kLI = None

        for k, idx in enumerate(adapted_ji_idx):
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                v = fuse(j3cR[k])
            else:
                v = fuse(j3cR[k] + j3cI[k] * 1j)
            if j2ctag == 'CD':
                v = scipy.linalg.solve_triangular(j2c, v, lower=True, overwrite_b=True)
                fswap['j3c-chunks/%d/%d'%(job_id,idx)][:naux0] = v
            else:
                fswap['j3c-chunks/%d/%d'%(job_id,idx)][:naux0] = lib.dot(j2c, v)

            # low-dimension systems
            if j2c_negative is not None:
                fswap['j3c-/%d/%d'%(job_id,idx)] = lib.dot(j2c_negative, v)

    _assemble(mydf, kptij_lst, j3c_jobs, gen_int3c, ft_fuse, cderi_file, fswap, log)
def general(mydf, mo_coeffs, kpts=None, compact=True):
    """Contract the three-index tensors of ``mydf`` into MO-pair integrals.

    Args:
        mydf: density-fitting object.  ``mydf.build()`` is called first when
            ``mydf._cderi`` is None; the tensors are read via ``mydf.sr_loop``.
        mo_coeffs: either a single 2D ndarray (used for all four indices) or
            a sequence of four coefficient matrices.
        kpts: k-point specification, normalised by ``_format_kpts`` into the
            four k-points (i, j, k, l).
        compact: forwarded to ``_conc_mos``; only used in the real
            gamma-point branch.

    Returns:
        ndarray of shape ``(nij_pair, nkl_pair)``; real in the gamma-point
        branch with real orbitals, complex128 otherwise.
    """
    if mydf._cderi is None:
        mydf.build()

    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    if isinstance(mo_coeffs, numpy.ndarray) and mo_coeffs.ndim == 2:
        mo_coeffs = (mo_coeffs,) * 4
    all_real = not any(numpy.iscomplexobj(mo) for mo in mo_coeffs)
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]) * .5)

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if abs(kptijkl).sum() < KPT_DIFF_TOL and all_real:
        ijmosym, nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1], compact)
        klmosym, nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3], compact)
        eri_mo = numpy.zeros((nij_pair,nkl_pair))
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[3]))
        ijR = klR = None
        for LpqR, LpqI in mydf.sr_loop(kptijkl[:2], max_memory, True):
            ijR, klR = _dtrans(LpqR, ijR, ijmosym, moij, ijslice,
                               LpqR, klR, klmosym, mokl, klslice, sym)
            # accumulate eri_mo += ijR.T * klR
            lib.ddot(ijR.T, klR, 1, eri_mo, 1)
            LpqR = LpqI = None
        return eri_mo

    # (kpt) i == k && j == l
    elif (abs(kpti-kptk).sum() < KPT_DIFF_TOL) and (abs(kptj-kptl).sum() < KPT_DIFF_TOL):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        # numpy.complex was removed in NumPy 1.24; complex128 is the same dtype.
        eri_mo = numpy.zeros((nij_pair,nkl_pair), dtype=numpy.complex128)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[3]))
        zij = zkl = None
        for LpqR, LpqI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            buf = LpqR+LpqI*1j
            zij, zkl = _ztrans(buf, zij, moij, ijslice,
                               buf, zkl, mokl, klslice, sym)
            lib.dot(zij.T, zkl, 1, eri_mo, 1)
            LpqR = LpqI = buf = None
        return eri_mo

    ####################
    # (kpt) i == j == k == l != 0
    # (kpt) i == l && j == k && i != j && j != k  =>
    #
    elif (abs(kpti-kptl).sum() < KPT_DIFF_TOL) and (abs(kptj-kptk).sum() < KPT_DIFF_TOL):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        # note the swapped order (l, k) of the second orbital pair
        nlk_pair, molk, lkslice = _conc_mos(mo_coeffs[3], mo_coeffs[2])[1:]
        eri_mo = numpy.zeros((nij_pair,nlk_pair), dtype=numpy.complex128)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[3]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[2]))
        zij = zlk = None
        for LpqR, LpqI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            buf = LpqR+LpqI*1j
            zij, zlk = _ztrans(buf, zij, moij, ijslice,
                               buf, zlk, molk, lkslice, sym)
            lib.dot(zij.T, zlk.conj(), 1, eri_mo, 1)
            LpqR = LpqI = buf = None
        # swap the last two orbital indices back from (l, k) to (k, l)
        nmok = mo_coeffs[2].shape[1]
        nmol = mo_coeffs[3].shape[1]
        eri_mo = lib.transpose(eri_mo.reshape(-1,nmol,nmok), axes=(0,2,1))
        return eri_mo.reshape(nij_pair,nlk_pair)

    ####################
    # aosym = s1, complex integrals
    #
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
    # vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
    # So  kptl/b - kptk/b  must be -1 < k/b < 1.  =>  kptl == kptk
    #
    else:
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        eri_mo = numpy.zeros((nij_pair,nkl_pair), dtype=numpy.complex128)
        zij = zkl = None
        # the two loops are advanced in lockstep over the (i,j) and (k,l) pairs
        for (LpqR, LpqI), (LrsR, LrsI) in \
                lib.izip(mydf.sr_loop(kptijkl[:2], max_memory, False),
                         mydf.sr_loop(kptijkl[2:], max_memory, False)):
            zij, zkl = _ztrans(LpqR+LpqI*1j, zij, moij, ijslice,
                               LrsR+LrsI*1j, zkl, mokl, klslice, False)
            lib.dot(zij.T, zkl, 1, eri_mo, 1)
            LpqR = LpqI = LrsR = LrsI = None
        return eri_mo
def add_wvvVV_(self, t1, t2, eris, t2new_tril):
    """Accumulate the vvvv-integral contribution into ``t2new_tril`` in place.

    Following the reference einsum expressions kept in the ``#:`` comments:
    ``tau = t2 + t1 (x) t1`` is built for the lower-triangular occupied pairs
    (i >= j) and contracted with the vvvv integrals.  With ``self.direct``
    the integrals are generated in the AO basis on the fly; otherwise they
    are read from ``eris.vvvv`` in packed lower-triangular form.

    Args:
        t1: array whose shape gives ``(nocc, nvir)``.
        t2: doubles amplitudes, indexed as ``t2[i, :i+1]``.
        eris: integral container; only ``eris.vvvv`` is read (non-direct path).
        t2new_tril: output array, updated in place.
            # assumes shape (nocc*(nocc+1)//2, nvir, nvir) -- TODO confirm with caller

    Returns:
        The same ``t2new_tril`` object.
    """
    # time.clock() was removed in Python 3.8; keep it where it still exists
    # and fall back to time.process_time() otherwise.
    cpu_clock = getattr(time, 'clock', None) or time.process_time
    time0 = cpu_clock(), time.time()
    nocc, nvir = t1.shape

    #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
    #: t2new += numpy.einsum('ijcd,acdb->ijab', tau, vvvv)
    def contract_rec_(t2new_tril, tau, eri, i0, i1, j0, j1):
        # off-diagonal AO block: used once directly and once transposed
        nao = tau.shape[-1]
        ic = i1 - i0
        jc = j1 - j0
        #: t2tril[:,j0:j1] += numpy.einsum('xcd,cdab->xab', tau[:,i0:i1], eri)
        _dgemm('N', 'N', nocc*(nocc+1)//2, jc*nao, ic*nao,
               tau.reshape(-1,nao*nao), eri.reshape(-1,jc*nao),
               t2new_tril.reshape(-1,nao*nao), 1, 1, i0*nao, 0, j0*nao)

        #: t2tril[:,i0:i1] += numpy.einsum('xcd,abcd->xab', tau[:,j0:j1], eri)
        _dgemm('N', 'T', nocc*(nocc+1)//2, ic*nao, jc*nao,
               tau.reshape(-1,nao*nao), eri.reshape(-1,jc*nao),
               t2new_tril.reshape(-1,nao*nao), 1, 1, j0*nao, 0, i0*nao)

    def contract_tril_(t2new_tril, tau, eri, a0, a):
        # one unpacked row `a` of a triangular block starting at a0
        nvir = tau.shape[-1]
        #: t2new[i,:i+1, a] += numpy.einsum('xcd,cdb->xb', tau[:,a0:a+1], eri)
        _dgemm('N', 'N', nocc*(nocc+1)//2, nvir, (a+1-a0)*nvir,
               tau.reshape(-1,nvir*nvir), eri.reshape(-1,nvir),
               t2new_tril.reshape(-1,nvir*nvir), 1, 1, a0*nvir, 0, a*nvir)

        #: t2new[i,:i+1,a0:a] += numpy.einsum('xd,abd->xab', tau[:,a], eri[:a])
        if a > a0:
            _dgemm('N', 'T', nocc*(nocc+1)//2, (a-a0)*nvir, nvir,
                   tau.reshape(-1,nvir*nvir), eri.reshape(-1,nvir),
                   t2new_tril.reshape(-1,nvir*nvir), 1, 1, a*nvir, 0, a0*nvir)

    if self.direct:   # AO-direct CCSD
        mol = self.mol
        nao, nmo = self.mo_coeff.shape
        aos = numpy.asarray(self.mo_coeff[:,nocc:].T, order='F')
        outbuf = numpy.empty((nocc*(nocc+1)//2,nao,nao))
        # tau temporarily lives at the start of outbuf's memory
        tau = numpy.ndarray((nocc*(nocc+1)//2,nvir,nvir), buffer=outbuf)
        p0 = 0
        for i in range(nocc):
            tau[p0:p0+i+1] = numpy.einsum('a,jb->jab', t1[i], t1[:i+1])
            tau[p0:p0+i+1] += t2[i,:i+1]
            p0 += i + 1
        # nr_e2 is called without out=, and its result replaces tau before
        # outbuf is zeroed below
        tau = _ao2mo.nr_e2(tau.reshape(-1,nvir**2), aos, (0,nao,0,nao), 's1', 's1')
        tau = tau.reshape(-1,nao,nao)
        time0 = logger.timer_debug1(self, 'vvvv-tau', *time0)

        ao2mopt = _ao2mo.AO2MOpt(mol, 'cint2e_sph', 'CVHFnr_schwarz_cond',
                                 'CVHFsetnr_direct_scf')
        outbuf[:] = 0
        ao_loc = mol.ao_loc_nr()
        max_memory = max(0, self.max_memory - lib.current_memory()[0])
        dmax = max(4, int(numpy.sqrt(max_memory*.95e6/8/nao**2/2)))
        sh_ranges = ao2mo.outcore.balance_partition(ao_loc, dmax)
        dmax = max(x[2] for x in sh_ranges)
        eribuf = numpy.empty((dmax,dmax,nao,nao))
        loadbuf = numpy.empty((dmax,dmax,nao,nao))
        fint = gto.moleintor.getints2e

        for ip, (ish0, ish1, ni) in enumerate(sh_ranges):
            # strictly lower off-diagonal shell blocks
            for jsh0, jsh1, nj in sh_ranges[:ip]:
                eri = fint('cint2e_sph', mol._atm, mol._bas, mol._env,
                           shls_slice=(ish0,ish1,jsh0,jsh1), aosym='s2kl',
                           ao_loc=ao_loc, cintopt=ao2mopt._cintopt, out=eribuf)
                i0, i1 = ao_loc[ish0], ao_loc[ish1]
                j0, j1 = ao_loc[jsh0], ao_loc[jsh1]
                tmp = numpy.ndarray((i1-i0,nao,j1-j0,nao), buffer=loadbuf)
                _ccsd.libcc.CCload_eri(tmp.ctypes.data_as(ctypes.c_void_p),
                                       eri.ctypes.data_as(ctypes.c_void_p),
                                       (ctypes.c_int*4)(i0, i1, j0, j1),
                                       ctypes.c_int(nao))
                contract_rec_(outbuf, tau, tmp, i0, i1, j0, j1)
                time0 = logger.timer_debug1(self, 'AO-vvvv [%d:%d,%d:%d]' %
                                            (ish0,ish1,jsh0,jsh1), *time0)
            # diagonal shell block
            eri = fint('cint2e_sph', mol._atm, mol._bas, mol._env,
                       shls_slice=(ish0,ish1,ish0,ish1), aosym='s4',
                       ao_loc=ao_loc, cintopt=ao2mopt._cintopt, out=eribuf)
            i0, i1 = ao_loc[ish0], ao_loc[ish1]
            for i in range(i1-i0):
                p0, p1 = i*(i+1)//2, (i+1)*(i+2)//2
                tmp = lib.unpack_tril(eri[p0:p1], out=loadbuf)
                contract_tril_(outbuf, tau, tmp, i0, i0+i)
            time0 = logger.timer_debug1(self, 'AO-vvvv [%d:%d,%d:%d]' %
                                        (ish0,ish1,ish0,ish1), *time0)
        eribuf = loadbuf = eri = tmp = None

        mo = numpy.asarray(self.mo_coeff, order='F')
        # from here on tau is only reused as scratch for the nr_e2 outputs
        tmp = _ao2mo.nr_e2(outbuf, mo, (nocc,nmo,nocc,nmo), 's1', 's1', out=tau)
        t2new_tril += tmp.reshape(-1,nvir,nvir)

        #: tmp = numpy.einsum('ijcd,ka,kdcb->ijba', tau, t1, eris.ovvv)
        #: t2new -= tmp + tmp.transpose(1,0,3,2)
        tmp = _ao2mo.nr_e2(outbuf, mo, (nocc,nmo,0,nocc), 's1', 's1', out=tau)
        t2new_tril -= lib.ddot(tmp.reshape(-1,nocc), t1).reshape(-1,nvir,nvir)
        tmp = _ao2mo.nr_e2(outbuf, mo, (0,nocc,nocc,nmo), 's1', 's1', out=tau)
        #: t2new_tril -= numpy.einsum('xkb,ka->xab', tmp.reshape(-1,nocc,nvir), t1)
        tmp = lib.transpose(tmp.reshape(-1,nocc,nvir), axes=(0,2,1), out=outbuf)
        tmp = lib.ddot(tmp.reshape(-1,nocc), t1, 1,
                       numpy.ndarray(t2new_tril.shape, buffer=tau), 0)
        tmp = lib.transpose(tmp.reshape(-1,nvir,nvir), axes=(0,2,1), out=outbuf)
        t2new_tril -= tmp.reshape(-1,nvir,nvir)

    else:
        #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
        #: t2new += numpy.einsum('ijcd,acdb->ijab', tau, vvvv)
        tau = numpy.empty((nocc*(nocc+1)//2,nvir,nvir))
        p0 = 0
        for i in range(nocc):
            tau[p0:p0+i+1] = numpy.einsum('a,jb->jab', t1[i], t1[:i+1])
            tau[p0:p0+i+1] += t2[i,:i+1]
            p0 += i + 1
        time0 = logger.timer_debug1(self, 'vvvv-tau', *time0)

        p0 = 0
        # two buffers are alternated so that the next row can be unpacked
        # while the previous contraction is still handled by async_do
        outbuf = numpy.empty((nvir,nvir,nvir))
        outbuf1 = numpy.empty((nvir,nvir,nvir))
        handler = None
        for a in range(nvir):
            buf = lib.unpack_tril(eris.vvvv[p0:p0+a+1], out=outbuf)
            outbuf, outbuf1 = outbuf1, outbuf
            handler = async_do(handler, contract_tril_, t2new_tril, tau, buf, 0, a)
            p0 += a+1
            time0 = logger.timer_debug1(self, 'vvvv %d'%a, *time0)
        # handler stays None when nvir == 0 (the loop body never runs)
        if handler is not None:
            handler.join()
    return t2new_tril
def get_eri(mydf, kpts=None, compact=True):
    """Assemble AO two-electron integrals from ``sr_loop`` and ``pw_loop`` pieces.

    Each branch sums a part built from the tensors yielded by
    ``mydf.sr_loop`` and a part built from the Coulomb-kernel-weighted
    quantities yielded by ``mydf.pw_loop``.

    Args:
        mydf: density-fitting object; ``mydf.build()`` is called when
            ``mydf._cderi`` is None.
        kpts: None (four zero k-points), a single k-point of shape (3,)
            (repeated four times), or anything reshapable to (4, 3).
        compact: only used at the gamma point.  When False the s4-packed
            result is expanded with ``ao2mo.restore(1, ...)`` and reshaped to
            ``(nao**2, -1)``.

    Returns:
        Real ``(nao_pair, nao_pair)`` array at the gamma point (or its
        expanded form), complex ``(nao**2, nao**2)`` array otherwise.
    """
    cell = mydf.cell
    if kpts is None:
        kptijkl = numpy.zeros((4,3))
    elif numpy.shape(kpts) == (3,):
        kptijkl = numpy.vstack([kpts]*4)
    else:
        kptijkl = numpy.reshape(kpts, (4,3))
    if mydf._cderi is None:
        mydf.build()

    kpti, kptj, kptk, kptl = kptijkl
    nao = cell.nao_nr()
    nao_pair = nao * (nao+1) // 2
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]) * .8)

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if abs(kptijkl).sum() < 1e-9:
        eriR = numpy.zeros((nao_pair,nao_pair))
        for LpqR, LpqI, j3cR, j3cI in mydf.sr_loop(kptijkl[:2], max_memory, True):
            lib.ddot(j3cR.T, LpqR, 1, eriR, 1)
        # eriR + eriR.T accounts for the (Lpq^T j3c) counterpart of the term above
        eriR = lib.transpose_sum(eriR, inplace=True)

        coulG = tools.get_coulG(cell, kptj-kpti, gs=mydf.gs) / cell.vol
        # Re-evaluate the budget now that eriR is allocated, keeping the same
        # 2000 MB floor as above so it can never become zero or negative.
        max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]) * .8)
        trilidx = numpy.tril_indices(nao)
        for pqkR, pqkI, p0, p1 \
                in mydf.pw_loop(cell, mydf.gs, kptijkl[:2], max_memory=max_memory):
            # keep only the lower-triangular AO pairs (s2 packing)
            pqkR = numpy.asarray(pqkR.reshape(nao,nao,-1)[trilidx], order='C')
            pqkI = numpy.asarray(pqkI.reshape(nao,nao,-1)[trilidx], order='C')
            # sqrt of the kernel on each factor gives the full kernel in the product
            vG = numpy.sqrt(coulG[p0:p1])
            pqkR *= vG
            pqkI *= vG
            lib.dot(pqkR, pqkR.T, 1, eriR, 1)
            lib.dot(pqkI, pqkI.T, 1, eriR, 1)
        if not compact:
            eriR = ao2mo.restore(1, eriR, nao).reshape(nao**2,-1)
        return eriR

    ####################
    # (kpt) i == j == k == l != 0
    #
    # (kpt) i == l && j == k && i != j && j != k  =>
    # both vbar and ovlp are zero. It corresponds to the exchange integral.
    #
    # complex integrals, N^4 elements
    elif (abs(kpti-kptl).sum() < 1e-9) and (abs(kptj-kptk).sum() < 1e-9):
        eriR = numpy.zeros((nao*nao,nao*nao))
        eriI = numpy.zeros((nao*nao,nao*nao))
        for LpqR, LpqI, j3cR, j3cI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            zdotNC(j3cR.T, j3cI.T, LpqR, LpqI, 1, eriR, eriI, 1)
            zdotNC(LpqR.T, LpqI.T, j3cR, j3cI, 1, eriR, eriI, 1)
            LpqR = LpqI = j3cR = j3cI = None

        coulG = tools.get_coulG(cell, kptj-kpti, gs=mydf.gs) / cell.vol
        for pqkR, pqkI, p0, p1 \
                in mydf.pw_loop(cell, mydf.gs, kptijkl[:2], max_memory=max_memory):
            vG = numpy.sqrt(coulG[p0:p1])
            pqkR *= vG
            pqkI *= vG
            # rho_pq(G+k_pq) * conj(rho_rs(G-k_rs))
            zdotNC(pqkR, pqkI, pqkR.T, pqkI.T, 1, eriR, eriI, 1)
        # transpose(0,1,3,2) because
        # j == k && i == l  =>
        # (L|ij).transpose(0,2,1).conj() = (L^*|ji) = (L^*|kl)  =>  (M|kl)
        # rho_rs(-G+k_rs) = conj(transpose(rho_sr(G+k_sr), (0,2,1)))
        return (eriR.reshape((nao,)*4).transpose(0,1,3,2) +
                eriI.reshape((nao,)*4).transpose(0,1,3,2)*1j).reshape(nao**2,-1)

    ####################
    # aosym = s1, complex integrals
    #
    # kpti == kptj  =>  kptl == kptk
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
    # vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
    # So  kptl/b - kptk/b  must be -1 < k/b < 1.
    #
    else:
        eriR = numpy.zeros((nao*nao,nao*nao))
        eriI = numpy.zeros((nao*nao,nao*nao))
        for (LpqR, LpqI, jpqR, jpqI), (LrsR, LrsI, jrsR, jrsI) in \
                lib.izip(mydf.sr_loop(kptijkl[:2], max_memory, False),
                         mydf.sr_loop(kptijkl[2:], max_memory, False)):
            zdotNN(jpqR.T, jpqI.T, LrsR, LrsI, 1, eriR, eriI, 1)
            zdotNN(LpqR.T, LpqI.T, jrsR, jrsI, 1, eriR, eriI, 1)
            LpqR = LpqI = jpqR = jpqI = LrsR = LrsI = jrsR = jrsI = None

        coulG = tools.get_coulG(cell, kptj-kpti, gs=mydf.gs) / cell.vol
        # Two pw_loop generators run side by side, hence the smaller share per
        # loop; the floor keeps the budget positive when memory is exhausted.
        max_memory = max(1000, (mydf.max_memory - lib.current_memory()[0]) * .4)
        for (pqkR, pqkI, p0, p1), (rskR, rskI, q0, q1) in \
                lib.izip(mydf.pw_loop(cell, mydf.gs, kptijkl[:2], max_memory=max_memory),
                         mydf.pw_loop(cell, mydf.gs,-kptijkl[2:], max_memory=max_memory)):
            pqkR *= coulG[p0:p1]
            pqkI *= coulG[p0:p1]
            # rho'_rs(G-k_rs) = conj(rho_rs(-G+k_rs))
            #                 = conj(rho_rs(-G+k_rs) - d_{k_rs:Q,rs} * Q(-G+k_rs))
            #                 = rho_rs(G-k_rs) - conj(d_{k_rs:Q,rs}) * Q(G-k_rs)
            # rho_pq(G+k_pq) * conj(rho'_rs(G-k_rs))
            zdotNC(pqkR, pqkI, rskR.T, rskI.T, 1, eriR, eriI, 1)
        return eriR + eriI*1j
def _ao2mo_ovov(mp, orbo, orbv, feri, max_memory=2000, verbose=None):
    """Two-pass out-of-core transformation producing the 'ovov' dataset.

    Pass 1 computes AO integrals shell block by shell block, contracts the
    two outer AO indices with ``orbo`` and stores each block in a temporary
    HDF5 file.  Pass 2 reloads them in slices of occupied orbitals,
    contracts the remaining two AO indices with ``orbv`` and writes the
    result into ``feri['ovov']``.

    Args:
        mp: object providing ``mp.mol``; also passed to ``logger.new_logger``.
        orbo: coefficient matrix of shape ``(nao, nocc)``.
        orbv: coefficient matrix with ``nvir`` columns (``nvir <= nao``).
        feri: open, writable HDF5 file or group receiving the 'ovov' dataset.
        max_memory: memory budget in MB used to size the blocks.
        verbose: forwarded to ``logger.new_logger``.

    Returns:
        The created dataset, of shape ``(nocc*nvir, nocc*nvir)``.
    """
    # time.clock() was removed in Python 3.8; keep it where it still exists
    # and fall back to time.process_time() otherwise.
    cpu_clock = getattr(time, 'clock', None) or time.process_time
    time0 = (cpu_clock(), time.time())
    log = logger.new_logger(mp, verbose)

    mol = mp.mol
    int2e = mol._add_suffix('int2e')
    ao2mopt = _ao2mo.AO2MOpt(mol, int2e, 'CVHFnr_schwarz_cond',
                             'CVHFsetnr_direct_scf')
    nao, nocc = orbo.shape
    nvir = orbv.shape[1]
    nbas = mol.nbas
    assert(nvir <= nao)

    ao_loc = mol.ao_loc_nr()
    dmax = max(4, min(nao/3, numpy.sqrt(max_memory*.95e6/8/(nao+nocc)**2)))
    sh_ranges = ao2mo.outcore.balance_partition(ao_loc, dmax)
    dmax = max(x[2] for x in sh_ranges)
    eribuf = numpy.empty((nao,dmax,dmax,nao))
    ftmp = lib.H5TmpFile()
    log.debug('max_memory %s MB (dmax = %s) required disk space %g MB',
              max_memory, dmax, nocc**2*(nao*(nao+dmax)/2+nvir**2)*8/1e6)

    buf_i = numpy.empty((nocc*dmax**2*nao))
    buf_li = numpy.empty((nocc**2*dmax**2))
    buf1 = numpy.empty_like(buf_li)

    fint = gto.moleintor.getints4c
    jk_blk_slices = []
    count = 0
    time1 = time0
    # ---- pass 1: half transformation, blocks written in the background ----
    with lib.call_in_background(ftmp.__setitem__) as save:
        for ip, (ish0, ish1, ni) in enumerate(sh_ranges):
            for jsh0, jsh1, nj in sh_ranges[:ip+1]:
                i0, i1 = ao_loc[ish0], ao_loc[ish1]
                j0, j1 = ao_loc[jsh0], ao_loc[jsh1]
                jk_blk_slices.append((i0,i1,j0,j1))

                eri = fint(int2e, mol._atm, mol._bas, mol._env,
                           shls_slice=(0,nbas,ish0,ish1, jsh0,jsh1,0,nbas),
                           aosym='s1', ao_loc=ao_loc, cintopt=ao2mopt._cintopt,
                           out=eribuf)
                tmp_i = numpy.ndarray((nocc,(i1-i0)*(j1-j0)*nao), buffer=buf_i)
                tmp_li = numpy.ndarray((nocc,nocc*(i1-i0)*(j1-j0)), buffer=buf_li)
                # contract the first, then the last AO index with orbo
                lib.ddot(orbo.T, eri.reshape(nao,(i1-i0)*(j1-j0)*nao), c=tmp_i)
                lib.ddot(orbo.T, tmp_i.reshape(nocc*(i1-i0)*(j1-j0),nao).T, c=tmp_li)
                tmp_li = tmp_li.reshape(nocc,nocc,(i1-i0),(j1-j0))
                save(str(count), tmp_li.transpose(1,0,2,3))
                # swap buffers so the pending background write is not overwritten
                buf_li, buf1 = buf1, buf_li
                count += 1
                time1 = log.timer_debug1('partial ao2mo [%d:%d,%d:%d]' %
                                         (ish0,ish1,jsh0,jsh1), *time1)
    time1 = time0 = log.timer('mp2 ao2mo_ovov pass1', *time0)
    eri = eribuf = tmp_i = tmp_li = buf_i = buf_li = buf1 = None

    h5dat = feri.create_dataset('ovov', (nocc*nvir,nocc*nvir), 'f8',
                                chunks=(nvir,nvir))
    occblk = int(min(nocc, max(4, 250/nocc, max_memory*.9e6/8/(nao**2*nocc)/5)))

    def load(i0, eri):
        # Gather the occupied slice [i0:i0+occblk] from every stored block;
        # blocks with p0 != q0 are also written to the transposed AO position.
        if i0 < nocc:
            i1 = min(i0+occblk, nocc)
            for k, (p0,p1,q0,q1) in enumerate(jk_blk_slices):
                eri[:i1-i0,:,p0:p1,q0:q1] = ftmp[str(k)][i0:i1]
                if p0 != q0:
                    dat = numpy.asarray(ftmp[str(k)][:,i0:i1])
                    eri[:i1-i0,:,q0:q1,p0:p1] = dat.transpose(1,0,3,2)

    def save(i0, i1, dat):
        # write one (nvir, nocc*nvir) slab per occupied index
        for i in range(i0, i1):
            h5dat[i*nvir:(i+1)*nvir] = dat[i-i0].reshape(nvir,nocc*nvir)

    orbv = numpy.asarray(orbv, order='F')
    buf_prefetch = numpy.empty((occblk,nocc,nao,nao))
    buf = numpy.empty_like(buf_prefetch)
    bufw = numpy.empty((occblk*nocc,nvir**2))
    bufw1 = numpy.empty_like(bufw)
    # ---- pass 2: second half transformation with prefetch / async write ----
    with lib.call_in_background(load) as prefetch:
        with lib.call_in_background(save) as bsave:
            load(0, buf_prefetch)
            for i0, i1 in lib.prange(0, nocc, occblk):
                buf, buf_prefetch = buf_prefetch, buf
                # load() ignores the request once i1 reaches nocc
                prefetch(i1, buf_prefetch)
                eri = buf[:i1-i0].reshape((i1-i0)*nocc,nao,nao)

                dat = _ao2mo.nr_e2(eri, orbv, (0,nvir,0,nvir), 's1', 's1', out=bufw)
                bsave(i0, i1, dat.reshape(i1-i0,nocc,nvir,nvir).transpose(0,2,1,3))
                # alternate output buffers while the previous one is written
                bufw, bufw1 = bufw1, bufw
                time1 = log.timer_debug1('pass2 ao2mo [%d:%d]' % (i0,i1), *time1)

    time0 = log.timer('mp2 ao2mo_ovov pass2', *time0)
    return h5dat
def make_rdm2(ci, nmo, nocc):
    """Build the ``(nmo, nmo, nmo, nmo)`` two-particle array from a CI vector.

    ``ci`` is laid out as ``[c0, c1 (nocc*nvir values), c2 (nocc,nocc,nvir,nvir)]``
    with ``nvir = nmo - nocc``.  The occupied/virtual blocks are filled one
    by one, and finally the terms built from ``make_rdm1`` (with 2 removed
    from its occupied diagonal) are added.
    """
    nvir = nmo - nocc
    noo = nocc**2
    nov = nocc * nvir
    occ = slice(None, nocc)
    vir = slice(nocc, None)

    c0 = ci[0]
    c1 = ci[1:nov+1].reshape(nocc,nvir)
    c2 = ci[nov+1:].reshape(nocc,nocc,nvir,nvir)
    c2_flat = c2.reshape(noo,-1)

    doovv = c0*c2
    dvvvo = numpy.einsum('ia,ikcd->cdak', c1, c2)
    dovoo =-numpy.einsum('ia,klac->ickl', c1, c2)
    doooo = lib.ddot(c2_flat, c2_flat.T).reshape((nocc,)*4)
    dvvvv = lib.ddot(c2_flat.T, c2_flat).reshape((nvir,)*4)

    rdm2 = numpy.zeros((nmo,nmo,nmo,nmo))

    # oooo block
    rdm2[occ,occ,occ,occ] = doooo*4-doooo.transpose(1,0,2,3)*2

    # ovoo block and its three index permutations
    rdm2[occ,vir,occ,occ] = dovoo*4-dovoo.transpose(0,1,3,2)*2
    ovoo = rdm2[occ,vir,occ,occ]
    rdm2[vir,occ,occ,occ] = ovoo.transpose(1,0,3,2)
    rdm2[occ,occ,occ,vir] = ovoo.transpose(2,3,0,1)
    rdm2[occ,occ,vir,occ] = ovoo.transpose(3,2,1,0)

    # oovv / vvoo blocks
    rdm2[occ,occ,vir,vir] = doovv*4-doovv.transpose(1,0,2,3)*2
    rdm2[vir,vir,occ,occ] = rdm2[occ,occ,vir,vir].transpose(2,3,0,1)

    # vvvo block and its three index permutations
    rdm2[vir,vir,vir,occ] = dvvvo*4-dvvvo.transpose(1,0,2,3)*2
    vvvo = rdm2[vir,vir,vir,occ]
    rdm2[vir,vir,occ,vir] = vvvo.transpose(1,0,3,2)
    rdm2[vir,occ,vir,vir] = vvvo.transpose(2,3,0,1)
    rdm2[occ,vir,vir,vir] = vvvo.transpose(3,2,1,0)

    # vvvv block
    rdm2[vir,vir,vir,vir] = dvvvv*4-dvvvv.transpose(1,0,2,3)*2

    # Fixme: This seems giving right answer, but not based on solid formula
    theta = c2*2 - c2.transpose(1,0,2,3)
    c2_0213 = c2.transpose(0,2,1,3).reshape(nov,-1)
    c2_0312 = c2.transpose(0,3,1,2).reshape(nov,-1)
    th_0213 = theta.transpose(0,2,1,3).reshape(nov,-1)
    th_0312 = theta.transpose(0,3,1,2).reshape(nov,-1)
    ovov_shape = (nocc,nvir,nocc,nvir)

    dovov = numpy.einsum('ia,kc->icka', c1, c1) * -2
    #:dovov -= numpy.einsum('kjcb,kica->jaib', c2, theta) * 2
    #:dovov -= numpy.einsum('ikcb,jkca->iajb', c2, theta) * 2
    dovov -= lib.ddot(c2_0213.T, th_0213, 2).reshape(ovov_shape).transpose(0,3,2,1)
    dovov -= lib.ddot(c2_0312, th_0312.T, 2).reshape(ovov_shape).transpose(0,3,2,1)

    dvoov = numpy.einsum('ia,kc->cika', c1, c1) * 4
    #:dvoov += numpy.einsum('kica,kjcb->ajib', theta, theta) * 2
    dvoov += lib.ddot(th_0213.T, th_0213, 2).reshape(ovov_shape).transpose(1,2,0,3)

    # rdm2[:nocc,nocc:,:nocc,nocc:] = dovov*4-dvoov.transpose(1,0,2,3)*2
    # rdm2[nocc:,:nocc,nocc:,:nocc] = rdm2[:nocc,nocc:,:nocc,nocc:].transpose(1,0,3,2)
    # rdm2[nocc:,:nocc,:nocc,nocc:] = dvoov*4-dovov.transpose(1,0,2,3)*2
    # rdm2[:nocc,nocc:,nocc:,:nocc] = rdm2[nocc:,:nocc,:nocc,nocc:].transpose(1,0,3,2)
    rdm2[occ,vir,occ,vir] = dovov
    rdm2[vir,occ,vir,occ] = dovov.transpose(1,0,3,2)
    rdm2[vir,occ,occ,vir] = dvoov
    rdm2[occ,vir,vir,occ] = dvoov.transpose(1,0,3,2)

    # one-particle part, with 2 subtracted from the occupied diagonal
    rdm1 = make_rdm1(ci, nmo, nocc)
    diag = numpy.arange(nocc)
    rdm1[diag,diag] -= 2

    for p in range(nocc):
        for q in range(nocc):
            rdm2[p,q,p,q] += 4
            rdm2[p,q,q,p] -= 2
        twice = rdm1 * 2
        rdm2[p,:,p,:] += twice
        rdm2[:,p,:,p] += twice
        rdm2[:,p,p,:] -= rdm1
        rdm2[p,:,:,p] -= rdm1
    return rdm2
def get_eri(mydf, kpts=None, compact=getattr(__config__, 'pbc_df_ao2mo_get_eri_compact', True)):
    """Assemble AO two-electron integrals from the tensors of ``mydf.sr_loop``.

    ``kpts`` is normalised by ``_format_kpts`` into four k-points.  If
    ``_iskconserv`` rejects them, a warning is logged and a zero array of
    shape ``(nao,)*4`` is returned.  At the gamma point the result is real
    and s4-packed (``compact=True``) or expanded via ``ao2mo.restore`` and
    reshaped to ``(nao**2, -1)``; every other branch returns a complex
    ``(nao**2, nao**2)`` array.
    """
    if mydf._cderi is None:
        mydf.build()

    cell = mydf.cell
    nao = cell.nao_nr()
    kptijkl = _format_kpts(kpts)
    if not _iskconserv(cell, kptijkl):
        lib.logger.warn(cell, 'df_ao2mo: momentum conservation not found in '
                        'the given k-points %s', kptijkl)
        return numpy.zeros((nao,nao,nao,nao))

    kpti, kptj, kptk, kptl = kptijkl
    # the nao**4*16/1e6 term (MB) is subtracted from the available memory
    mem_left = mydf.max_memory-lib.current_memory()[0]-nao**4*16/1e6
    max_memory = max(2000, mem_left)
    kpts_pq = kptijkl[:2]

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if gamma_point(kptijkl):
        npair = nao * (nao+1) // 2
        eri_re = numpy.zeros((npair,npair))
        for LpqR, LpqI, sign in mydf.sr_loop(kpts_pq, max_memory, True):
            lib.ddot(LpqR.T, LpqR, sign, eri_re, 1)
            LpqR = LpqI = None
        if compact:
            return eri_re
        return ao2mo.restore(1, eri_re, nao).reshape(nao**2,-1)

    # every remaining case is complex with N^4 elements
    nao2 = nao*nao
    eri_re = numpy.zeros((nao2,nao2))
    eri_im = numpy.zeros((nao2,nao2))

    # (kpt) i == k && j == l
    if is_zero(kpti-kptk) and is_zero(kptj-kptl):
        for LpqR, LpqI, sign in mydf.sr_loop(kpts_pq, max_memory, False):
            zdotNN(LpqR.T, LpqI.T, LpqR, LpqI, sign, eri_re, eri_im, 1)
            LpqR = LpqI = None
        return eri_re + eri_im*1j

    ####################
    # (kpt) i == j == k == l != 0
    #
    # (kpt) i == l && j == k && i != j && j != k  =>
    # both vbar and ovlp are zero. It corresponds to the exchange integral.
    if is_zero(kpti-kptl) and is_zero(kptj-kptk):
        for LpqR, LpqI, sign in mydf.sr_loop(kpts_pq, max_memory, False):
            zdotNC(LpqR.T, LpqI.T, LpqR, LpqI, sign, eri_re, eri_im, 1)
            LpqR = LpqI = None
        # transpose(0,1,3,2) because
        # j == k && i == l  =>
        # (L|ij).transpose(0,2,1).conj() = (L^*|ji) = (L^*|kl)  =>  (M|kl)
        eri_c = (eri_re+eri_im*1j).reshape(-1,nao,nao)
        swapped = lib.transpose(eri_c, axes=(0,2,1))
        return swapped.reshape(nao**2,-1)

    ####################
    # aosym = s1, complex integrals
    #
    # kpti == kptj  =>  kptl == kptk
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
    # vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
    # So  kptl/b - kptk/b  must be -1 < k/b < 1.
    blksize = int(max_memory*.4e6/16/nao**2)
    loop_pq = mydf.sr_loop(kpts_pq, max_memory, False, blksize)
    loop_rs = mydf.sr_loop(kptijkl[2:], max_memory, False, blksize)
    for (LpqR, LpqI, sign), (LrsR, LrsI, sign1) in lib.izip(loop_pq, loop_rs):
        zdotNN(LpqR.T, LpqI.T, LrsR, LrsI, sign, eri_re, eri_im, 1)
        LpqR = LpqI = LrsR = LrsI = None
    return eri_re + eri_im*1j
def _make_j3c(mydf, cell, auxcell, kptij_lst):
    """Build the fitted three-index tensors in the HDF5 file ``mydf._cderi``.

    Steps, as performed below:
      1. ``outcore.aux_e2`` writes the raw 'j3c/<idx>' datasets for the fused
         auxiliary cell.
      2. For every unique ``kptj - kpti`` the two-index metric ``j2c`` is
         corrected by a Fourier-space term and then either Cholesky
         factorised (tag 'CD') or, if that fails, eigen-decomposed with
         eigenvalues <= LINEAR_DEP_THR dropped (tag 'eig').
      3. ``make_kpt`` subtracts the Fourier-space part from 'j3c', applies
         the factor from step 2 and overwrites the datasets, finally
         truncating them to the retained number of auxiliary functions.

    Args:
        mydf: density-fitting object (uses ``_cderi``, ``gs``, ``max_memory``,
            ``stdout``, ``verbose``, ``weighted_coulG``, ``auxbar``).
        cell: the cell whose AO pairs are fitted.
        auxcell: auxiliary cell; ``auxcell.nao_nr()`` gives ``naux``.
        kptij_lst: array whose ``[:, 0]`` / ``[:, 1]`` columns are the kpti /
            kptj of each pair.

    Returns:
        None.  The result is stored in the file ``mydf._cderi``.
    """
    # time.clock() was removed in Python 3.8; keep it where it still exists
    # and fall back to time.process_time() otherwise.
    cpu_clock = getattr(time, 'clock', None) or time.process_time
    t1 = (cpu_clock(), time.time())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, auxcell)
    outcore.aux_e2(cell, fused_cell, mydf._cderi, 'cint3c2e_sph',
                   kptij_lst=kptij_lst, dataname='j3c', max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    gs = mydf.gs
    Gv, Gvbase, kws = cell.get_Gv_weights(gs)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngs = gxyz.shape[0]

    kptis = kptij_lst[:, 0]
    kptjs = kptij_lst[:, 1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('cint2c2e_sph', hermi=1, kpts=uniq_kpts)
    kLRs = []
    kLIs = []
    # NOTE: an alternative evaluation of j2c based on make_modchg_basis(auxcell,
    # mydf.eta) used to be kept here as commented-out code; it was disabled
    # because it might have a larger numerical error.

    for k, kpt in enumerate(uniq_kpts):
        aoaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
        coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, gs))
        LkR = aoaux.real * coulG
        LkI = aoaux.imag * coulG

        # subtract the Fourier-space term from the rows beyond naux, then
        # restore the (Hermitian) upper-right block from the lower-left one
        if is_zero(kpt):  # kpti == kptj
            j2c[k][naux:] -= lib.ddot(LkR[naux:], LkR.T)
            j2c[k][naux:] -= lib.ddot(LkI[naux:], LkI.T)
            j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T
        else:
            j2cR, j2cI = zdotCN(LkR[naux:], LkI[naux:], LkR.T, LkI.T)
            j2c[k][naux:] -= j2cR + j2cI * 1j
            j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T.conj()
        j2c[k] = fuse(fuse(j2c[k]).T).T
        try:
            j2c[k] = ('CD', scipy.linalg.cholesky(j2c[k], lower=True))
        except scipy.linalg.LinAlgError:
            # A metric that is not positive definite is not treated as an
            # error: fall back to an eigen-decomposition and drop the
            # (near) linearly dependent functions.
            w, v = scipy.linalg.eigh(j2c[k])
            log.debug2('metric linear dependency for kpt %s', k)
            log.debug2('cond = %.4g, drop %d bfns',
                       w[0] / w[-1], numpy.count_nonzero(w < LINEAR_DEP_THR))
            v = v[:, w > LINEAR_DEP_THR].T.conj()
            v /= numpy.sqrt(w[w > LINEAR_DEP_THR]).reshape(-1, 1)
            j2c[k] = ('eig', v)

        kLR = LkR[naux:].T
        kLI = LkI[naux:].T
        if not kLR.flags.c_contiguous:
            kLR = lib.transpose(LkR[naux:])
        if not kLI.flags.c_contiguous:
            kLI = lib.transpose(LkI[naux:])
        # second sqrt(coulG) factor: kLR/kLI now carry the full kernel
        kLR *= coulG.reshape(-1, 1)
        kLI *= coulG.reshape(-1, 1)
        kLRs.append(kLR)
        kLIs.append(kLI)
        aoaux = LkR = LkI = kLR = kLI = coulG = None

    # number of auxiliary functions retained for each unique k-point
    nauxs = [v[1].shape[0] for v in j2c]
    # The datasets are rewritten below, so open read/write explicitly
    # (h5py >= 3 opens files read-only when no mode is given).
    feri = h5py.File(mydf._cderi, 'a')

    def make_kpt(uniq_kptji_id):  # kpt = kptj - kpti
        kpt = uniq_kpts[uniq_kptji_id]
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)
        kLR = kLRs[uniq_kptji_id]
        kLI = kLIs[uniq_kptji_id]

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao * (nao + 1) // 2

            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('cint1e_ovlp_sph', hermi=1, kpts=adapted_kptjs)
            for k, ji in enumerate(adapted_ji_idx):
                ovlp[k] = lib.pack_tril(ovlp[k])
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory - mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory * .6 * 1e6 / 16 / naux / (nkptj + 1)), 1),
                     nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16, int(max_memory * .2 * 1e6 / 16 / buflen / (nkptj + 1)))
        else:
            Gblksize = max(16, int(max_memory * .4 * 1e6 / 16 / buflen / (nkptj + 1)))
        Gblksize = min(Gblksize, ngs, 16384)
        pqkRbuf = numpy.empty(buflen * Gblksize)
        pqkIbuf = numpy.empty(buflen * Gblksize)
        # buf for ft_aopair
        buf = numpy.zeros((nkptj, buflen * Gblksize), dtype=numpy.complex128)

        col1 = 0
        for istep, sh_range in enumerate(shranges):
            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d',
                       istep+1, len(shranges), *sh_range)
            bstart, bend, ncol = sh_range
            col0, col1 = col1, col1 + ncol
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                v = numpy.asarray(feri['j3c/%d' % idx][:, col0:col1])
                if is_zero(kpt):
                    for i, c in enumerate(vbar):
                        if c != 0:
                            v[i] -= c * ovlp[k][col0:col1]
                j3cR.append(numpy.asarray(v.real, order='C'))
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    # purely real case: no imaginary part is kept
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order='C'))
                v = None

            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
                for p0, p1 in lib.prange(0, ngs, Gblksize):
                    ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                          b, gxyz[p0:p1], Gvbase, kpt,
                                          adapted_kptjs, out=buf)
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = numpy.ndarray((nG, ncol), dtype=numpy.complex128,
                                             order='F', buffer=buf[k])
                        pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                        pqkR[:] = aoao.real.T
                        pqkI[:] = aoao.imag.T
                        # clear the shared buffer before the next call fills it
                        aoao[:] = 0
                        lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                        if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                            lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                            lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)
                ni = ncol // nao
                for p0, p1 in lib.prange(0, ngs, Gblksize):
                    ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                          b, gxyz[p0:p1], Gvbase, kpt,
                                          adapted_kptjs, out=buf)
                    nG = p1 - p0
                    for k, ji in enumerate(adapted_ji_idx):
                        aoao = numpy.ndarray((nG, ni, nao), dtype=numpy.complex128,
                                             order='F', buffer=buf[k])
                        pqkR = numpy.ndarray((ni, nao, nG), buffer=pqkRbuf)
                        pqkI = numpy.ndarray((ni, nao, nG), buffer=pqkIbuf)
                        pqkR[:] = aoao.real.transpose(1, 2, 0)
                        pqkI[:] = aoao.imag.transpose(1, 2, 0)
                        aoao[:] = 0
                        pqkR = pqkR.reshape(-1, nG)
                        pqkI = pqkI.reshape(-1, nG)
                        zdotCN(kLR[p0:p1].T, kLI[p0:p1].T, pqkR.T, pqkI.T,
                               -1, j3cR[k][naux:], j3cI[k][naux:], 1)

            # apply the metric factor and write this column range back
            naux0 = nauxs[uniq_kptji_id]
            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)
                if j2c[uniq_kptji_id][0] == 'CD':
                    v = scipy.linalg.solve_triangular(j2c[uniq_kptji_id][1], v,
                                                      lower=True, overwrite_b=True)
                else:
                    v = lib.dot(j2c[uniq_kptji_id][1], v)
                feri['j3c/%d' % ji][:naux0, col0:col1] = v

        # shrink each dataset to the retained rows by replacing it
        naux0 = nauxs[uniq_kptji_id]
        for k, ji in enumerate(adapted_ji_idx):
            v = feri['j3c/%d' % ji][:naux0]
            del (feri['j3c/%d' % ji])
            feri['j3c/%d' % ji] = v

    try:
        for k, kpt in enumerate(uniq_kpts):
            make_kpt(k)
    finally:
        # release the file handle even if a k-point fails
        feri.close()
def get_eri(mydf, kpts=None, compact=getattr(__config__, 'pbc_df_ao2mo_get_eri_compact', True)):
    """Assemble AO two-electron integrals from the AO-pair blocks of ``mydf.pw_loop``.

    ``kpts`` is normalised by ``_format_kpts`` into four k-points.  If
    ``_iskconserv`` rejects them, a warning is logged and a zero array of
    shape ``(nao,)*4`` is returned.  The blocks are weighted with
    ``mydf.weighted_coulG(kptj - kpti, False, mesh)``.  At the gamma point
    the result is real and s4-packed (``compact=True``) or expanded via
    ``ao2mo.restore`` and reshaped to ``(nao**2, -1)``; the other branches
    return a complex ``(nao**2, nao**2)`` array.
    """
    cell = mydf.cell
    nao = cell.nao_nr()
    kptijkl = _format_kpts(kpts)
    if not _iskconserv(cell, kptijkl):
        lib.logger.warn(cell, 'aft_ao2mo: momentum conservation not found in '
                        'the given k-points %s', kptijkl)
        return numpy.zeros((nao,nao,nao,nao))

    kpti, kptj, kptk, kptl = kptijkl
    q = kptj - kpti
    mesh = mydf.mesh
    wcoul = mydf.weighted_coulG(q, False, mesh)
    mem_left = (mydf.max_memory - lib.current_memory()[0]) * .8
    max_memory = max(2000, mem_left)
    kpts_pq = kptijkl[:2]

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if gamma_point(kptijkl):
        npair = nao * (nao+1) // 2
        eri_re = numpy.zeros((npair,npair))
        blocks = mydf.pw_loop(mesh, kpts_pq, q, max_memory=max_memory,
                              aosym='s2')
        for pqkR, pqkI, p0, p1 in blocks:
            lib.ddot(pqkR*wcoul[p0:p1], pqkR.T, 1, eri_re, 1)
            lib.ddot(pqkI*wcoul[p0:p1], pqkI.T, 1, eri_re, 1)
            pqkR = pqkI = None
        if compact:
            return eri_re
        return ao2mo.restore(1, eri_re, nao).reshape(nao**2,-1)

    # complex integrals, N^4 elements
    eri_re = numpy.zeros((nao**2,nao**2))
    eri_im = numpy.zeros((nao**2,nao**2))

    ####################
    # (kpt) i == j == k == l != 0
    # (kpt) i == l && j == k && i != j && j != k  =>
    if is_zero(kpti-kptl) and is_zero(kptj-kptk):
        blocks = mydf.pw_loop(mesh, kpts_pq, q, max_memory=max_memory)
        for pqkR, pqkI, p0, p1 in blocks:
            # rho_pq(G+k_pq) * conj(rho_rs(G-k_rs))
            zdotNC(pqkR*wcoul[p0:p1], pqkI*wcoul[p0:p1], pqkR.T, pqkI.T,
                   1, eri_re, eri_im, 1)
            pqkR = pqkI = None
        # drop the references before the complex copy is created
        pqkR = pqkI = wcoul = None
        # transpose(0,1,3,2) because
        # j == k && i == l  =>
        # (L|ij).transpose(0,2,1).conj() = (L^*|ji) = (L^*|kl)  =>  (M|kl)
        # rho_rs(-G+k_rs) = conj(transpose(rho_sr(G+k_sr), (0,2,1)))
        eri_c = (eri_re+eri_im*1j).reshape(-1,nao,nao)
        swapped = lib.transpose(eri_c, axes=(0,2,1))
        return swapped.reshape(nao**2,-1)

    ####################
    # aosym = s1, complex integrals
    #
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the wave
    # vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1, by definition.
    # So  kptl/b - kptk/b  must be -1 < k/b < 1.  =>  kptl == kptk
    #
    #
    #       (pq|rs) = \sum_G 4\pi rho_pq rho_rs / |G+k_{pq}|^2
    #       rho_pq = 1/N \sum_{Tp,Tq} \int exp(-i(G+k_{pq})*r) p(r-Tp) q(r-Tq) dr
    #              = \sum_{Tq} exp(i k_q*Tq) \int exp(-i(G+k_{pq})*r) p(r) q(r-Tq) dr
    # Note the k-point wrap-around for rho_rs, which leads to G+k_{pq} in FT
    #       rho_rs = 1/N \sum_{Tr,Ts} \int exp( i(G+k_{pq})*r) r(r-Tr) s(r-Ts) dr
    #              = \sum_{Ts} exp(i k_s*Ts) \int exp( i(G+k_{pq})*r) r(r) s(r-Ts) dr
    # rho_pq can be directly evaluated by AFT (function pw_loop)
    #       rho_pq = pw_loop(k_q, G+k_{pq})
    # Assuming r(r) and s(r) are real functions, rho_rs is evaluated
    #       rho_rs = 1/N \sum_{Tr,Ts} \int exp( i(G+k_{pq})*r) r(r-Tr) s(r-Ts) dr
    #              = conj(\sum_{Ts} exp(-i k_s*Ts) \int exp(-i(G+k_{pq})*r) r(r) s(r-Ts) dr)
    #              = conj( pw_loop(-k_s, G+k_{pq}) )
    #
    # TODO: For complex AO function r(r) and s(r), pw_loop function needs to be
    # extended to include Gv vector in the arguments
    loop_pq = mydf.pw_loop(mesh, kpts_pq, q, max_memory=max_memory*.5)
    loop_rs = mydf.pw_loop(mesh,-kptijkl[2:], q, max_memory=max_memory*.5)
    for (pqkR, pqkI, p0, p1), (rskR, rskI, q0, q1) in lib.izip(loop_pq, loop_rs):
        pqkR *= wcoul[p0:p1]
        pqkI *= wcoul[p0:p1]
        zdotNC(pqkR, pqkI, rskR.T, rskI.T, 1, eri_re, eri_im, 1)
        pqkR = pqkI = rskR = rskI = None
    return (eri_re+eri_im*1j)
def update_amps(mycc, t1, t2, eris):
    '''Compute updated CCSD amplitudes from the current ``t1`` and ``t2``.

    The work is blocked over the occupied index (``blksize``) and, for the
    ovvv integrals, over the first virtual index (``blknvir``).  Integral
    slices are read from ``eris`` and partly prefetched in background threads.

    Args:
        mycc: CCSD object; ``stdout``, ``verbose``, ``max_memory``, ``direct``
            and ``add_wvvVV_`` are used here.
        t1: singles amplitudes, shape (nocc, nvir).
        t2: doubles amplitudes, indexed as [i, j, a, b].
        eris: integral container exposing ``fock``, ``oooo``, ``ooov``,
            ``ovoo``, ``oovv``, ``ovov`` and ``ovvv``.

    Returns:
        (t1new, t2new), already divided by the orbital-energy denominators
        built from the diagonal of ``eris.fock``.
    '''
    # NOTE(review): time.clock() no longer exists on Python >= 3.8; kept
    # because the logger timers in this project are fed the same clock pair.
    time0 = time.clock(), time.time()
    log = logger.Logger(mycc.stdout, mycc.verbose)
    nocc, nvir = t1.shape
    nov = nocc*nvir
    fock = eris.fock
    t1new = numpy.zeros_like(t1)
    t2new = numpy.zeros_like(t2)
    # The vvvv contribution is produced for i >= j only; unpack it into the
    # lower triangle of t2new.  The halved diagonal is compensated by the
    # final symmetrisation t2new[i,j] += t2new[j,i].T.
    t2new_tril = numpy.zeros((nocc*(nocc+1)//2,nvir,nvir))
    mycc.add_wvvVV_(t1, t2, eris, t2new_tril)
    for i in range(nocc):
        for j in range(i+1):
            t2new[i,j] = t2new_tril[i*(i+1)//2+j]
        t2new[i,i] *= .5
    t2new_tril = None
    time1 = log.timer_debug1('vvvv', *time0)

    #** make_inter_F
    fov = fock[:nocc,nocc:].copy()
    t1new += fov

    foo = fock[:nocc,:nocc].copy()
    foo[range(nocc),range(nocc)] = 0
    foo += .5 * numpy.einsum('ia,ja->ij', fock[:nocc,nocc:], t1)

    fvv = fock[nocc:,nocc:].copy()
    fvv[range(nvir),range(nvir)] = 0
    fvv -= .5 * numpy.einsum('ia,ib->ab', t1, fock[:nocc,nocc:])

    #: woooo = numpy.einsum('la,ikja->ikjl', t1, eris.ooov)
    eris_ooov = _cp(eris.ooov)
    foo += numpy.einsum('kc,jikc->ij', 2*t1, eris_ooov)
    foo += numpy.einsum('kc,jkic->ij', -t1, eris_ooov)
    woooo = lib.ddot(eris_ooov.reshape(-1,nvir), t1.T).reshape((nocc,)*4)
    woooo = lib.transpose_sum(woooo.reshape(nocc*nocc,-1), inplace=True)
    woooo += _cp(eris.oooo).reshape(nocc**2,-1)
    woooo = _cp(woooo.reshape(nocc,nocc,nocc,nocc).transpose(0,2,1,3))
    eris_ooov = None
    time1 = log.timer_debug1('woooo', *time1)

    unit = _memory_usage_inloop(nocc, nvir)
    max_memory = max(2000, mycc.max_memory - lib.current_memory()[0])
    blksize = min(nocc, max(BLKMIN, int(max_memory/unit)))
    blknvir = int((max_memory*.9e6/8-blksize*nocc*nvir**2*6)/(blksize*nvir**2*2))
    blknvir = min(nvir, max(BLKMIN, blknvir))
    log.debug1('max_memory %d MB, nocc,nvir = %d,%d blksize = %d,%d',
               max_memory, nocc, nvir, blksize, blknvir)
    nvir_pair = nvir * (nvir+1) // 2

    def prefect_ovvv(p0, p1, q0, q1, prefetch):
        # Load the NEXT virtual block (if there is one) into ``prefetch``.
        if q1 != nvir:
            q0, q1 = q1, min(nvir, q1+blknvir)
            readbuf = numpy.ndarray((p1-p0,q1-q0,nvir_pair), buffer=prefetch)
            readbuf[:] = eris.ovvv[p0:p1,q0:q1]

    def prefect_ovov(p0, p1, buf):
        buf[:] = eris.ovov[p0:p1]

    def prefect_oovv(p0, p1, buf):
        buf[:] = eris.oovv[p0:p1]

    # Five scratch buffers shared by all intermediates of one occupied block.
    buflen = max(nocc*nvir**2, nocc**3)
    bufs = numpy.empty((5,blksize*buflen))
    buf1, buf2, buf3, buf4, buf5 = bufs
    for p0, p1 in prange(0, nocc, blksize):
        #: wOoVv += numpy.einsum('iabc,jc->ijab', eris.ovvv, t1)
        #: wOoVv -= numpy.einsum('jbik,ka->jiba', eris.ovoo, t1)
        wOoVv = numpy.ndarray((nocc,p1-p0,nvir,nvir), buffer=buf3)
        wooVV = numpy.ndarray((p1-p0,nocc,nvir,nvir), buffer=buf4)
        handler = None
        readbuf = numpy.empty((p1-p0,blknvir,nvir_pair))
        prefetchbuf = numpy.empty((p1-p0,blknvir,nvir_pair))
        ovvvbuf = numpy.empty((p1-p0,blknvir,nvir,nvir))
        for q0, q1 in lib.prange(0, nvir, blknvir):
            if q0 == 0:
                readbuf[:] = eris.ovvv[p0:p1,q0:q1]
            else:
                # The previous iteration prefetched this block.
                readbuf, prefetchbuf = prefetchbuf, readbuf
            handler = async_do(handler, prefect_ovvv, p0, p1, q0, q1, prefetchbuf)
            eris_ovvv = numpy.ndarray(((p1-p0)*(q1-q0),nvir_pair), buffer=readbuf)
            #:eris_ovvv = _cp(eris.ovvv[p0:p1,q0:q1])
            eris_ovvv = lib.unpack_tril(eris_ovvv, out=ovvvbuf)
            eris_ovvv = eris_ovvv.reshape(p1-p0,q1-q0,nvir,nvir)

            #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
            #: tmp = numpy.einsum('ijcd,kcdb->ijbk', tau, eris.ovvv)
            #: t2new += numpy.einsum('ka,ijbk->ijab', -t1, tmp)
            if not mycc.direct:
                eris_vovv = lib.transpose(eris_ovvv.reshape(-1,nvir))
                eris_vovv = eris_vovv.reshape(nvir*(p1-p0),-1)
                tmp = numpy.ndarray((nocc,nocc,nvir,p1-p0), buffer=buf1)
                for j0, j1 in prange(0, nocc, blksize):
                    tau = numpy.ndarray((j1-j0,nocc,q1-q0,nvir), buffer=buf2)
                    tau = numpy.einsum('ia,jb->ijab', t1[j0:j1,q0:q1], t1, out=tau)
                    tau += t2[j0:j1,:,q0:q1]
                    lib.ddot(tau.reshape((j1-j0)*nocc,-1), eris_vovv.T, 1,
                             tmp[j0:j1].reshape((j1-j0)*nocc,-1), 0)
                tmp1 = numpy.ndarray((nocc,nocc,nvir,p1-p0), buffer=buf2)
                tmp1[:] = tmp.transpose(1,0,2,3)
                lib.ddot(tmp1.reshape(-1,p1-p0), t1[p0:p1], -1,
                         t2new.reshape(-1,nvir), 1)
                eris_vovv = tau = tmp1 = tmp = None

            fvv += numpy.einsum('kc,kcba->ab', 2*t1[p0:p1,q0:q1], eris_ovvv)
            fvv[:,q0:q1] += numpy.einsum('kc,kbca->ab', -t1[p0:p1], eris_ovvv)

            #: wooVV -= numpy.einsum('jc,icba->ijba', t1, eris_ovvv)
            tmp = t1[:,q0:q1].copy()
            # The contraction index c only covers the current virtual block,
            # so the partial results must be summed over blocks.  wooVV lives
            # in an uninitialised scratch buffer: overwrite on the first
            # block, accumulate on the following ones.
            beta = 0 if q0 == 0 else 1
            for i in range(eris_ovvv.shape[0]):
                lib.ddot(tmp, eris_ovvv[i].reshape(q1-q0,-1), -1,
                         wooVV[i].reshape(nocc,-1), beta)

            #: wOoVv += numpy.einsum('ibac,jc->jiba', eris_ovvv, t1)
            tmp = numpy.ndarray((nocc,p1-p0,q1-q0,nvir), buffer=buf1)
            lib.ddot(t1, eris_ovvv.reshape(-1,nvir).T, 1, tmp.reshape(nocc,-1))
            wOoVv[:,:,q0:q1] = tmp

            #: theta = t2.transpose(1,0,2,3) * 2 - t2
            #: t1new += numpy.einsum('ijcb,jcba->ia', theta, eris.ovvv)
            theta = tmp
            theta[:] = t2[p0:p1,:,q0:q1,:].transpose(1,0,2,3)
            theta *= 2
            theta -= t2[:,p0:p1,q0:q1,:]
            lib.ddot(theta.reshape(nocc,-1), eris_ovvv.reshape(-1,nvir), 1, t1new, 1)
            theta = tmp = None
        handler.join()
        readbuf = prefetchbuf = ovvvbuf = eris_ovvv = None
        time2 = log.timer_debug1('ovvv [%d:%d]'%(p0, p1), *time1)

        tmp = numpy.ndarray((nocc,p1-p0,nvir,nocc), buffer=buf1)
        tmp[:] = _cp(eris.ovoo[p0:p1]).transpose(2,0,1,3)
        lib.ddot(tmp.reshape(-1,nocc), t1, -1, wOoVv.reshape(-1,nvir), 1)

        eris_ooov = _cp(eris.ooov[p0:p1])
        eris_oovv = numpy.empty((p1-p0,nocc,nvir,nvir))
        # Read oovv in the background while the ooov term is contracted.
        handler = lib.background_thread(prefect_oovv, p0, p1, eris_oovv)
        tmp = numpy.ndarray((p1-p0,nocc,nvir,nocc), buffer=buf1)
        tmp[:] = eris_ooov.transpose(0,1,3,2)
        #: wooVV = numpy.einsum('ka,ijkb->ijba', t1, eris.ooov[p0:p1])
        lib.ddot(tmp.reshape(-1,nocc), t1, 1, wooVV.reshape(-1,nvir), 1)
        t2new[p0:p1] += wOoVv.transpose(1,0,2,3)

        #:eris_oovv = _cp(eris.oovv[p0:p1])
        handler.join()
        eris_ovov = numpy.empty((p1-p0,nvir,nocc,nvir))
        # Read ovov in the background while the oovv terms are contracted.
        handler = lib.background_thread(prefect_ovov, p0, p1, eris_ovov)
        #: g2 = 2 * eris.oOVv - eris.oovv
        #: t1new += numpy.einsum('jb,ijba->ia', t1, g2)
        t1new[p0:p1] += numpy.einsum('jb,ijba->ia', -t1, eris_oovv)
        wooVV -= eris_oovv

        #tmp = numpy.einsum('ic,jkbc->jikb', t1, eris_oovv)
        #t2new[p0:p1] += numpy.einsum('ka,jikb->ijba', -t1, tmp)
        tmp1 = numpy.ndarray((nocc,nocc*nvir), buffer=buf1)
        tmp2 = numpy.ndarray((nocc*nvir,nocc), buffer=buf2)
        for j in range(p1-p0):
            tmp = lib.ddot(t1, eris_oovv[j].reshape(-1,nvir).T, 1, tmp1)
            lib.transpose(_cp(tmp).reshape(nocc,nocc,nvir), axes=(0,2,1), out=tmp2)
            t2new[:,p0+j] -= lib.ddot(tmp2, t1).reshape(nocc,nvir,nvir)
        eris_oovv = None

        #:eris_ovov = _cp(eris.ovov[p0:p1])
        handler.join()
        for i in range(p1-p0):
            t2new[p0+i] += eris_ovov[i].transpose(1,0,2) * .5
        t1new[p0:p1] += numpy.einsum('jb,iajb->ia', 2*t1, eris_ovov)
        #:tmp = numpy.einsum('ic,jbkc->jibk', t1, eris_ovov)
        #:t2new[p0:p1] += numpy.einsum('ka,jibk->jiba', -t1, tmp)
        for j in range(p1-p0):
            lib.ddot(t1, eris_ovov[j].reshape(-1,nvir).T, 1, tmp1)
            lib.ddot(tmp1.reshape(-1,nocc), t1, -1,
                     t2new[p0+j].reshape(-1,nvir), 1)
        tmp1 = tmp2 = tmp = None

        fov[p0:p1] += numpy.einsum('kc,iakc->ia', t1, eris_ovov) * 2
        fov[p0:p1] -= numpy.einsum('kc,icka->ia', t1, eris_ovov)

        #: fvv -= numpy.einsum('ijca,ibjc->ab', theta, eris.ovov)
        #: foo += numpy.einsum('iakb,jkba->ij', eris.ovov, theta)
        tau = numpy.ndarray((nocc,nvir,nvir), buffer=buf1)
        theta = numpy.ndarray((nocc,nvir,nvir), buffer=buf2)
        for i in range(p1-p0):
            tau = numpy.einsum('a,jb->jab', t1[p0+i]*.5, t1, out=tau)
            tau += t2[p0+i]
            theta = lib.transpose(tau, axes=(0,2,1), out=theta)
            theta *= 2
            theta -= tau
            # tau is no longer needed; reuse its storage for the transpose.
            vov = lib.transpose(eris_ovov[i].reshape(nvir,-1), out=tau)
            lib.ddot(vov.reshape(nocc,-1), theta.reshape(nocc,-1).T, 1, foo, 1)
            lib.ddot(theta.reshape(-1,nvir).T, eris_ovov[i].reshape(nvir,-1).T,
                     -1, fvv, 1)
        tau = theta = vov = None

        #: theta = t2.transpose(0,2,1,3) * 2 - t2.transpose(0,3,2,1)
        #: t1new += numpy.einsum('jb,ijba->ia', fov, theta)
        #: t1new -= numpy.einsum('kijb,kjba->ia', eris_ooov, theta)
        theta = numpy.ndarray((p1-p0,nvir,nocc,nvir), buffer=buf1)
        for i in range(p1-p0):
            tmp = t2[p0+i].transpose(0,2,1) * 2
            tmp-= t2[p0+i]
            lib.ddot(eris_ooov[i].reshape(nocc,-1), tmp.reshape(-1,nvir), -1,
                     t1new, 1)
            lib.transpose(_cp(tmp).reshape(-1,nvir), out=theta[i])  # theta[i] = tmp.transpose(2,0,1)
        t1new += numpy.einsum('jb,jbia->ia', fov[p0:p1], theta)
        eris_ooov = None

        #: wOVov += eris.ovov
        #: tau = theta - numpy.einsum('ic,kb->ikcb', t1, t1*2)
        #: wOVov += .5 * numpy.einsum('jakc,ikcb->jiba', eris.ovov, tau)
        #: wOVov -= .5 * numpy.einsum('jcka,ikcb->jiba', eris.ovov, t2)
        #: t2new += numpy.einsum('ikca,kjbc->ijba', theta, wOVov)
        for i in range(p1-p0):
            wOoVv[:,i] += wooVV[i]*.5  #: jiba + ijba*.5
        wOVov = lib.transpose(wOoVv.reshape(nocc,-1,nvir), axes=(0,2,1), out=buf5)
        wOVov = wOVov.reshape(nocc,nvir,-1,nvir)
        eris_OVov = lib.transpose(eris_ovov.reshape(-1,nov), out=buf3)
        eris_OVov = eris_OVov.reshape(nocc,nvir,-1,nvir)
        wOVov += eris_OVov
        theta = theta.reshape(-1,nov)
        for i in range(nocc):  # OVov-OVov.transpose(0,3,2,1)*.5
            eris_OVov[i] -= eris_OVov[i].transpose(2,1,0)*.5
        for j0, j1 in prange(0, nocc, blksize):
            tau = numpy.ndarray((j1-j0,nvir,nocc,nvir), buffer=buf2)
            for i in range(j1-j0):
                tau[i] = t2[j0+i].transpose(1,0,2) * 2
                tau[i] -= t2[j0+i].transpose(2,0,1)
                tau[i] -= numpy.einsum('a,jb->bja', t1[j0+i]*2, t1)
            #: wOVov[j0:j1] += .5 * numpy.einsum('iakc,jbkc->jbai', eris_ovov, tau)
            lib.ddot(tau.reshape(-1,nov), eris_OVov.reshape(nov,-1),
                     .5, wOVov[j0:j1].reshape((j1-j0)*nvir,-1), 1)

            #theta = t2[p0:p1] * 2 - t2[p0:p1].transpose(0,1,3,2)
            #: t2new[j0:j1] += numpy.einsum('iack,jbck->jiba', theta, wOVov[j0:j1])
            tmp = lib.ddot(wOVov[j0:j1].reshape((j1-j0)*nvir,-1), theta, 1,
                           tau.reshape(-1,nov)).reshape(-1,nvir,nocc,nvir)
            for i in range(j1-j0):
                t2new[j0+i] += tmp[i].transpose(1,0,2)
        theta = wOoVv = wOVov = eris_OVov = tmp = tau = None
        time2 = log.timer_debug1('wOVov [%d:%d]'%(p0, p1), *time2)

        #: tau = t2 + numpy.einsum('ia,jb->ijab', t1, t1)
        #: woooo += numpy.einsum('ijba,klab->ijkl', eris.oOVv, tau)
        #: tau = .5*t2 + numpy.einsum('ia,jb->ijab', t1, t1)
        #: woVoV += numpy.einsum('jkca,ikbc->ijba', tau, eris.oOVv)
        tmp = numpy.ndarray((p1-p0,nvir,nocc,nvir), buffer=buf1)
        tmp[:] = wooVV.transpose(0,2,1,3)
        woVoV = lib.transpose(_cp(tmp).reshape(-1,nov),
                              out=buf4).reshape(nocc,nvir,p1-p0,nvir)
        eris_oOvV = numpy.ndarray((p1-p0,nocc,nvir,nvir), buffer=buf3)
        eris_oOvV[:] = eris_ovov.transpose(0,2,1,3)
        eris_oVOv = lib.transpose(eris_oOvV.reshape(-1,nov,nvir), axes=(0,2,1),
                                  out=buf5)
        eris_oVOv = eris_oVOv.reshape(-1,nvir,nocc,nvir)

        for j0, j1 in prange(0, nocc, blksize):
            tau = make_tau(t2[j0:j1], t1[j0:j1], t1, 1, out=buf2)
            #: woooo[p0:p1,:,j0:j1] += numpy.einsum('ijab,klab->ijkl', eris_oOvV, tau)
            _dgemm('N', 'T', (p1-p0)*nocc, (j1-j0)*nocc, nvir*nvir,
                   eris_oOvV.reshape(-1,nvir*nvir), tau.reshape(-1,nvir*nvir),
                   woooo[p0:p1].reshape(-1,nocc*nocc), 1, 1, 0, 0, j0*nocc)
            for i in range(j1-j0):
                tau[i] -= t2[j0+i] * .5
            #: woVoV[j0:j1] += numpy.einsum('jkca,ickb->jiab', tau, eris_ovov)
            lib.ddot(lib.transpose(tau.reshape(-1,nov,nvir),
                                   axes=(0,2,1)).reshape(-1,nov),
                     eris_oVOv.reshape(-1,nov).T,
                     1, woVoV[j0:j1].reshape((j1-j0)*nvir,-1), 1)
        time2 = log.timer_debug1('woVoV [%d:%d]'%(p0, p1), *time2)

        tau = make_tau(t2[p0:p1], t1[p0:p1], t1, 1, out=buf2)
        #: t2new += .5 * numpy.einsum('klij,klab->ijab', woooo[p0:p1], tau)
        lib.ddot(woooo[p0:p1].reshape(-1,nocc*nocc).T, tau.reshape(-1,nvir*nvir),
                 .5, t2new.reshape(nocc*nocc,-1), 1)
        eris_ovov = eris_oVOv = eris_oOvV = wooVV = tau = tmp = None

        t2ibja = lib.transpose(_cp(t2[p0:p1]).reshape(-1,nov,nvir), axes=(0,2,1),
                               out=buf1).reshape(-1,nvir,nocc,nvir)
        tmp = numpy.ndarray((blksize,nvir,nocc,nvir), buffer=buf2)
        for j0, j1 in prange(0, nocc, blksize):
            #: t2new[j0:j1] += numpy.einsum('ibkc,kcja->ijab', woVoV[j0:j1], t2ibja)
            lib.ddot(woVoV[j0:j1].reshape((j1-j0)*nvir,-1),
                     t2ibja.reshape(-1,nov), 1, tmp[:j1-j0].reshape(-1,nov))
            for i in range(j1-j0):
                t2new[j0+i] += tmp[i].transpose(1,2,0)
                t2new[j0+i] += tmp[i].transpose(1,0,2) * .5
        woVoV = t2ibja = tmp = None
        time1 = log.timer_debug1('contract occ [%d:%d]'%(p0, p1), *time1)
    buf1 = buf2 = buf3 = buf4 = buf5 = bufs = None
    time1 = log.timer_debug1('contract loop', *time0)

    woooo = None
    ft_ij = foo + numpy.einsum('ja,ia->ij', .5*t1, fov)
    ft_ab = fvv - numpy.einsum('ia,ib->ab', .5*t1, fov)
    #: t2new += numpy.einsum('ijac,bc->ijab', t2, ft_ab)
    #: t2new -= numpy.einsum('ki,kjab->ijab', ft_ij, t2)
    lib.ddot(t2.reshape(-1,nvir), ft_ab.T, 1, t2new.reshape(-1,nvir), 1)
    lib.ddot(ft_ij.T, t2.reshape(nocc,-1),-1, t2new.reshape(nocc,-1), 1)

    mo_e = fock.diagonal()
    eia = mo_e[:nocc,None] - mo_e[None,nocc:]
    t1new += numpy.einsum('ib,ab->ia', t1, fvv)
    t1new -= numpy.einsum('ja,ji->ia', t1, foo)
    t1new /= eia

    #: t2new = t2new + t2new.transpose(1,0,3,2)
    # Symmetrise over (i,a) <-> (j,b), apply the energy denominator and mirror
    # the result into the j,i block.
    for i in range(nocc):
        for j in range(i+1):
            t2new[i,j] += t2new[j,i].T
            t2new[i,j] /= lib.direct_sum('a,b->ab', eia[i], eia[j])
            t2new[j,i] = t2new[i,j].T

    time0 = log.timer_debug1('update t1 t2', *time0)
    return t1new, t2new
def get_eri(mydf, kpts=None, compact=True):
    '''Assemble AO two-electron integrals from the density-fitting tensors.

    The 3-index tensors yielded by ``mydf.sr_loop`` are contracted pairwise;
    which contraction is used depends on how the four k-points coincide.

    Args:
        mydf: density-fitting object; ``build()`` is called first if
            ``_cderi`` is still None.
        kpts: k-points, normalised by ``_format_kpts`` into (ki, kj, kk, kl).
        compact: only used at the gamma point.  If true the s4-packed
            (nao_pair, nao_pair) matrix is returned, otherwise it is expanded
            with ``ao2mo.restore``.

    Returns:
        A real matrix at the gamma point, otherwise a complex
        (nao**2, nao**2) matrix.
    '''
    if mydf._cderi is None:
        mydf.build()

    cell = mydf.cell
    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    nao = cell.nao_nr()
    nao_pair = nao * (nao + 1) // 2
    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0] - nao ** 4 * 8 / 1e6)

    def same_kpt(ka, kb):
        # Two k-points coincide when their L1 distance is below tolerance.
        return abs(ka - kb).sum() < KPT_DIFF_TOL

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if abs(kptijkl).sum() < KPT_DIFF_TOL:
        eri_s4 = numpy.zeros((nao_pair, nao_pair))
        for LR, LI in mydf.sr_loop(kptijkl[:2], max_memory, True):
            lib.ddot(LR.T, LR, 1, eri_s4, 1)
            LR = LI = None
        if compact:
            return eri_s4
        return ao2mo.restore(1, eri_s4, nao).reshape(nao ** 2, -1)

    # All remaining cases: complex integrals, N^4 elements.
    nao2 = nao * nao
    eriR = numpy.zeros((nao2, nao2))
    eriI = numpy.zeros((nao2, nao2))

    ####################
    # (kpt) i == k && j == l: both halves use the same 3-index tensor.
    if same_kpt(kpti, kptk) and same_kpt(kptj, kptl):
        for LR, LI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            zdotNN(LR.T, LI.T, LR, LI, 1, eriR, eriI, 1)
            LR = LI = None
        return eriR + eriI * 1j

    ####################
    # (kpt) i == j == k == l != 0
    #
    # (kpt) i == l && j == k && i != j && j != k  =>
    # both vbar and ovlp are zero. It corresponds to the exchange integral.
    if same_kpt(kpti, kptl) and same_kpt(kptj, kptk):
        for LR, LI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            zdotNC(LR.T, LI.T, LR, LI, 1, eriR, eriI, 1)
            LR = LI = None
        # transpose(0,1,3,2) because
        # j == k && i == l  =>
        # (L|ij).transpose(0,2,1).conj() = (L^*|ji) = (L^*|kl)  =>  (M|kl)
        eri = lib.transpose((eriR + eriI * 1j).reshape(-1, nao, nao), axes=(0, 2, 1))
        return eri.reshape(nao ** 2, -1)

    ####################
    # aosym = s1, complex integrals
    #
    # kpti == kptj  =>  kptl == kptk
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the
    # wave vector symmetry.  k is a fraction of reciprocal basis,
    # 0 < k/b < 1, by definition.
    # So kptl/b - kptk/b must be -1 < k/b < 1.
    bra_loop = mydf.sr_loop(kptijkl[:2], max_memory, False)
    ket_loop = mydf.sr_loop(kptijkl[2:], max_memory, False)
    for (braR, braI), (ketR, ketI) in lib.izip(bra_loop, ket_loop):
        zdotNN(braR.T, braI.T, ketR, ketI, 1, eriR, eriI, 1)
        braR = braI = ketR = ketI = None
    return eriR + eriI * 1j
def get_eri(mydf, kpts=None, compact=True):
    '''AO two-electron integrals for a k-point quartet, from DF 3-index tensors.

    Args:
        mydf: density-fitting object; built on demand when ``_cderi`` is None.
        kpts: k-points, normalised by ``_format_kpts`` into (ki, kj, kk, kl).
        compact: gamma point only -- keep the s4-packed (nao_pair, nao_pair)
            form when true, expand through ``ao2mo.restore`` when false.

    Returns:
        Real matrix at the gamma point; complex (nao**2, nao**2) matrix in
        every other case.
    '''
    if mydf._cderi is None:
        mydf.build()

    cell = mydf.cell
    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    nao = cell.nao_nr()
    nao_pair = nao * (nao+1) // 2
    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0]-nao**4*8/1e6)

    def is_close(k1, k2):
        # L1 distance between the two k-points below tolerance
        return abs(k1-k2).sum() < KPT_DIFF_TOL

    def complex_buffers():
        # Separate real/imaginary accumulators for the N^4 complex integrals
        return numpy.zeros((nao*nao,nao*nao)), numpy.zeros((nao*nao,nao*nao))

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if abs(kptijkl).sum() < KPT_DIFF_TOL:
        packed = numpy.zeros((nao_pair,nao_pair))
        for cderiR, cderiI in mydf.sr_loop(kptijkl[:2], max_memory, True):
            lib.ddot(cderiR.T, cderiR, 1, packed, 1)
            cderiR = cderiI = None
        if not compact:
            packed = ao2mo.restore(1, packed, nao).reshape(nao**2,-1)
        return packed

    ####################
    # (kpt) i == k && j == l: the same tensor serves bra and ket
    if is_close(kpti, kptk) and is_close(kptj, kptl):
        eriR, eriI = complex_buffers()
        for cderiR, cderiI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            zdotNN(cderiR.T, cderiI.T, cderiR, cderiI, 1, eriR, eriI, 1)
            cderiR = cderiI = None
        return eriR + eriI*1j

    ####################
    # (kpt) i == j == k == l != 0
    #
    # (kpt) i == l && j == k && i != j && j != k  =>
    # both vbar and ovlp are zero. It corresponds to the exchange integral.
    #
    # complex integrals, N^4 elements
    if is_close(kpti, kptl) and is_close(kptj, kptk):
        eriR, eriI = complex_buffers()
        for cderiR, cderiI in mydf.sr_loop(kptijkl[:2], max_memory, False):
            zdotNC(cderiR.T, cderiI.T, cderiR, cderiI, 1, eriR, eriI, 1)
            cderiR = cderiI = None
        # transpose(0,1,3,2) because
        # j == k && i == l  =>
        # (L|ij).transpose(0,2,1).conj() = (L^*|ji) = (L^*|kl)  =>  (M|kl)
        swapped = lib.transpose((eriR+eriI*1j).reshape(-1,nao,nao), axes=(0,2,1))
        return swapped.reshape(nao**2,-1)

    ####################
    # aosym = s1, complex integrals
    #
    # kpti == kptj  =>  kptl == kptk
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the
    # wave vector symmetry.  k is a fraction of reciprocal basis,
    # 0 < k/b < 1, by definition.
    # So kptl/b - kptk/b must be -1 < k/b < 1.
    eriR, eriI = complex_buffers()
    paired = lib.izip(mydf.sr_loop(kptijkl[:2], max_memory, False),
                      mydf.sr_loop(kptijkl[2:], max_memory, False))
    for (pqR, pqI), (rsR, rsI) in paired:
        zdotNN(pqR.T, pqI.T, rsR, rsI, 1, eriR, eriI, 1)
        pqR = pqI = rsR = rsI = None
    return eriR + eriI*1j
def __init__(self, cc, mo_coeff=None, method='incore'):
    '''Transform and store the MO integrals needed by CCSD.

    Sets ``self.mo_coeff`` (frozen orbitals removed), ``self.fock`` (Fock
    matrix in that MO basis) and the integral blocks ``oooo``, ``ooov``,
    ``ovoo``, ``oovv``, ``ovov``, ``ovvv`` and ``vvvv``.  The ovvv/vvvv blocks
    are stored with the virtual pair(s) packed as lower triangles.

    One of three strategies is used:
      * DF: the SCF object carries ``with_df``; blocks are contracted from the
        3-index tensors and written to an HDF5 temporary file.
      * incore: ``cc._scf._eri`` is available and fits in memory (or
        ``cc.mol.incore_anyway`` is set); blocks are numpy arrays.
      * outcore: integrals are transformed to HDF5 temporary files and sorted
        block by block.  ``vvvv`` is only generated when ``cc.direct`` is
        false.

    Args:
        cc: CCSD object (``mo_occ``, ``frozen``, ``mo_coeff``, ``_scf``,
            ``mol``, ``nocc``, ``nmo``, ``max_memory``, ``direct``, ``stdout``
            and ``verbose`` are read).
        mo_coeff: MO coefficients; defaults to ``cc.mo_coeff``.
        method: 'incore' allows the in-memory path.
    '''
    # NOTE(review): time.clock() no longer exists on Python >= 3.8; kept
    # because the logger timers in this project are fed the same clock pair.
    cput0 = (time.clock(), time.time())
    # Builtin bool instead of the numpy.bool alias, which has been removed
    # from NumPy; the resulting dtype is the same.
    moidx = numpy.ones(cc.mo_occ.size, dtype=bool)
    if isinstance(cc.frozen, (int, numpy.integer)):
        moidx[:cc.frozen] = False
    elif len(cc.frozen) > 0:
        moidx[numpy.asarray(cc.frozen)] = False
    if mo_coeff is None:
        self.mo_coeff = mo_coeff = cc.mo_coeff[:,moidx]
    else:
        self.mo_coeff = mo_coeff = mo_coeff[:,moidx]
    # The density matrix uses all orbitals (including frozen ones); only the
    # final projection onto the MO basis is restricted to the active set.
    dm = cc._scf.make_rdm1(cc.mo_coeff, cc.mo_occ)
    fockao = cc._scf.get_hcore() + cc._scf.get_veff(cc.mol, dm)
    self.fock = reduce(numpy.dot, (mo_coeff.T, fockao, mo_coeff))

    nocc = cc.nocc
    nmo = cc.nmo
    nvir = nmo - nocc
    mem_incore, mem_outcore, mem_basic = _mem_usage(nocc, nvir)
    mem_now = lib.current_memory()[0]

    log = logger.Logger(cc.stdout, cc.verbose)
    if hasattr(cc._scf, 'with_df') and cc._scf.with_df:
        #log.warn('CCSD detected DF being bound to the HF object. '
        #         'MO integrals are computed based on the DF 3-tensor integrals.\n'
        #         'You can switch to dfccsd.CCSD for the DF-CCSD implementation')
        nvir_pair = nvir * (nvir+1) // 2
        oooo = numpy.zeros((nocc*nocc,nocc*nocc))
        ooov = numpy.zeros((nocc*nocc,nocc*nvir))
        ovoo = numpy.zeros((nocc*nvir,nocc*nocc))
        oovv = numpy.zeros((nocc*nocc,nvir*nvir))
        ovov = numpy.zeros((nocc*nvir,nocc*nvir))
        ovvv = numpy.zeros((nocc*nvir,nvir_pair))
        vvvv = numpy.zeros((nvir_pair,nvir_pair))

        mo = numpy.asarray(mo_coeff, order='F')
        nmo = mo.shape[1]
        ijslice = (0, nmo, 0, nmo)
        Lpq = None
        # Accumulate every block as sum_L (L|pq)(L|rs) over the chunks of the
        # auxiliary index delivered by with_df.loop().
        for eri1 in cc._scf.with_df.loop():
            Lpq = _ao2mo.nr_e2(eri1, mo, ijslice, aosym='s2', out=Lpq).reshape(-1,nmo,nmo)
            Loo = Lpq[:,:nocc,:nocc].reshape(-1,nocc**2)
            Lov = Lpq[:,:nocc,nocc:].reshape(-1,nocc*nvir)
            Lvv = Lpq[:,nocc:,nocc:].reshape(-1,nvir**2)
            lib.ddot(Loo.T, Loo, 1, oooo, 1)
            lib.ddot(Loo.T, Lov, 1, ooov, 1)
            lib.ddot(Lov.T, Loo, 1, ovoo, 1)
            lib.ddot(Loo.T, Lvv, 1, oovv, 1)
            lib.ddot(Lov.T, Lov, 1, ovov, 1)
            Lvv = lib.pack_tril(Lvv.reshape(-1,nvir,nvir))
            lib.ddot(Lov.T, Lvv, 1, ovvv, 1)
            lib.ddot(Lvv.T, Lvv, 1, vvvv, 1)

        _tmpfile1 = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
        self.feri1 = feri1 = h5py.File(_tmpfile1.name)
        def __del__feri1(self):
            feri1.close()
        self.feri1.__del__ = __del__feri1
        self.feri1['oooo'] = oooo.reshape(nocc,nocc,nocc,nocc)
        self.feri1['ooov'] = ooov.reshape(nocc,nocc,nocc,nvir)
        self.feri1['ovoo'] = ovoo.reshape(nocc,nvir,nocc,nocc)
        self.feri1['oovv'] = oovv.reshape(nocc,nocc,nvir,nvir)
        self.feri1['ovov'] = ovov.reshape(nocc,nvir,nocc,nvir)
        self.feri1['ovvv'] = ovvv.reshape(nocc,nvir,nvir_pair)
        self.feri1['vvvv'] = vvvv.reshape(nvir_pair,nvir_pair)
        self.oooo = self.feri1['oooo']
        self.ooov = self.feri1['ooov']
        self.ovoo = self.feri1['ovoo']
        self.oovv = self.feri1['oovv']
        self.ovov = self.feri1['ovov']
        self.ovvv = self.feri1['ovvv']
        self.vvvv = self.feri1['vvvv']

    # Operator precedence: this reads
    #   (method == 'incore' and _eri available and fits in memory)
    #   or cc.mol.incore_anyway
    elif (method == 'incore' and cc._scf._eri is not None and
          (mem_incore+mem_now < cc.max_memory) or cc.mol.incore_anyway):
        eri1 = ao2mo.incore.full(cc._scf._eri, mo_coeff)
        #:eri1 = ao2mo.restore(1, eri1, nmo)
        #:self.oooo = eri1[:nocc,:nocc,:nocc,:nocc].copy()
        #:self.ooov = eri1[:nocc,:nocc,:nocc,nocc:].copy()
        #:self.ovoo = eri1[:nocc,nocc:,:nocc,:nocc].copy()
        #:self.oovv = eri1[:nocc,:nocc,nocc:,nocc:].copy()
        #:self.ovov = eri1[:nocc,nocc:,:nocc,nocc:].copy()
        #:ovvv = eri1[:nocc,nocc:,nocc:,nocc:].copy()
        #:self.ovvv = numpy.empty((nocc,nvir,nvir*(nvir+1)//2))
        #:for i in range(nocc):
        #:    for j in range(nvir):
        #:        self.ovvv[i,j] = lib.pack_tril(ovvv[i,j])
        #:self.vvvv = ao2mo.restore(4, eri1[nocc:,nocc:,nocc:,nocc:], nvir)
        nvir_pair = nvir * (nvir+1) // 2
        self.oooo = numpy.empty((nocc,nocc,nocc,nocc))
        self.ooov = numpy.empty((nocc,nocc,nocc,nvir))
        self.ovoo = numpy.empty((nocc,nvir,nocc,nocc))
        self.oovv = numpy.empty((nocc,nocc,nvir,nvir))
        self.ovov = numpy.empty((nocc,nvir,nocc,nvir))
        self.ovvv = numpy.empty((nocc,nvir,nvir_pair))
        self.vvvv = numpy.empty((nvir_pair,nvir_pair))
        # ``ij`` walks the packed (i >= j) row index of eri1; for each i the
        # rows ij .. ij+i are unpacked into full (nmo, nmo) matrices.
        ij = 0
        outbuf = numpy.empty((nmo,nmo,nmo))
        for i in range(nocc):
            buf = lib.unpack_tril(eri1[ij:ij+i+1], out=outbuf[:i+1])
            for j in range(i+1):
                self.oooo[i,j] = self.oooo[j,i] = buf[j,:nocc,:nocc]
                self.ooov[i,j] = self.ooov[j,i] = buf[j,:nocc,nocc:]
                self.oovv[i,j] = self.oovv[j,i] = buf[j,nocc:,nocc:]
            ij += i + 1
        ij1 = 0
        for i in range(nocc,nmo):
            buf = lib.unpack_tril(eri1[ij:ij+i+1], out=outbuf[:i+1])
            self.ovoo[:,i-nocc] = buf[:nocc,:nocc,:nocc]
            self.ovov[:,i-nocc] = buf[:nocc,:nocc,nocc:]
            for j in range(nocc):
                self.ovvv[j,i-nocc] = lib.pack_tril(_cp(buf[j,nocc:,nocc:]))
            for j in range(nocc, i+1):
                self.vvvv[ij1] = lib.pack_tril(_cp(buf[j,nocc:,nocc:]))
                ij1 += 1
            ij += i + 1
    else:
        cput1 = time.clock(), time.time()
        _tmpfile1 = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
        _tmpfile2 = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
        self.feri1 = feri1 = h5py.File(_tmpfile1.name)
        def __del__feri1(self):
            feri1.close()
        self.feri1.__del__ = __del__feri1
        orbo = mo_coeff[:,:nocc]
        orbv = mo_coeff[:,nocc:]
        nvpair = nvir * (nvir+1) // 2
        self.oooo = self.feri1.create_dataset('oooo', (nocc,nocc,nocc,nocc), 'f8')
        self.ooov = self.feri1.create_dataset('ooov', (nocc,nocc,nocc,nvir), 'f8')
        self.ovoo = self.feri1.create_dataset('ovoo', (nocc,nvir,nocc,nocc), 'f8')
        self.oovv = self.feri1.create_dataset('oovv', (nocc,nocc,nvir,nvir), 'f8')
        self.ovov = self.feri1.create_dataset('ovov', (nocc,nvir,nocc,nvir), 'f8')
        self.ovvv = self.feri1.create_dataset('ovvv', (nocc,nvir,nvpair), 'f8')

        fsort = _ccsd.libcc.CCsd_sort_inplace
        nocc_pair = nocc*(nocc+1)//2
        nvir_pair = nvir*(nvir+1)//2
        def sort_inplace(eri):
            # Reorder each row in place with the C routine, then slice it
            # into its vv / oo / ov column segments.
            fsort(eri.ctypes.data_as(ctypes.c_void_p),
                  ctypes.c_int(nocc), ctypes.c_int(nvir),
                  ctypes.c_int(eri.shape[0]))
            vv = eri[:,:nvir_pair]
            oo = eri[:,nvir_pair:nvir_pair+nocc_pair]
            ov = eri[:,nvir_pair+nocc_pair:].reshape(-1,nocc,nvir)
            return oo, ov, vv
        buf = numpy.empty((nmo,nmo,nmo))
        def save_occ_frac(i, p0, p1, eri):
            oo, ov, vv = sort_inplace(eri)
            self.oooo[i,p0:p1] = lib.unpack_tril(oo, out=buf)
            self.ooov[i,p0:p1] = ov
            self.oovv[i,p0:p1] = lib.unpack_tril(vv, out=buf)
        def save_vir_frac(i, p0, p1, eri):
            oo, ov, vv = sort_inplace(eri)
            self.ovoo[i,p0:p1] = lib.unpack_tril(oo, out=buf)
            self.ovov[i,p0:p1] = ov
            self.ovvv[i,p0:p1] = vv

        if not cc.direct:
            max_memory = max(2000,cc.max_memory-lib.current_memory()[0])
            self.feri2 = feri2 = h5py.File(_tmpfile2.name)
            def __del__feri2(self):
                feri2.close()
            self.feri2.__del__ = __del__feri2
            ao2mo.full(cc.mol, orbv, self.feri2, max_memory=max_memory, verbose=log)
            self.vvvv = self.feri2['eri_mo']
            cput1 = log.timer_debug1('transforming vvvv', *cput1)

        tmpfile3 = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
        with h5py.File(tmpfile3.name, 'w') as feri:
            max_memory = max(2000, cc.max_memory-lib.current_memory()[0])
            # Virtuals first, then occupieds: the row offsets used below
            # (i*nmo+p for virtual p, i*nmo+nvir+p for occupied p) rely on
            # this ordering.
            mo = numpy.hstack((orbv, orbo))
            ao2mo.general(cc.mol, (orbo,mo,mo,mo),
                          feri, max_memory=max_memory, verbose=log)
            cput1 = log.timer_debug1('transforming oppp', *cput1)
            blksize = max(1, int(min(8e9,max_memory*.5e6)/8/nmo**2))
            handler = None
            for i in range(nocc):
                for p0, p1 in lib.prange(0, nvir, blksize):
                    eri = _cp(feri['eri_mo'][i*nmo+p0:i*nmo+p1])
                    handler = async_do(handler, save_vir_frac, i, p0, p1, eri)
                for p0, p1 in lib.prange(0, nocc, blksize):
                    eri = _cp(feri['eri_mo'][i*nmo+nvir+p0:i*nmo+nvir+p1])
                    handler = async_do(handler, save_occ_frac, i, p0, p1, eri)
                cput1 = log.timer_debug1('sorting %d'%i, *cput1)
            if handler is not None:
                handler.join()
            # Snapshot the key list first: entries are deleted while looping.
            for key in list(feri.keys()):
                del feri[key]
    log.timer('CCSD integral transformation', *cput0)
def contract(myci, civec, eris):
    '''Contract a CISD vector with the Fock matrix and integrals in ``eris``.

    ``civec`` is laid out as [c0, c1 (nocc*nvir), c2 (nocc*nocc*nvir*nvir)].
    The result has the same layout.  The integrals are processed in blocks of
    ``blksize`` along their leading virtual index to bound memory use.

    Args:
        myci: CISD object (``nocc``, ``nmo``, ``max_memory``, ``stdout``,
            ``verbose`` and ``add_wvvVV_`` are used).
        civec: 1D coefficient vector.
        eris: integral container exposing ``fock``, ``oooo``, ``vvoo``,
            ``voov``, ``vooo`` and ``vovv``.

    Returns:
        A new vector ``cinew`` with the shape of ``civec``.
    '''
    # NOTE(review): time.clock() no longer exists on Python >= 3.8; kept
    # because the logger timers in this project are fed the same clock pair.
    time0 = time.clock(), time.time()
    log = logger.Logger(myci.stdout, myci.verbose)
    nocc = myci.nocc
    nmo = myci.nmo
    nvir = nmo - nocc
    nov = nocc * nvir
    noo = nocc**2
    c0 = civec[0]
    c1 = civec[1:nov+1].reshape(nocc,nvir)
    c2 = civec[nov+1:].reshape(nocc,nocc,nvir,nvir)

    # t1 and t2 are views into cinew: every update below lands in the result.
    cinew = numpy.zeros_like(civec)
    t1 = cinew[1:nov+1].reshape(nocc,nvir)
    t2 = cinew[nov+1:].reshape(nocc,nocc,nvir,nvir)
    # The vvvv contribution is produced for i >= j only.  The halved diagonal
    # is compensated by the final symmetrisation t2[i,j] += t2[j,i].T.
    t2new_tril = numpy.zeros((nocc*(nocc+1)//2,nvir,nvir))
    myci.add_wvvVV_(c2, eris, t2new_tril)
    for i in range(nocc):
        for j in range(i+1):
            t2[i,j] = t2new_tril[i*(i+1)//2+j]
        t2[i,i] *= .5
    t2new_tril = None
    time1 = log.timer_debug1('vvvv', *time0)
    #:t2 += numpy.einsum('iklj,klab->ijab', _cp(eris.oooo)*.5, c2)
    oooo = lib.transpose(_cp(eris.oooo).reshape(nocc,noo,nocc), axes=(0,2,1))
    lib.ddot(oooo.reshape(noo,noo), c2.reshape(noo,-1), .5, t2.reshape(noo,-1), 1)

    foo = eris.fock[:nocc,:nocc].copy()
    fov = eris.fock[:nocc,nocc:].copy()
    fvv = eris.fock[nocc:,nocc:].copy()

    t1 += fov * c0
    t1 += numpy.einsum('ib,ab->ia', c1, fvv)
    t1 -= numpy.einsum('ja,ji->ia', c1, foo)

    #:t2 += numpy.einsum('bc,ijac->ijab', fvv, c2)
    #:t2 -= numpy.einsum('kj,kiba->ijab', foo, c2)
    #:t2 += numpy.einsum('ia,jb->ijab', c1, fov)
    lib.ddot(c2.reshape(-1,nvir), fvv, 1, t2.reshape(-1,nvir), 1)
    lib.ddot(foo, c2.reshape(nocc,-1), -1, t2.reshape(nocc,-1), 1)
    for j in range(nocc):
        t2[:,j] += numpy.einsum('ia,b->iab', c1, fov[j])

    # The vovv integrals are unpacked one block at a time inside the loop
    # below; unpacking the whole tensor here would defeat the blocking.
    unit = _memory_usage_inloop(nocc, nvir)
    max_memory = max(2000, myci.max_memory - lib.current_memory()[0])
    blksize = min(nvir, max(ccsd.BLKMIN, int(max_memory/unit)))
    log.debug1('max_memory %d MB, nocc,nvir = %d,%d blksize = %d',
               max_memory, nocc, nvir, blksize)
    nvir_pair = nvir * (nvir+1) // 2
    for p0, p1 in lib.prange(0, nvir, blksize):
        eris_vvoo = _cp(eris.vvoo[p0:p1])
        oovv = lib.transpose(eris_vvoo.reshape(-1,nocc**2))
        #:eris_oVoV = eris_vvoo.transpose(2,0,3,1)
        eris_oVoV = numpy.ndarray((nocc,p1-p0,nocc,nvir))
        eris_oVoV[:] = oovv.reshape(nocc,nocc,p1-p0,nvir).transpose(0,2,1,3)
        eris_vvoo = oovv = None
        #:tmp = numpy.einsum('ikca,jbkc->jiba', c2, eris_oVoV)
        #:t2[:,:,p0:p1] -= tmp*.5
        #:t2[:,:,p0:p1] -= tmp.transpose(1,0,2,3)
        for i in range(nocc):
            tmp = lib.ddot(eris_oVoV.reshape(-1,nov), c2[i].reshape(nov,nvir))
            tmp = tmp.reshape(nocc,p1-p0,nvir)
            t2[:,i,p0:p1] -= tmp * .5
            t2[i,:,p0:p1] -= tmp

        eris_voov = _cp(eris.voov[p0:p1])
        for i in range(p0, p1):
            t2[:,:,i] += eris_voov[i-p0] * (c0 * .5)
        t1[:,p0:p1] += numpy.einsum('jb,aijb->ia', c1, eris_voov) * 2
        t1[:,p0:p1] -= numpy.einsum('jb,iajb->ia', c1, eris_oVoV)

        #:ovov = eris_voov.transpose(2,0,1,3) - eris_vvoo.transpose(2,0,3,1)
        # Built in place on top of eris_oVoV to avoid another allocation.
        ovov = eris_oVoV
        ovov *= -.5
        for i in range(nocc):
            ovov[i] += eris_voov[:,:,i]
        eris_oVoV = eris_vvoo = None
        #:theta = c2[:,:,p0:p1]
        #:theta = theta * 2 - theta.transpose(1,0,2,3)
        #:theta = theta.transpose(2,0,1,3)
        theta = numpy.ndarray((p1-p0,nocc,nocc,nvir))
        for i in range(p0, p1):
            theta[i-p0] = c2[:,:,i] * 2
            theta[i-p0] -= c2[:,:,i].transpose(1,0,2)
        #:t2 += numpy.einsum('ckia,jckb->ijab', theta, ovov)
        for j in range(nocc):
            tmp = lib.ddot(theta.reshape(-1,nov).T, ovov[j].reshape(-1,nvir))
            t2[:,j] += tmp.reshape(nocc,nvir,nvir)
        tmp = ovov = None

        t1[:,p0:p1] += numpy.einsum('aijb,jb->ia', theta, fov)

        eris_vooo = _cp(eris.vooo[p0:p1])
        #:t1 -= numpy.einsum('bjka,bjki->ia', theta, eris_vooo)
        #:t2[:,:,p0:p1] -= numpy.einsum('ka,bjik->jiba', c1, eris_vooo)
        lib.ddot(eris_vooo.reshape(-1,nocc).T, theta.reshape(-1,nvir), -1, t1, 1)
        for i in range(p0, p1):
            t2[:,:,i] -= lib.ddot(eris_vooo[i-p0].reshape(noo,-1),
                                  c1).reshape(nocc,nocc,-1)
        eris_vooo = None

        eris_vovv = _cp(eris.vovv[p0:p1]).reshape(-1,nvir_pair)
        eris_vovv = lib.unpack_tril(eris_vovv).reshape(p1-p0,nocc,nvir,nvir)
        #:t1 += numpy.einsum('cjib,cjba->ia', theta, eris_vovv)
        #:t2[:,:,p0:p1] += numpy.einsum('jc,aibc->ijab', c1, eris_vovv)
        theta = lib.transpose(theta.reshape(-1,nocc,nvir), axes=(0,2,1))
        lib.ddot(theta.reshape(-1,nocc).T, eris_vovv.reshape(-1,nvir), 1, t1, 1)
        for i in range(p0, p1):
            tmp = lib.ddot(c1, eris_vovv[i-p0].reshape(-1,nvir).T)
            t2[:,:,i] += tmp.reshape(nocc,nocc,nvir).transpose(1,0,2)
        tmp = eris_vovv = None

    # Symmetrise over (i,a) <-> (j,b) and mirror into the j,i block.
    for i in range(nocc):
        for j in range(i+1):
            t2[i,j] += t2[j,i].T
            t2[j,i] = t2[i,j].T

    cinew[0] += numpy.einsum('ia,ia->', fov, c1) * 2
    cinew[0] += numpy.einsum('aijb,ijab->', eris.voov, c2) * 2
    cinew[0] -= numpy.einsum('aijb,jiab->', eris.voov, c2)
    return cinew
def make_rdm2(ci, nmo, nocc):
    '''Spin-traced two-particle density matrix in chemist's notation.

    Args:
        ci: CISD coefficient vector, split into (c0, c1, c2) by
            ``cisdvec_to_amplitudes``.
        nmo: number of orbitals.
        nocc: number of occupied orbitals.

    Returns:
        Array of shape (nmo, nmo, nmo, nmo); the internally assembled tensor
        is returned with its two middle axes swapped.
    '''
    nvir = nmo - nocc
    noo = nocc**2
    nov = nocc * nvir
    c0, c1, c2 = cisdvec_to_amplitudes(ci, nmo, nocc)
    o = slice(None, nocc)   # occupied range
    v = slice(nocc, None)   # virtual range

    c2_oo = c2.reshape(noo, -1)
    doovv = c0 * c2
    dvvvo = numpy.einsum('ia,ikcd->cdak', c1, c2)
    dovoo = -numpy.einsum('ia,klac->ickl', c1, c2)
    doooo = lib.ddot(c2_oo, c2_oo.T).reshape((nocc, ) * 4)
    dvvvv = lib.ddot(c2_oo.T, c2_oo).reshape((nvir, ) * 4)

    rdm2 = numpy.zeros((nmo, nmo, nmo, nmo))
    rdm2[o, o, o, o] = doooo * 4 - doooo.transpose(1, 0, 2, 3) * 2

    # One-virtual blocks: fill ovoo, then copy its index permutations.
    rdm2[o, v, o, o] = dovoo * 4 - dovoo.transpose(0, 1, 3, 2) * 2
    ovoo = rdm2[o, v, o, o]
    rdm2[v, o, o, o] = ovoo.transpose(1, 0, 3, 2)
    rdm2[o, o, o, v] = ovoo.transpose(2, 3, 0, 1)
    rdm2[o, o, v, o] = ovoo.transpose(3, 2, 1, 0)

    rdm2[o, o, v, v] = doovv * 4 - doovv.transpose(1, 0, 2, 3) * 2
    rdm2[v, v, o, o] = rdm2[o, o, v, v].transpose(2, 3, 0, 1)

    # Three-virtual blocks: fill vvvo, then copy its index permutations.
    rdm2[v, v, v, o] = dvvvo * 4 - dvvvo.transpose(1, 0, 2, 3) * 2
    vvvo = rdm2[v, v, v, o]
    rdm2[v, v, o, v] = vvvo.transpose(1, 0, 3, 2)
    rdm2[v, o, v, v] = vvvo.transpose(2, 3, 0, 1)
    rdm2[o, v, v, v] = vvvo.transpose(3, 2, 1, 0)

    rdm2[v, v, v, v] = dvvvv * 4 - dvvvv.transpose(1, 0, 2, 3) * 2

    theta = c2 * 2 - c2.transpose(1, 0, 2, 3)
    c2_ia = c2.transpose(0, 2, 1, 3).reshape(nov, -1)
    c2_ib = c2.transpose(0, 3, 1, 2).reshape(nov, -1)
    th_ia = theta.transpose(0, 2, 1, 3).reshape(nov, -1)
    th_ib = theta.transpose(0, 3, 1, 2).reshape(nov, -1)

    dovov = numpy.einsum('ia,kc->icka', c1, c1) * -2
    #:dovov -= numpy.einsum('kjcb,kica->jaib', c2, theta) * 2
    #:dovov -= numpy.einsum('ikcb,jkca->iajb', c2, theta) * 2
    term = lib.ddot(c2_ia.T, th_ia, 2).reshape(nocc, nvir, nocc, nvir)
    dovov -= term.transpose(0, 3, 2, 1)
    term = lib.ddot(c2_ib, th_ib.T, 2).reshape(nocc, nvir, nocc, nvir)
    dovov -= term.transpose(0, 3, 2, 1)

    dvoov = numpy.einsum('ia,kc->cika', c1, c1) * 4
    #:dvoov += numpy.einsum('kica,kjcb->ajib', theta, theta) * 2
    term = lib.ddot(th_ia.T, th_ia, 2).reshape(nocc, nvir, nocc, nvir)
    dvoov += term.transpose(1, 2, 0, 3)

    rdm2[o, v, o, v] = dovov
    rdm2[v, o, v, o] = dovov.transpose(1, 0, 3, 2)
    rdm2[v, o, o, v] = dvoov
    rdm2[o, v, v, o] = dvoov.transpose(1, 0, 3, 2)

    # make_rdm1 adds 2 to each occupied diagonal element; take it off again
    # before combining with the explicit occupied-occupied terms below.
    rdm1 = make_rdm1(ci, nmo, nocc)
    for k in range(nocc):
        rdm1[k, k] -= 2

    for k in range(nocc):
        for l in range(nocc):
            rdm2[k, l, k, l] += 4
            rdm2[k, l, l, k] -= 2
        rdm2[k, :, k, :] += rdm1 * 2
        rdm2[:, k, :, k] += rdm1 * 2
        rdm2[:, k, k, :] -= rdm1
        rdm2[k, :, :, k] -= rdm1

    return rdm2.transpose(0, 2, 1, 3)  # to chemist's notation
def make_rdm2(ci, nmo, nocc):
    '''Spin-traced 2pdm built from a CISD vector.

    ``ci`` is sliced into c0 (first element), c1 (next nocc*nvir elements,
    shaped (nocc,nvir)) and c2 (the remainder, shaped
    (nocc,nocc,nvir,nvir)).  The (nmo,nmo,nmo,nmo) array is returned as
    assembled; no axis transpose is applied at the end.
    '''
    nvir = nmo - nocc
    noo = nocc**2
    nov = nocc * nvir
    # Index ranges of the occupied / virtual orbitals along any axis.
    occ = slice(None, nocc)
    vir = slice(nocc, None)

    n_singles = nocc * nvir
    c0 = ci[0]
    c1 = ci[1:n_singles + 1].reshape(nocc, nvir)
    c2 = ci[n_singles + 1:].reshape(nocc, nocc, nvir, nvir)

    doovv = c0 * c2
    dvvvo = numpy.einsum('ia,ikcd->cdak', c1, c2)
    dovoo = -numpy.einsum('ia,klac->ickl', c1, c2)
    c2_mat = c2.reshape(noo, -1)
    doooo = lib.ddot(c2_mat, c2_mat.T).reshape((nocc,) * 4)
    dvvvv = lib.ddot(c2_mat.T, c2_mat).reshape((nvir,) * 4)

    rdm2 = numpy.zeros((nmo, nmo, nmo, nmo))
    rdm2[occ, occ, occ, occ] = doooo * 4 - doooo.transpose(1, 0, 2, 3) * 2

    # One-virtual blocks: fill ovoo, then copy its index permutations.
    rdm2[occ, vir, occ, occ] = dovoo * 4 - dovoo.transpose(0, 1, 3, 2) * 2
    ovoo = rdm2[occ, vir, occ, occ]
    rdm2[vir, occ, occ, occ] = ovoo.transpose(1, 0, 3, 2)
    rdm2[occ, occ, occ, vir] = ovoo.transpose(2, 3, 0, 1)
    rdm2[occ, occ, vir, occ] = ovoo.transpose(3, 2, 1, 0)

    rdm2[occ, occ, vir, vir] = doovv * 4 - doovv.transpose(1, 0, 2, 3) * 2
    rdm2[vir, vir, occ, occ] = rdm2[occ, occ, vir, vir].transpose(2, 3, 0, 1)

    # Three-virtual blocks: fill vvvo, then copy its index permutations.
    rdm2[vir, vir, vir, occ] = dvvvo * 4 - dvvvo.transpose(1, 0, 2, 3) * 2
    vvvo = rdm2[vir, vir, vir, occ]
    rdm2[vir, vir, occ, vir] = vvvo.transpose(1, 0, 3, 2)
    rdm2[vir, occ, vir, vir] = vvvo.transpose(2, 3, 0, 1)
    rdm2[occ, vir, vir, vir] = vvvo.transpose(3, 2, 1, 0)

    rdm2[vir, vir, vir, vir] = dvvvv * 4 - dvvvv.transpose(1, 0, 2, 3) * 2

    # Fixme: This seems giving right answer, but not based on solid formula
    theta = c2 * 2 - c2.transpose(1, 0, 2, 3)
    # (nov, nov) matrix views of c2 / theta used by the contractions below.
    c2_0213 = c2.transpose(0, 2, 1, 3).reshape(nov, -1)
    theta_0213 = theta.transpose(0, 2, 1, 3).reshape(nov, -1)
    c2_0312 = c2.transpose(0, 3, 1, 2).reshape(nov, -1)
    theta_0312 = theta.transpose(0, 3, 1, 2).reshape(nov, -1)
    ovov_shape = (nocc, nvir, nocc, nvir)

    dovov = numpy.einsum('ia,kc->icka', c1, c1) * -2
    #:dovov -= numpy.einsum('kjcb,kica->jaib', c2, theta) * 2
    #:dovov -= numpy.einsum('ikcb,jkca->iajb', c2, theta) * 2
    dovov -= lib.ddot(c2_0213.T, theta_0213,
                      2).reshape(ovov_shape).transpose(0, 3, 2, 1)
    dovov -= lib.ddot(c2_0312, theta_0312.T,
                      2).reshape(ovov_shape).transpose(0, 3, 2, 1)

    dvoov = numpy.einsum('ia,kc->cika', c1, c1) * 4
    #:dvoov += numpy.einsum('kica,kjcb->ajib', theta, theta) * 2
    dvoov += lib.ddot(theta_0213.T, theta_0213,
                      2).reshape(ovov_shape).transpose(1, 2, 0, 3)

    # Alternative assembly that was tried and left disabled:
    #   rdm2[:nocc,nocc:,:nocc,nocc:] = dovov*4-dvoov.transpose(1,0,2,3)*2
    #   rdm2[nocc:,:nocc,nocc:,:nocc] = rdm2[:nocc,nocc:,:nocc,nocc:].transpose(1,0,3,2)
    #   rdm2[nocc:,:nocc,:nocc,nocc:] = dvoov*4-dovov.transpose(1,0,2,3)*2
    #   rdm2[:nocc,nocc:,nocc:,:nocc] = rdm2[nocc:,:nocc,:nocc,nocc:].transpose(1,0,3,2)
    rdm2[occ, vir, occ, vir] = dovov
    rdm2[vir, occ, vir, occ] = dovov.transpose(1, 0, 3, 2)
    rdm2[vir, occ, occ, vir] = dvoov
    rdm2[occ, vir, vir, occ] = dvoov.transpose(1, 0, 3, 2)

    # make_rdm1 adds 2 on the occupied diagonal; take that shift out again.
    rdm1 = make_rdm1(ci, nmo, nocc)
    occ_diag = numpy.arange(nocc)
    rdm1[occ_diag, occ_diag] -= 2

    # The update order inside this loop is kept as is: several statements
    # touch the same elements, so reordering could change rounding.
    for p in range(nocc):
        for q in range(nocc):
            rdm2[p, q, p, q] += 4
            rdm2[p, q, q, p] -= 2
        rdm2[p, :, p, :] += rdm1 * 2
        rdm2[:, p, :, p] += rdm1 * 2
        rdm2[:, p, p, :] -= rdm1
        rdm2[p, :, :, p] -= rdm1

    return rdm2
def _ao2mo_ovov(mp, orbs, feri, max_memory=2000, verbose=None):
    '''Two-pass out-of-core AO->MO transformation for the spin-unrestricted
    case.

    Pass 1 computes AO integral blocks shell-range by shell-range,
    contracts the two outer AO indices with the occupied orbitals and
    stores the half-transformed blocks in a temporary HDF5 file under
    'aa/', 'bb/' and 'ab/'.  Pass 2 reads them back in batches of occupied
    orbitals, contracts the remaining two AO indices with the virtual
    orbitals and writes the datasets 'ovov', 'ovOV' and 'OVOV' into
    ``feri``.

    Args:
        mp : object providing ``mol`` (also handed to logger.new_logger)
        orbs : sequence of four coefficient matrices, read as
            orbs[0..3] = (orboa, orbva, orbob, orbvb); presumably
            occupied-alpha, virtual-alpha, occupied-beta, virtual-beta
        feri : HDF5 file/group in which the three datasets are created
        max_memory : memory budget in MB used to size the working blocks
        verbose : verbosity forwarded to logger.new_logger

    Returns:
        None.  Results are written to ``feri``.
    '''
    # time.clock() was removed in Python 3.8; use time.process_time when
    # available and fall back to time.clock only on old interpreters.
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time0 = (cpu_clock(), time.time())
    log = logger.new_logger(mp, verbose)
    orboa = numpy.asarray(orbs[0], order='F')
    orbva = numpy.asarray(orbs[1], order='F')
    orbob = numpy.asarray(orbs[2], order='F')
    orbvb = numpy.asarray(orbs[3], order='F')
    nao, nocca = orboa.shape
    noccb = orbob.shape[1]
    nvira = orbva.shape[1]
    nvirb = orbvb.shape[1]

    mol = mp.mol
    int2e = mol._add_suffix('int2e')
    ao2mopt = _ao2mo.AO2MOpt(mol, int2e, 'CVHFnr_schwarz_cond',
                             'CVHFsetnr_direct_scf')
    nbas = mol.nbas
    assert (nvira <= nao)
    assert (nvirb <= nao)

    ao_loc = mol.ao_loc_nr()
    dmax = max(4, min(nao / 3,
                      numpy.sqrt(max_memory * .95e6 / 8 / (nao + nocca)**2)))
    sh_ranges = ao2mo.outcore.balance_partition(ao_loc, dmax)
    dmax = max(x[2] for x in sh_ranges)
    eribuf = numpy.empty((nao, dmax, dmax, nao))
    ftmp = lib.H5TmpFile()
    disk = (nocca**2 * (nao * (nao + dmax) / 2 + nvira**2) +
            noccb**2 * (nao * (nao + dmax) / 2 + nvirb**2) +
            nocca * noccb * (nao**2 + nvira * nvirb))
    log.debug('max_memory %s MB (dmax = %s) required disk space %g MB',
              max_memory, dmax, disk * 8 / 1e6)

    fint = gto.moleintor.getints4c
    aa_blk_slices = []
    ab_blk_slices = []
    count_ab = 0
    count_aa = 0
    time1 = time0
    # ---- pass 1: contract the two outer AO indices with occupied orbitals
    with lib.call_in_background(ftmp.__setitem__) as save:
        for ish0, ish1, ni in sh_ranges:
            for jsh0, jsh1, nj in sh_ranges:
                i0, i1 = ao_loc[ish0], ao_loc[ish1]
                j0, j1 = ao_loc[jsh0], ao_loc[jsh1]

                eri = fint(int2e, mol._atm, mol._bas, mol._env,
                           shls_slice=(0, nbas, ish0, ish1,
                                       jsh0, jsh1, 0, nbas),
                           aosym='s1', ao_loc=ao_loc,
                           cintopt=ao2mopt._cintopt, out=eribuf)
                tmp_i = lib.ddot(orboa.T,
                                 eri.reshape(nao, (i1 - i0) * (j1 - j0) * nao))
                tmp_li = lib.ddot(
                    orbob.T,
                    tmp_i.reshape(nocca * (i1 - i0) * (j1 - j0), nao).T)
                tmp_li = tmp_li.reshape(noccb, nocca, (i1 - i0), (j1 - j0))
                save('ab/%d' % count_ab, tmp_li.transpose(1, 0, 2, 3))
                ab_blk_slices.append((i0, i1, j0, j1))
                count_ab += 1

                # Same-spin blocks are stored only for ish0 >= jsh0; the
                # other half is rebuilt by transposition in load_aa.
                if ish0 >= jsh0:
                    tmp_li = lib.ddot(
                        orboa.T,
                        tmp_i.reshape(nocca * (i1 - i0) * (j1 - j0), nao).T)
                    tmp_li = tmp_li.reshape(nocca, nocca, (i1 - i0), (j1 - j0))
                    save('aa/%d' % count_aa, tmp_li.transpose(1, 0, 2, 3))

                    tmp_i = lib.ddot(
                        orbob.T,
                        eri.reshape(nao, (i1 - i0) * (j1 - j0) * nao))
                    tmp_li = lib.ddot(
                        orbob.T,
                        tmp_i.reshape(noccb * (i1 - i0) * (j1 - j0), nao).T)
                    tmp_li = tmp_li.reshape(noccb, noccb, (i1 - i0), (j1 - j0))
                    save('bb/%d' % count_aa, tmp_li.transpose(1, 0, 2, 3))
                    aa_blk_slices.append((i0, i1, j0, j1))
                    count_aa += 1

                time1 = log.timer_debug1(
                    'partial ao2mo [%d:%d,%d:%d]' % (ish0, ish1, jsh0, jsh1),
                    *time1)
    time1 = time0 = log.timer('mp2 ao2mo_ovov pass1', *time0)
    eri = eribuf = tmp_i = tmp_li = None

    fovov = feri.create_dataset('ovov', (nocca * nvira, nocca * nvira), 'f8',
                                chunks=(nvira, nvira))
    fovOV = feri.create_dataset('ovOV', (nocca * nvira, noccb * nvirb), 'f8',
                                chunks=(nvira, nvirb))
    fOVOV = feri.create_dataset('OVOV', (noccb * nvirb, noccb * nvirb), 'f8',
                                chunks=(nvirb, nvirb))
    occblk = int(min(max(nocca, noccb),
                     max(4, 250 / nocca,
                         max_memory * .9e6 / 8 / (nao**2 * nocca) / 5)))

    def load_aa(h5g, nocc, i0, eri):
        # Read one batch of same-spin blocks; fill the q<->p mirrored
        # part from the transposed stored block.
        if i0 < nocc:
            i1 = min(i0 + occblk, nocc)
            for k, (p0, p1, q0, q1) in enumerate(aa_blk_slices):
                eri[:i1 - i0, :, p0:p1, q0:q1] = h5g[str(k)][i0:i1]
                if p0 != q0:
                    dat = numpy.asarray(h5g[str(k)][:, i0:i1])
                    eri[:i1 - i0, :, q0:q1, p0:p1] = dat.transpose(1, 0, 3, 2)

    def load_ab(h5g, nocca, i0, eri):
        # Read one batch of opposite-spin blocks (all blocks are stored).
        if i0 < nocca:
            i1 = min(i0 + occblk, nocca)
            for k, (p0, p1, q0, q1) in enumerate(ab_blk_slices):
                eri[:i1 - i0, :, p0:p1, q0:q1] = h5g[str(k)][i0:i1]

    def save(h5dat, nvir, i0, i1, dat):
        # Write the rows that belong to occupied orbitals i0..i1-1.
        for i in range(i0, i1):
            h5dat[i * nvir:(i + 1) * nvir] = dat[i - i0].reshape(nvir, -1)

    # ---- pass 2: contract the remaining AO indices with virtual orbitals
    with lib.call_in_background(save) as bsave:
        with lib.call_in_background(load_aa) as prefetch:
            buf_prefecth = numpy.empty((occblk, nocca, nao, nao))
            buf = numpy.empty_like(buf_prefecth)
            load_aa(ftmp['aa'], nocca, 0, buf_prefecth)
            for i0, i1 in lib.prange(0, nocca, occblk):
                # Swap buffers: compute on one while the next is prefetched.
                buf, buf_prefecth = buf_prefecth, buf
                prefetch(ftmp['aa'], nocca, i1, buf_prefecth)
                eri = buf[:i1 - i0].reshape((i1 - i0) * nocca, nao, nao)
                dat = _ao2mo.nr_e2(eri, orbva, (0, nvira, 0, nvira),
                                   's1', 's1')
                bsave(fovov, nvira, i0, i1,
                      dat.reshape(i1 - i0, nocca, nvira,
                                  nvira).transpose(0, 2, 1, 3))
                time1 = log.timer_debug1(
                    'pass2 ao2mo for aa [%d:%d]' % (i0, i1), *time1)

            buf_prefecth = numpy.empty((occblk, noccb, nao, nao))
            buf = numpy.empty_like(buf_prefecth)
            load_aa(ftmp['bb'], noccb, 0, buf_prefecth)
            for i0, i1 in lib.prange(0, noccb, occblk):
                buf, buf_prefecth = buf_prefecth, buf
                prefetch(ftmp['bb'], noccb, i1, buf_prefecth)
                eri = buf[:i1 - i0].reshape((i1 - i0) * noccb, nao, nao)
                dat = _ao2mo.nr_e2(eri, orbvb, (0, nvirb, 0, nvirb),
                                   's1', 's1')
                bsave(fOVOV, nvirb, i0, i1,
                      dat.reshape(i1 - i0, noccb, nvirb,
                                  nvirb).transpose(0, 2, 1, 3))
                time1 = log.timer_debug1(
                    'pass2 ao2mo for bb [%d:%d]' % (i0, i1), *time1)

        # The (occblk, noccb, nao, nao) buffers from the bb pass are reused.
        orbvab = numpy.asarray(numpy.hstack((orbva, orbvb)), order='F')
        with lib.call_in_background(load_ab) as prefetch:
            load_ab(ftmp['ab'], nocca, 0, buf_prefecth)
            for i0, i1 in lib.prange(0, nocca, occblk):
                buf, buf_prefecth = buf_prefecth, buf
                prefetch(ftmp['ab'], nocca, i1, buf_prefecth)
                eri = buf[:i1 - i0].reshape((i1 - i0) * noccb, nao, nao)
                dat = _ao2mo.nr_e2(eri, orbvab,
                                   (0, nvira, nvira, nvira + nvirb),
                                   's1', 's1')
                bsave(fovOV, nvira, i0, i1,
                      dat.reshape(i1 - i0, noccb, nvira,
                                  nvirb).transpose(0, 2, 1, 3))
                time1 = log.timer_debug1(
                    'pass2 ao2mo for ab [%d:%d]' % (i0, i1), *time1)

    time0 = log.timer('mp2 ao2mo_ovov pass2', *time0)
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    '''Build the density-fitting 3-index tensors for every k-point pair in
    ``kptij_lst`` and store them in the HDF5 file ``cderi_file``.

    Steps, as implemented below:
      1. For each unique kptj-kpti, build the 2-centre metric j2c, apply
         the reciprocal-space correction, and store either its Cholesky
         factor (tag 'CD') or, when Cholesky fails, a truncated
         eigen-decomposition (tag 'eig') in 'j2c/<k>'.
      2. Distribute AO-shell jobs over the MPI workers.  For each job,
         gen_int3c writes raw 3-centre integrals to 'j3c-chunks/<job>/<k>'
         and ft_fuse corrects and fits them in place.
      3. Redistribute the chunks so that every rank holds a contiguous
         segment of auxiliary rows in 'j3c/<k>', then drop 'j3c-chunks'.

    Relies on the module-level MPI objects ``mpi``, ``comm`` and ``rank``.
    Returns None.
    '''
    log = logger.Logger(mydf.stdout, mydf.verbose)
    # time.clock() was removed in Python 3.8; use time.process_time when
    # available and fall back to time.clock only on old interpreters.
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    t1 = (cpu_clock(), time.time())

    # NOTE(review): the fused cell is built from mydf.auxcell rather than
    # the auxcell argument -- confirm that the two are always the same.
    fused_cell, fuse = fuse_auxcell(mydf, mydf.auxcell)
    ao_loc = cell.ao_loc_nr()
    nao = ao_loc[-1]
    naux = auxcell.nao_nr()
    nkptij = len(kptij_lst)
    gs = mydf.gs
    Gv, Gvbase, kws = cell.get_Gv_weights(gs)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngs = gxyz.shape[0]

    kptis = kptij_lst[:, 0]
    kptjs = kptij_lst[:, 1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e_sph', hermi=1, kpts=uniq_kpts)
    j2ctags = []
    nauxs = []
    t1 = log.timer_debug1('2c2e', *t1)

    if h5py.is_hdf5(cderi_file):
        # Open read/write explicitly: the default mode of h5py.File became
        # read-only in h5py 3.0, which would break every write below.
        feri = h5py.File(cderi_file, 'a')
    else:
        feri = h5py.File(cderi_file, 'w')
    for k, kpt in enumerate(uniq_kpts):
        aoaux = ft_ao.ft_ao(fused_cell, Gv, None, b, gxyz, Gvbase, kpt).T
        coulG = numpy.sqrt(mydf.weighted_coulG(kpt, False, gs))
        kLR = (aoaux.real * coulG).T
        kLI = (aoaux.imag * coulG).T
        if not kLR.flags.c_contiguous:
            kLR = lib.transpose(kLR.T)
        if not kLI.flags.c_contiguous:
            kLI = lib.transpose(kLI.T)
        aoaux = None

        kLR1 = numpy.asarray(kLR[:, naux:], order='C')
        kLI1 = numpy.asarray(kLI[:, naux:], order='C')
        if is_zero(kpt):  # kpti == kptj
            for p0, p1 in mydf.mpi_prange(0, ngs):
                j2cR = lib.ddot(kLR1[p0:p1].T, kLR[p0:p1])
                j2cR = lib.ddot(kLI1[p0:p1].T, kLI[p0:p1], 1, j2cR, 1)
                j2c[k][naux:] -= mpi.allreduce(j2cR)
                j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T
        else:
            for p0, p1 in mydf.mpi_prange(0, ngs):
                j2cR, j2cI = zdotCN(kLR1[p0:p1].T, kLI1[p0:p1].T,
                                    kLR[p0:p1], kLI[p0:p1])
                j2cR = mpi.allreduce(j2cR)
                j2cI = mpi.allreduce(j2cI)
                j2c[k][naux:] -= j2cR + j2cI * 1j
                j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T.conj()
        j2c[k] = fuse(fuse(j2c[k]).T).T
        try:
            feri['j2c/%d' % k] = scipy.linalg.cholesky(j2c[k], lower=True)
            j2ctags.append('CD')
            nauxs.append(naux)
        except scipy.linalg.LinAlgError:
            #msg =('===================================\n'
            #      'J-metric not positive definite.\n'
            #      'It is likely that gs is not enough.\n'
            #      '===================================')
            #log.error(msg)
            #raise scipy.linalg.LinAlgError('\n'.join([e.message, msg]))
            # Diagonalise the metric of THIS k-point only; j2c itself is
            # the list holding the metrics of all k-points.
            w, v = scipy.linalg.eigh(j2c[k])
            log.debug2('metric linear dependency for kpt %s', k)
            # Eigenvalues come back in ascending order, so the condition
            # number is largest/smallest, i.e. w[-1]/w[0].
            log.debug2('cond = %.4g, drop %d bfns', w[-1] / w[0],
                       numpy.count_nonzero(w < LINEAR_DEP_THR))
            v = v[:, w > LINEAR_DEP_THR].T.conj()
            v /= numpy.sqrt(w[w > LINEAR_DEP_THR]).reshape(-1, 1)
            feri['j2c/%d' % k] = v
            j2ctags.append('eig')
            nauxs.append(v.shape[0])
        kLR = kLI = kLR1 = kLI1 = coulG = None
    j2c = None

    # Pairs with kpti == kptj can use packed-triangular ('s2') AO pairs.
    aosym_s2 = numpy.einsum('ix->i', abs(kptis - kptjs)) < 1e-9
    j_only = numpy.all(aosym_s2)
    if gamma_point(kptij_lst):
        dtype = 'f8'
    else:
        dtype = 'c16'

    vbar = mydf.auxbar(fused_cell)
    vbar = fuse(vbar)
    ovlp = cell.pbc_intor('int1e_ovlp_sph', hermi=1, kpts=kptjs[aosym_s2])
    ovlp = [lib.pack_tril(s) for s in ovlp]
    t1 = log.timer_debug1('aoaux and int2c', *t1)

    # Estimates the buffer size based on the last contraction in G-space.
    # This contraction requires to hold nkptj copies of (naux,?) array
    # simultaneously in memory.
    mem_now = max(comm.allgather(lib.current_memory()[0]))
    max_memory = max(2000, mydf.max_memory - mem_now)
    nkptj_max = max((uniq_inverse == x).sum() for x in set(uniq_inverse))
    buflen = max(int(min(max_memory * .5e6 / 16 / naux / (nkptj_max + 2) / nao,
                         nao / 3 / mpi.pool.size)), 1)
    chunks = (buflen, nao)

    j3c_jobs = grids2d_int3c_jobs(cell, auxcell, kptij_lst, chunks, j_only)
    log.debug1('max_memory = %d MB (%d in use) chunks %s',
               max_memory, mem_now, chunks)
    log.debug2('j3c_jobs %s', j3c_jobs)

    if j_only:
        int3c = wrap_int3c(cell, fused_cell, 'int3c2e_sph', 's2', 1, kptij_lst)
    else:
        int3c = wrap_int3c(cell, fused_cell, 'int3c2e_sph', 's1', 1, kptij_lst)
        # Flat positions of the lower-triangular AO pairs; only needed to
        # pack the s1 integrals of kpti == kptj pairs in gen_int3c.
        idxb = numpy.tril_indices(nao)
        idxb = (idxb[0] * nao + idxb[1]).astype('i')
    aux_loc = fused_cell.ao_loc_nr('ssc' in 'int3c2e_sph')

    def gen_int3c(auxcell, job_id, ish0, ish1):
        # Compute the raw 3-centre integrals of AO shells ish0:ish1 for all
        # k-point pairs and write them to 'j3c-chunks/<job_id>/<kpt_id>'.
        dataname = 'j3c-chunks/%d' % job_id
        if dataname in feri:
            del feri[dataname]

        i0 = ao_loc[ish0]
        i1 = ao_loc[ish1]
        dii = i1 * (i1 + 1) // 2 - i0 * (i0 + 1) // 2
        dij = (i1 - i0) * nao
        if j_only:
            buflen = max(8, int(max_memory * 1e6 / 16 / (nkptij * dii + dii)))
        else:
            buflen = max(8, int(max_memory * 1e6 / 16 / (nkptij * dij + dij)))
        auxranges = balance_segs(aux_loc[1:] - aux_loc[:-1], buflen)
        buflen = max([x[2] for x in auxranges])
        buf = numpy.empty(nkptij * dij * buflen, dtype=dtype)
        buf1 = numpy.empty(dij * buflen, dtype=dtype)

        naux = aux_loc[-1]
        for kpt_id, kptij in enumerate(kptij_lst):
            key = '%s/%d' % (dataname, kpt_id)
            if aosym_s2[kpt_id]:
                shape = (naux, dii)
            else:
                shape = (naux, dij)
            if gamma_point(kptij):
                feri.create_dataset(key, shape, 'f8')
            else:
                feri.create_dataset(key, shape, 'c16')

        naux0 = 0
        for istep, auxrange in enumerate(auxranges):
            log.alldebug2("aux_e2 job_id %d step %d", job_id, istep)
            sh0, sh1, nrow = auxrange
            sub_slice = (ish0, ish1, 0, cell.nbas, sh0, sh1)
            if j_only:
                mat = numpy.ndarray((nkptij, dii, nrow), dtype=dtype,
                                    buffer=buf)
            else:
                mat = numpy.ndarray((nkptij, dij, nrow), dtype=dtype,
                                    buffer=buf)
            mat = int3c(sub_slice, mat)

            for k, kptij in enumerate(kptij_lst):
                h5dat = feri['%s/%d' % (dataname, k)]
                v = lib.transpose(mat[k], out=buf1)
                if not j_only and aosym_s2[k]:
                    # Keep only the lower-triangular AO pairs of this slab.
                    idy = idxb[i0 * (i0 + 1) // 2:i1 * (i1 + 1) // 2] - i0 * nao
                    out = numpy.ndarray((nrow, dii), dtype=v.dtype,
                                        buffer=mat[k])
                    v = numpy.take(v, idy, axis=1, out=out)
                if gamma_point(kptij):
                    h5dat[naux0:naux0 + nrow] = v.real
                else:
                    h5dat[naux0:naux0 + nrow] = v
            naux0 += nrow

    def ft_fuse(job_id, uniq_kptji_id, sh0, sh1):
        # Apply the reciprocal-space correction to the chunk of this job,
        # fuse the auxiliary basis, and fit with the stored metric factor.
        kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)

        shls_slice = (auxcell.nbas, fused_cell.nbas)
        Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
        Gaux *= mydf.weighted_coulG(kpt, False, gs).reshape(-1, 1)
        kLR = Gaux.real.copy('C')
        kLI = Gaux.imag.copy('C')
        j2c = numpy.asarray(feri['j2c/%d' % uniq_kptji_id])
        j2ctag = j2ctags[uniq_kptji_id]
        naux0 = j2c.shape[0]

        if is_zero(kpt):
            aosym = 's2'
        else:
            aosym = 's1'

        j3cR = [None] * nkptj
        j3cI = [None] * nkptj
        i0 = ao_loc[sh0]
        i1 = ao_loc[sh1]
        for k, idx in enumerate(adapted_ji_idx):
            key = 'j3c-chunks/%d/%d' % (job_id, idx)
            v = numpy.asarray(feri[key])
            if is_zero(kpt):
                for i, c in enumerate(vbar):
                    if c != 0:
                        v[i] -= c * ovlp[k][i0 * (i0 + 1) // 2:
                                            i1 * (i1 + 1) // 2].ravel()
            j3cR[k] = numpy.asarray(v.real, order='C')
            if v.dtype == numpy.complex128:
                j3cI[k] = numpy.asarray(v.imag, order='C')
            v = None

        ncol = j3cR[0].shape[1]
        # +1 for pqkRbuf/pqkIbuf
        Gblksize = max(16, int(max_memory * 1e6 / 16 / ncol / (nkptj + 1)))
        Gblksize = min(Gblksize, ngs, 16384)
        pqkRbuf = numpy.empty(ncol * Gblksize)
        pqkIbuf = numpy.empty(ncol * Gblksize)
        buf = numpy.empty(nkptj * ncol * Gblksize, dtype=numpy.complex128)
        log.alldebug2(' blksize (%d,%d)', Gblksize, ncol)

        shls_slice = (sh0, sh1, 0, cell.nbas)
        for p0, p1 in lib.prange(0, ngs, Gblksize):
            dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                        b, gxyz[p0:p1], Gvbase, kpt,
                                        adapted_kptjs, out=buf)
            nG = p1 - p0
            for k, ji in enumerate(adapted_ji_idx):
                aoao = dat[k].reshape(nG, ncol)
                pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                pqkR[:] = aoao.real.T
                pqkI[:] = aoao.imag.T

                lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                    lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)

        for k, idx in enumerate(adapted_ji_idx):
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                v = fuse(j3cR[k])
            else:
                v = fuse(j3cR[k] + j3cI[k] * 1j)
            if j2ctag == 'CD':
                v = scipy.linalg.solve_triangular(j2c, v, lower=True,
                                                  overwrite_b=True)
            else:
                v = lib.dot(j2c, v)
            feri['j3c-chunks/%d/%d' % (job_id, idx)][:naux0] = v

    t2 = t1
    j3c_workers = numpy.zeros(len(j3c_jobs), dtype=int)
    #for job_id, ish0, ish1 in mpi.work_share_partition(j3c_jobs):
    for job_id, ish0, ish1 in mpi.work_stealing_partition(j3c_jobs):
        gen_int3c(fused_cell, job_id, ish0, ish1)
        t2 = log.alltimer_debug2('int j3c %d' % job_id, *t2)

        for k, kpt in enumerate(uniq_kpts):
            ft_fuse(job_id, k, ish0, ish1)
            t2 = log.alltimer_debug2('ft-fuse %d k %d' % (job_id, k), *t2)

        # Remember which rank produced this job's chunk.
        j3c_workers[job_id] = rank
    j3c_workers = mpi.allreduce(j3c_workers)
    log.debug2('j3c_workers %s', j3c_workers)
    j2c = kLRs = kLIs = ovlp = vbar = fuse = gen_int3c = ft_fuse = None
    t1 = log.timer_debug1('int3c and fuse', *t1)

    def get_segs_loc(aosym):
        # Column ranges of each job inside the buffer received from
        # alltoall, where jobs arrive grouped by the worker that ran them.
        off0 = numpy.asarray([ao_loc[i0] for x, i0, i1 in j3c_jobs])
        off1 = numpy.asarray([ao_loc[i1] for x, i0, i1 in j3c_jobs])
        if aosym:  # s2
            dims = off1 * (off1 + 1) // 2 - off0 * (off0 + 1) // 2
        else:
            dims = (off1 - off0) * nao
        #dims = numpy.asarray([ao_loc[i1]-ao_loc[i0] for x,i0,i1 in j3c_jobs])
        dims = numpy.hstack([dims[j3c_workers == w]
                             for w in range(mpi.pool.size)])
        job_idx = numpy.hstack([numpy.where(j3c_workers == w)[0]
                                for w in range(mpi.pool.size)])
        segs_loc = numpy.append(0, numpy.cumsum(dims))
        segs_loc = [(segs_loc[j], segs_loc[j + 1])
                    for j in numpy.argsort(job_idx)]
        return segs_loc
    segs_loc_s1 = get_segs_loc(False)
    segs_loc_s2 = get_segs_loc(True)

    if 'j3c' in feri:
        del feri['j3c']
    # Each rank owns auxiliary rows [naux0, naux0+segsize) of the result.
    segsize = (max(nauxs) + mpi.pool.size - 1) // mpi.pool.size
    naux0 = rank * segsize
    for k, kptij in enumerate(kptij_lst):
        naux1 = min(nauxs[uniq_inverse[k]], naux0 + segsize)
        nrow = max(0, naux1 - naux0)
        if gamma_point(kptij):
            dtype = 'f8'
        else:
            dtype = 'c16'
        if aosym_s2[k]:
            nao_pair = nao * (nao + 1) // 2
        else:
            nao_pair = nao * nao
        feri.create_dataset('j3c/%d' % k, (nrow, nao_pair), dtype,
                            maxshape=(None, nao_pair))

    def load(k, p0, p1):
        # Collect, for every destination rank, the rows p0:p1 of its
        # segment from the chunks this rank produced.
        naux1 = nauxs[uniq_inverse[k]]
        slices = [(min(i * segsize + p0, naux1), min(i * segsize + p1, naux1))
                  for i in range(mpi.pool.size)]
        segs = []
        for p0, p1 in slices:
            val = []
            for job_id, worker in enumerate(j3c_workers):
                if rank == worker:
                    key = 'j3c-chunks/%d/%d' % (job_id, k)
                    val.append(feri[key][p0:p1].ravel())
            if val:
                segs.append(numpy.hstack(val))
            else:
                segs.append(numpy.zeros(0))
        return segs

    def save(k, p0, p1, segs):
        # Exchange the pieces between ranks and write this rank's rows.
        segs = mpi.alltoall(segs)
        naux1 = nauxs[uniq_inverse[k]]
        loc0, loc1 = min(p0, naux1 - naux0), min(p1, naux1 - naux0)
        nL = loc1 - loc0
        if nL > 0:
            if aosym_s2[k]:
                segs = numpy.hstack([segs[i0 * nL:i1 * nL].reshape(nL, -1)
                                     for i0, i1 in segs_loc_s2])
            else:
                segs = numpy.hstack([segs[i0 * nL:i1 * nL].reshape(nL, -1)
                                     for i0, i1 in segs_loc_s1])
            feri['j3c/%d' % k][loc0:loc1] = segs

    mem_now = max(comm.allgather(lib.current_memory()[0]))
    max_memory = max(2000, min(8000, mydf.max_memory - mem_now))
    if numpy.all(aosym_s2):
        if gamma_point(kptij_lst):
            blksize = max(16, int(max_memory * .5e6 / 8 / nao**2))
        else:
            blksize = max(16, int(max_memory * .5e6 / 16 / nao**2))
    else:
        blksize = max(16, int(max_memory * .5e6 / 16 / nao**2 / 2))
    log.debug1('max_momory %d MB (%d in use), blksize %d',
               max_memory, mem_now, blksize)

    t2 = t1
    with lib.call_in_background(save) as async_write:
        for k, kptji in enumerate(kptij_lst):
            for p0, p1 in lib.prange(0, segsize, blksize):
                segs = load(k, p0, p1)
                async_write(k, p0, p1, segs)
                t2 = log.timer_debug1(
                    'assemble k=%d %d:%d (in %d)' % (k, p0, p1, segsize), *t2)

    if 'j3c-chunks' in feri:
        del feri['j3c-chunks']
    if 'j3c-kptij' in feri:
        del feri['j3c-kptij']
    feri['j3c-kptij'] = kptij_lst
    t1 = log.alltimer_debug1('assembling j3c', *t1)
    feri.close()
def _contract_vvvv_t2(mycc, mol, vvL, t2, out=None, verbose=None):
    '''Ht2 = numpy.einsum('ijcd,acdb->ijab', t2, vvvv)

    The vvvv integrals are never stored: they are rebuilt block by block
    as products of rows of ``vvL`` and contracted with t2 straight away.

    Args:
        mycc : object providing ``max_memory`` (MB)
        mol : only handed to logger.new_logger
        vvL : 2D array-like whose rows are addressed as packed
            lower-triangular virtual pairs and whose last axis has
            length naux
        t2 : amplitudes; the last two axes are the virtual indices
        out : optional buffer backing the result
        verbose : verbosity forwarded to logger.new_logger

    Returns:
        Array with the shape of t2.
    '''
    _dgemm = lib.numpy_helper._dgemm
    # time.clock() was removed in Python 3.8; use time.process_time when
    # available and fall back to time.clock only on old interpreters.
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time0 = cpu_clock(), time.time()
    log = logger.new_logger(mol, verbose)

    naux = vvL.shape[-1]
    nvira, nvirb = t2.shape[-2:]
    x2 = t2.reshape(-1,nvira,nvirb)
    nocc2 = x2.shape[0]
    nvir2 = nvira * nvirb
    Ht2 = numpy.ndarray(x2.shape, buffer=out)
    Ht2[:] = 0

    max_memory = max(MEMORYMIN, mycc.max_memory - lib.current_memory()[0])
    def contract_blk_(eri, i0, i1, j0, j1):
        # Accumulate the contribution of one (i0:i1, j0:j1) integral block
        # into Ht2; off-diagonal blocks are also applied transposed.
        ic = i1 - i0
        jc = j1 - j0
        #:Ht2[:,j0:j1] += numpy.einsum('xef,efab->xab', x2[:,i0:i1], eri)
        _dgemm('N', 'N', nocc2, jc*nvirb, ic*nvirb,
               x2.reshape(-1,nvir2), eri.reshape(-1,jc*nvirb),
               Ht2.reshape(-1,nvir2), 1, 1, i0*nvirb, 0, j0*nvirb)

        if i0 > j0:
            #:Ht2[:,i0:i1] += numpy.einsum('xef,abef->xab', x2[:,j0:j1], eri)
            _dgemm('N', 'T', nocc2, ic*nvirb, jc*nvirb,
                   x2.reshape(-1,nvir2), eri.reshape(-1,jc*nvirb),
                   Ht2.reshape(-1,nvir2), 1, 1, j0*nvirb, 0, i0*nvirb)

    #TODO: check if vvL can be entirely loaded into memory
    nvir_pair = nvirb * (nvirb+1) // 2
    dmax = numpy.sqrt(max_memory*.7e6/8/nvirb**2/2)
    dmax = int(min((nvira+3)//4, max(ccsd.BLKMIN, dmax)))
    vvblk = (max_memory*1e6/8 - dmax**2*(nvirb**2*1.5+naux))/naux
    vvblk = int(min((nvira+3)//4, max(ccsd.BLKMIN, vvblk/naux)))
    eribuf = numpy.empty((dmax,dmax,nvir_pair))
    loadbuf = numpy.empty((dmax,dmax,nvirb,nvirb))
    tril2sq = lib.square_mat_in_trilu_indices(nvira)

    for i0, i1 in lib.prange(0, nvira, dmax):
        # Packed-triangular row range covering virtual rows i0:i1.
        off0 = i0*(i0+1)//2
        off1 = i1*(i1+1)//2
        vvL0 = _cp(vvL[off0:off1])
        for j0, j1 in lib.prange(0, i1, dmax):
            ijL = vvL0[tril2sq[i0:i1,j0:j1] - off0].reshape(-1,naux)
            eri = numpy.ndarray(((i1-i0)*(j1-j0),nvir_pair), buffer=eribuf)
            for p0, p1 in lib.prange(0, nvir_pair, vvblk):
                vvL1 = _cp(vvL[p0:p1])
                eri[:,p0:p1] = lib.ddot(ijL, vvL1.T)
                vvL1 = None

            # Fill the 4-index (i1-i0,nvirb,j1-j0,nvirb) buffer from the
            # pair-packed eri.
            tmp = numpy.ndarray((i1-i0,nvirb,j1-j0,nvirb), buffer=loadbuf)
            _ccsd.libcc.CCload_eri(tmp.ctypes.data_as(ctypes.c_void_p),
                                   eri.ctypes.data_as(ctypes.c_void_p),
                                   (ctypes.c_int*4)(i0, i1, j0, j1),
                                   ctypes.c_int(nvirb))
            contract_blk_(tmp, i0, i1, j0, j1)
            time0 = log.timer_debug1('vvvv [%d:%d,%d:%d]'%(i0,i1,j0,j1), *time0)
    return Ht2.reshape(t2.shape)
def _ao2mo_ovov(mp, orbo, orbv, feri, max_memory=2000, verbose=None):
    '''Two-pass out-of-core AO->MO transformation producing the 'ovov'
    dataset.

    Pass 1 computes AO integral blocks for shell-range pairs with
    jsh <= ish, contracts the two outer AO indices with ``orbo`` and
    stores the result in a temporary HDF5 file.  Pass 2 reads batches of
    occupied orbitals back (rebuilding the other triangle by
    transposition), contracts the remaining AO indices with ``orbv`` and
    writes the rows into the 'ovov' dataset created in ``feri``.

    Args:
        mp : object providing ``mol`` (also handed to logger.new_logger)
        orbo : (nao, nocc) coefficient matrix
        orbv : (nao, nvir) coefficient matrix
        feri : HDF5 file/group in which 'ovov' is created
        max_memory : memory budget in MB used to size the working blocks
        verbose : verbosity forwarded to logger.new_logger

    Returns:
        The HDF5 dataset 'ovov' of shape (nocc*nvir, nocc*nvir).
    '''
    # time.clock() was removed in Python 3.8; use time.process_time when
    # available and fall back to time.clock only on old interpreters.
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    time0 = (cpu_clock(), time.time())
    log = logger.new_logger(mp, verbose)

    mol = mp.mol
    int2e = mol._add_suffix('int2e')
    ao2mopt = _ao2mo.AO2MOpt(mol, int2e, 'CVHFnr_schwarz_cond',
                             'CVHFsetnr_direct_scf')
    nao, nocc = orbo.shape
    nvir = orbv.shape[1]
    nbas = mol.nbas
    assert (nvir <= nao)

    ao_loc = mol.ao_loc_nr()
    dmax = max(4, min(nao / 3,
                      numpy.sqrt(max_memory * .95e6 / 8 / (nao + nocc)**2)))
    sh_ranges = ao2mo.outcore.balance_partition(ao_loc, dmax)
    dmax = max(x[2] for x in sh_ranges)
    eribuf = numpy.empty((nao, dmax, dmax, nao))
    ftmp = lib.H5TmpFile()
    log.debug('max_memory %s MB (dmax = %s) required disk space %g MB',
              max_memory, dmax,
              nocc**2 * (nao * (nao + dmax) / 2 + nvir**2) * 8 / 1e6)

    buf_i = numpy.empty((nocc * dmax**2 * nao))
    buf_li = numpy.empty((nocc**2 * dmax**2))
    buf1 = numpy.empty_like(buf_li)

    fint = gto.moleintor.getints4c
    jk_blk_slices = []
    count = 0
    time1 = time0
    # ---- pass 1: contract the two outer AO indices with orbo
    with lib.call_in_background(ftmp.__setitem__) as save:
        for ip, (ish0, ish1, ni) in enumerate(sh_ranges):
            for jsh0, jsh1, nj in sh_ranges[:ip + 1]:
                i0, i1 = ao_loc[ish0], ao_loc[ish1]
                j0, j1 = ao_loc[jsh0], ao_loc[jsh1]
                jk_blk_slices.append((i0, i1, j0, j1))

                eri = fint(int2e, mol._atm, mol._bas, mol._env,
                           shls_slice=(0, nbas, ish0, ish1,
                                       jsh0, jsh1, 0, nbas),
                           aosym='s1', ao_loc=ao_loc,
                           cintopt=ao2mopt._cintopt, out=eribuf)
                tmp_i = numpy.ndarray((nocc, (i1 - i0) * (j1 - j0) * nao),
                                      buffer=buf_i)
                tmp_li = numpy.ndarray((nocc, nocc * (i1 - i0) * (j1 - j0)),
                                       buffer=buf_li)
                lib.ddot(orbo.T,
                         eri.reshape(nao, (i1 - i0) * (j1 - j0) * nao),
                         c=tmp_i)
                lib.ddot(orbo.T,
                         tmp_i.reshape(nocc * (i1 - i0) * (j1 - j0), nao).T,
                         c=tmp_li)
                tmp_li = tmp_li.reshape(nocc, nocc, (i1 - i0), (j1 - j0))
                save(str(count), tmp_li.transpose(1, 0, 2, 3))
                # Alternate the output buffer so the background write of
                # this block is not overwritten by the next one.
                buf_li, buf1 = buf1, buf_li
                count += 1
                time1 = log.timer_debug1(
                    'partial ao2mo [%d:%d,%d:%d]' % (ish0, ish1, jsh0, jsh1),
                    *time1)
    time1 = time0 = log.timer('mp2 ao2mo_ovov pass1', *time0)
    eri = eribuf = tmp_i = tmp_li = buf_i = buf_li = buf1 = None

    h5dat = feri.create_dataset('ovov', (nocc * nvir, nocc * nvir), 'f8',
                                chunks=(nvir, nvir))
    occblk = int(min(nocc,
                     max(4, 250 / nocc,
                         max_memory * .9e6 / 8 / (nao**2 * nocc) / 5)))

    def load(i0, eri):
        # Read one batch of occupied orbitals; the blocks that were not
        # stored (q > p) are filled from the transposed stored block.
        if i0 < nocc:
            i1 = min(i0 + occblk, nocc)
            for k, (p0, p1, q0, q1) in enumerate(jk_blk_slices):
                eri[:i1 - i0, :, p0:p1, q0:q1] = ftmp[str(k)][i0:i1]
                if p0 != q0:
                    dat = numpy.asarray(ftmp[str(k)][:, i0:i1])
                    eri[:i1 - i0, :, q0:q1, p0:p1] = dat.transpose(1, 0, 3, 2)

    def save(i0, i1, dat):
        # Write the rows that belong to occupied orbitals i0..i1-1.
        for i in range(i0, i1):
            h5dat[i * nvir:(i + 1) * nvir] = dat[i - i0].reshape(
                nvir, nocc * nvir)

    # ---- pass 2: contract the remaining AO indices with orbv
    orbv = numpy.asarray(orbv, order='F')
    buf_prefecth = numpy.empty((occblk, nocc, nao, nao))
    buf = numpy.empty_like(buf_prefecth)
    bufw = numpy.empty((occblk * nocc, nvir**2))
    bufw1 = numpy.empty_like(bufw)
    with lib.call_in_background(load) as prefetch:
        with lib.call_in_background(save) as bsave:
            load(0, buf_prefecth)
            for i0, i1 in lib.prange(0, nocc, occblk):
                # Swap buffers: compute on one while the next is prefetched.
                buf, buf_prefecth = buf_prefecth, buf
                prefetch(i1, buf_prefecth)
                eri = buf[:i1 - i0].reshape((i1 - i0) * nocc, nao, nao)

                dat = _ao2mo.nr_e2(eri, orbv, (0, nvir, 0, nvir), 's1', 's1',
                                   out=bufw)
                bsave(i0, i1,
                      dat.reshape(i1 - i0, nocc, nvir,
                                  nvir).transpose(0, 2, 1, 3))
                # Alternate the output buffer while the background save of
                # this batch is still running.
                bufw, bufw1 = bufw1, bufw
                time1 = log.timer_debug1('pass2 ao2mo [%d:%d]' % (i0, i1),
                                         *time1)

    time0 = log.timer('mp2 ao2mo_ovov pass2', *time0)
    return h5dat
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    '''Build the density-fitting 3-index tensors for every k-point pair in
    ``kptij_lst`` and write them to a new HDF5 file ``cderi_file``.

    Steps, as implemented below:
      1. outcore._aux_e2 dumps the raw 3-centre integrals to a temporary
         swap file under 'j3c-junk'.
      2. For each unique kptj-kpti, the 2-centre metric is built,
         corrected in reciprocal space and stored in the swap file.
      3. For each unique k-point (reusing the decomposition for symmetry
         related k-points), the metric is Cholesky-decomposed, or
         eigen-decomposed when Cholesky fails, and make_kpt writes the
         fitted tensors to 'j3c/<pair>/<step>' (and 'j3c-/...' when a
         negative-eigenvalue part exists).

    Returns None.
    '''
    # time.clock() was removed in Python 3.8; use time.process_time when
    # available and fall back to time.clock only on old interpreters.
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    t1 = (cpu_clock(), time.time())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, auxcell)

    # The ideal way to hold the temporary integrals is to store them in the
    # cderi_file and overwrite them inplace in the second pass.  The current
    # HDF5 library does not have an efficient way to manage free space in
    # overwriting.  It often leads to the cderi_file ~2 times larger than the
    # necessary size.  For now, dumping the DF integral intermediates to a
    # separated temporary file can avoid this issue.  The DF intermediates may
    # be terribly huge. The temporary file should be placed in the same disk
    # as cderi_file.
    swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file))
    fswap = lib.H5TmpFile(swapfile.name)
    # Unlink swapfile to avoid trash
    swapfile = None

    outcore._aux_e2(cell, fused_cell, fswap, 'int3c2e', aosym='s2',
                    kptij_lst=kptij_lst, dataname='j3c-junk',
                    max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    mesh = mydf.mesh
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]

    kptis = kptij_lst[:,0]
    kptjs = kptij_lst[:,1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)

    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts)

    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    blksize = max(2048, int(max_memory*.5e6/16/fused_cell.nao_nr()))
    log.debug2('max_memory %s (MB) blocksize %s', max_memory, blksize)
    for k, kpt in enumerate(uniq_kpts):
        coulG = mydf.weighted_coulG(kpt, False, mesh)
        for p0, p1 in lib.prange(0, ngrids, blksize):
            aoaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], None, b, gxyz[p0:p1],
                                Gvbase, kpt).T
            LkR = numpy.asarray(aoaux.real, order='C')
            LkI = numpy.asarray(aoaux.imag, order='C')
            aoaux = None

            if is_zero(kpt):  # kpti == kptj
                j2c[k][naux:] -= lib.ddot(LkR[naux:]*coulG[p0:p1], LkR.T)
                j2c[k][naux:] -= lib.ddot(LkI[naux:]*coulG[p0:p1], LkI.T)
                j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T
            else:
                j2cR, j2cI = zdotCN(LkR[naux:]*coulG[p0:p1],
                                    LkI[naux:]*coulG[p0:p1], LkR.T, LkI.T)
                j2c[k][naux:] -= j2cR + j2cI * 1j
                j2c[k][:naux,naux:] = j2c[k][naux:,:naux].T.conj()
            LkR = LkI = None
        fswap['j2c/%d'%k] = fuse(fuse(j2c[k]).T).T
    j2c = coulG = None

    def cholesky_decomposed_metric(uniq_kptji_id):
        # Return (factor, negative-part factor or None, tag) for the
        # stored metric; tag is 'CD' for Cholesky and 'eig' otherwise.
        j2c = numpy.asarray(fswap['j2c/%d'%uniq_kptji_id])
        j2c_negative = None
        try:
            j2c = scipy.linalg.cholesky(j2c, lower=True)
            j2ctag = 'CD'
        except scipy.linalg.LinAlgError:
            #msg =('===================================\n'
            #      'J-metric not positive definite.\n'
            #      'It is likely that mesh is not enough.\n'
            #      '===================================')
            #log.error(msg)
            #raise scipy.linalg.LinAlgError('\n'.join([str(e), msg]))
            w, v = scipy.linalg.eigh(j2c)
            log.debug('DF metric linear dependency for kpt %s', uniq_kptji_id)
            log.debug('cond = %.4g, drop %d bfns',
                      w[-1]/w[0], numpy.count_nonzero(w<mydf.linear_dep_threshold))
            v1 = v[:,w>mydf.linear_dep_threshold].conj().T
            v1 /= numpy.sqrt(w[w>mydf.linear_dep_threshold]).reshape(-1,1)
            j2c = v1
            if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum':
                idx = numpy.where(w < -mydf.linear_dep_threshold)[0]
                if len(idx) > 0:
                    j2c_negative = (v[:,idx]/numpy.sqrt(-w[idx])).conj().T
            w = v = None
            j2ctag = 'eig'
        return j2c, j2c_negative, j2ctag

    feri = h5py.File(cderi_file, 'w')
    feri['j3c-kptij'] = kptij_lst
    nsegs = len(fswap['j3c-junk/0'])
    def make_kpt(uniq_kptji_id, cholesky_j2c):
        # Correct and fit the 3-centre integrals of every k-point pair
        # whose kptj-kpti equals uniq_kpts[uniq_kptji_id].
        kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

        j2c, j2c_negative, j2ctag = cholesky_j2c

        shls_slice = (auxcell.nbas, fused_cell.nbas)
        Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
        wcoulG = mydf.weighted_coulG(kpt, False, mesh)
        Gaux *= wcoulG.reshape(-1,1)
        kLR = Gaux.real.copy('C')
        kLI = Gaux.imag.copy('C')
        Gaux = None

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao*(nao+1)//2

            if cell.dimension == 3:
                vbar = fuse(mydf.auxbar(fused_cell))
                ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
                ovlp = [lib.pack_tril(s) for s in ovlp]
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory-mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory*.38e6/16/naux/(nkptj+1)), 1), nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16, int(max_memory*.1e6/16/buflen/(nkptj+1)))
        else:
            Gblksize = max(16, int(max_memory*.2e6/16/buflen/(nkptj+1)))
        Gblksize = min(Gblksize, ngrids, 16384)
        pqkRbuf = numpy.empty(buflen*Gblksize)
        pqkIbuf = numpy.empty(buflen*Gblksize)
        # buf for ft_aopair
        buf = numpy.empty(nkptj*buflen*Gblksize, dtype=numpy.complex128)

        def pw_contract(istep, sh_range, j3cR, j3cI):
            # Subtract the reciprocal-space contribution from one AO-pair
            # slab, then fuse, fit and write it to feri.
            bstart, bend, ncol = sh_range
            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)

            for p0, p1 in lib.prange(0, ngrids, Gblksize):
                dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                            b, gxyz[p0:p1], Gvbase, kpt,
                                            adapted_kptjs, out=buf)
                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = dat[k].reshape(nG,ncol)
                    pqkR = numpy.ndarray((ncol,nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol,nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T

                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkR.T,  1, j3cI[k][naux:], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)
                if j2ctag == 'CD':
                    v = scipy.linalg.solve_triangular(j2c, v, lower=True,
                                                      overwrite_b=True)
                    feri['j3c/%d/%d'%(ji,istep)] = v
                else:
                    feri['j3c/%d/%d'%(ji,istep)] = lib.dot(j2c, v)

                # low-dimension systems
                if j2c_negative is not None:
                    feri['j3c-/%d/%d'%(ji,istep)] = lib.dot(j2c_negative, v)

        with lib.call_in_background(pw_contract) as compute:
            col1 = 0
            for istep, sh_range in enumerate(shranges):
                log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d',
                           istep+1, len(shranges), *sh_range)
                bstart, bend, ncol = sh_range
                col0, col1 = col1, col1+ncol
                j3cR = []
                j3cI = []
                for k, idx in enumerate(adapted_ji_idx):
                    v = numpy.vstack([fswap['j3c-junk/%d/%d'%(idx,i)][0,col0:col1].T
                                      for i in range(nsegs)])
                    # vbar is the interaction between the background charge
                    # and the auxiliary basis.  0D, 1D, 2D do not have vbar.
                    if is_zero(kpt) and cell.dimension == 3:
                        for i in numpy.where(vbar != 0)[0]:
                            v[i] -= vbar[i] * ovlp[k][col0:col1]
                    j3cR.append(numpy.asarray(v.real, order='C'))
                    if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                        j3cI.append(None)
                    else:
                        j3cI.append(numpy.asarray(v.imag, order='C'))
                v = None
                compute(istep, sh_range, j3cR, j3cI)

        # The raw integrals of these pairs are no longer needed.
        for ji in adapted_ji_idx:
            del fswap['j3c-junk/%d'%ji]

    # Wrapped around boundary and symmetry between k and -k can be used
    # explicitly for the metric integrals.  We consider this symmetry
    # because it is used in the df_ao2mo module when contracting two 3-index
    # integral tensors to the 4-index 2e integral tensor. If the symmetry
    # related k-points are treated separately, the resultant 3-index tensors
    # may have inconsistent dimension due to the numerial noise when handling
    # linear dependency of j2c.
    def conj_j2c(cholesky_j2c):
        j2c, j2c_negative, j2ctag = cholesky_j2c
        if j2c_negative is None:
            return j2c.conj(), None, j2ctag
        else:
            return j2c.conj(), j2c_negative.conj(), j2ctag

    a = cell.lattice_vectors() / (2*numpy.pi)
    def kconserve_indices(kpt):
        '''search which (kpts+kpt) satisfies momentum conservation'''
        kdif = numpy.einsum('wx,ix->wi', a, uniq_kpts + kpt)
        kdif_int = numpy.rint(kdif)
        mask = numpy.einsum('wi->i', abs(kdif - kdif_int)) < KPT_DIFF_TOL
        uniq_kptji_ids = numpy.where(mask)[0]
        return uniq_kptji_ids

    done = numpy.zeros(len(uniq_kpts), dtype=bool)
    for k, kpt in enumerate(uniq_kpts):
        if done[k]:
            continue

        log.debug1('Cholesky decomposition for j2c at kpt %s', k)
        cholesky_j2c = cholesky_decomposed_metric(k)

        # The k-point k' which has (k - k') * a = 2n pi. Metric integrals have the
        # symmetry S = S
        uniq_kptji_ids = kconserve_indices(-kpt)
        log.debug1("Symmetry pattern (k - %s)*a= 2n pi", kpt)
        log.debug1(" make_kpt for uniq_kptji_ids %s", uniq_kptji_ids)
        for uniq_kptji_id in uniq_kptji_ids:
            if not done[uniq_kptji_id]:
                make_kpt(uniq_kptji_id, cholesky_j2c)
        done[uniq_kptji_ids] = True

        # The k-point k' which has (k + k') * a = 2n pi. Metric integrals have the
        # symmetry S = S*
        uniq_kptji_ids = kconserve_indices(kpt)
        log.debug1("Symmetry pattern (k + %s)*a= 2n pi", kpt)
        log.debug1(" make_kpt for %s", uniq_kptji_ids)
        cholesky_j2c = conj_j2c(cholesky_j2c)
        for uniq_kptji_id in uniq_kptji_ids:
            if not done[uniq_kptji_id]:
                make_kpt(uniq_kptji_id, cholesky_j2c)
        done[uniq_kptji_ids] = True

    feri.close()
def _make_df_eris(cc, mo_coeff=None):
    '''Build the ERI container for CCSD from density-fitted 3-index integrals.

    Every block yielded by ``cc.with_df.loop()`` is transformed to the MO
    basis with ``_ao2mo.nr_e2``.  The occupied-occupied and occupied-virtual
    slices are kept in memory, the lower-triangular packed virtual-virtual
    slice is written to the ``vvL`` dataset, and the 4-index blocks
    ``oooo``, ``ovoo``, ``ovov``, ``ovvo``, ``oovv`` and ``ovvv`` (last two
    virtual indices packed) are contracted from them and stored in the
    temporary HDF5 file ``eris.feri``.

    Args:
        cc: object providing ``with_df`` and ``max_memory``.
        mo_coeff: forwarded unchanged to ``eris._common_init_``.

    Returns:
        The populated ``_ChemistsERIs`` instance.
    '''
    # NOTE: the original started an unused timer via time.clock(), which no
    # longer exists on Python >= 3.8; it was dropped together with the other
    # never-read locals (nocc_pair, an extra H5TmpFile, Lvo).
    eris = _ChemistsERIs()
    eris._common_init_(cc, mo_coeff)
    nocc = eris.nocc
    nmo = eris.fock.shape[0]
    nvir = nmo - nocc
    nvir_pair = nvir*(nvir+1)//2
    with_df = cc.with_df
    naux = eris.naux = with_df.get_naoaux()

    eris.feri = lib.H5TmpFile()
    eris.oooo = eris.feri.create_dataset('oooo', (nocc,nocc,nocc,nocc), 'f8')
    eris.ovoo = eris.feri.create_dataset('ovoo', (nocc,nvir,nocc,nocc), 'f8',
                                         chunks=(nocc,1,nocc,nocc))
    eris.ovov = eris.feri.create_dataset('ovov', (nocc,nvir,nocc,nvir), 'f8',
                                         chunks=(nocc,1,nocc,nvir))
    eris.ovvo = eris.feri.create_dataset('ovvo', (nocc,nvir,nvir,nocc), 'f8',
                                         chunks=(nocc,1,nvir,nocc))
    eris.oovv = eris.feri.create_dataset('oovv', (nocc,nocc,nvir,nvir), 'f8',
                                         chunks=(nocc,nocc,1,nvir))
    # nrow ~ 4e9/8/blockdim to ensure hdf5 chunk < 4GB
    chunks = (min(nvir_pair, int(4e8/with_df.blockdim)),
              min(naux, with_df.blockdim))
    eris.vvL = eris.feri.create_dataset('vvL', (nvir_pair,naux), 'f8',
                                        chunks=chunks)

    Loo = numpy.empty((naux,nocc,nocc))
    Lov = numpy.empty((naux,nocc,nvir))
    mo = numpy.asarray(eris.mo_coeff, order='F')
    ijslice = (0, nmo, 0, nmo)
    p1 = 0
    Lpq = None
    for eri1 in with_df.loop():
        # Lpq from the previous pass is handed back as the output buffer.
        Lpq = _ao2mo.nr_e2(eri1, mo, ijslice, aosym='s2', mosym='s1', out=Lpq)
        p0, p1 = p1, p1 + Lpq.shape[0]
        Lpq = Lpq.reshape(p1-p0,nmo,nmo)
        Loo[p0:p1] = Lpq[:,:nocc,:nocc]
        Lov[p0:p1] = Lpq[:,:nocc,nocc:]
        Lvv = lib.pack_tril(Lpq[:,nocc:,nocc:])
        eris.vvL[:,p0:p1] = Lvv.T
    Lpq = Lvv = None

    Loo = Loo.reshape(naux,nocc**2)
    Lov = Lov.reshape(naux,nocc*nvir)
    eris.oooo[:] = lib.ddot(Loo.T, Loo).reshape(nocc,nocc,nocc,nocc)
    eris.ovoo[:] = lib.ddot(Lov.T, Loo).reshape(nocc,nvir,nocc,nocc)
    ovov = lib.ddot(Lov.T, Lov).reshape(nocc,nvir,nocc,nvir)
    eris.ovov[:] = ovov
    eris.ovvo[:] = ovov.transpose(0,1,3,2)
    ovov = None

    mem_now = lib.current_memory()[0]
    max_memory = max(0, cc.max_memory - mem_now)
    blksize = max(ccsd.BLKMIN,
                  int((max_memory*.9e6/8-nocc**2*nvir_pair)/(nocc**2+naux)))
    # oovv is assembled with the virtual pair packed, then unpacked once.
    oovv_tril = numpy.empty((nocc*nocc,nvir_pair))
    for p0, p1 in lib.prange(0, nvir_pair, blksize):
        oovv_tril[:,p0:p1] = lib.ddot(Loo.T, _cp(eris.vvL[p0:p1]).T)
    eris.oovv[:] = lib.unpack_tril(oovv_tril).reshape(nocc,nocc,nvir,nvir)
    oovv_tril = Loo = None

    Lov = Lov.reshape(naux,nocc,nvir)
    vblk = max(nocc, int((max_memory*.15e6/8)/(nocc*nvir_pair)))
    vvblk = int(min(nvir_pair, 4e8/nocc,
                    max(4, (max_memory*.8e6/8)/(vblk*nocc+naux))))
    eris.ovvv = eris.feri.create_dataset('ovvv', (nocc,nvir,nvir_pair), 'f8',
                                         chunks=(nocc,1,vvblk))
    for q0, q1 in lib.prange(0, nvir_pair, vvblk):
        vvL = _cp(eris.vvL[q0:q1])
        for p0, p1 in lib.prange(0, nvir, vblk):
            tmpLov = _cp(Lov[:,:,p0:p1]).reshape(naux,-1)
            eris.ovvv[:,p0:p1,q0:q1] = lib.ddot(tmpLov.T, vvL.T).reshape(nocc,p1-p0,q1-q0)
        vvL = None
    return eris
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    '''Generate the density-fitting 3-index tensors and write them to
    ``cderi_file``.

    The function works in three stages:

    1. ``outcore._aux_e2`` dumps the 'int3c2e' integrals over ``cell`` and
       the fused auxiliary cell into a temporary HDF5 swap file.
    2. For every unique ``kptj - kpti`` the 2-centre metric is corrected
       with the Fourier-space contribution and stored in the swap file.
    3. For every unique k-point difference the metric is decomposed
       (Cholesky, or eigen-decomposition if Cholesky fails), the 3-centre
       integrals are corrected in G-space and the fitted tensors are
       written to ``cderi_file`` as ``j3c/<pair index>/<step>`` (plus
       ``j3c-/...`` when a negative-metric part exists).

    Args:
        mydf: DF object; ``stdout``, ``verbose``, ``max_memory``, ``mesh``,
            ``linear_dep_threshold``, ``weighted_coulG`` and ``auxbar`` are
            read from it.
        cell: the unit cell holding the AO basis.
        auxcell: the auxiliary-basis cell.
        kptij_lst: array of k-point pairs; column 0 is kpti, column 1 kptj.
        cderi_file: path of the HDF5 file to create (opened in 'w' mode).

    Returns:
        None.  The result is the content of ``cderi_file``.
    '''
    # time.clock was removed in Python 3.8; use it only where
    # time.process_time does not exist (Python 2).
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    t1 = (cpu_clock(), time.time())
    log = logger.Logger(mydf.stdout, mydf.verbose)
    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    fused_cell, fuse = fuse_auxcell(mydf, auxcell)

    # The ideal way to hold the temporary integrals is to store them in the
    # cderi_file and overwrite them inplace in the second pass.  The current
    # HDF5 library does not have an efficient way to manage free space in
    # overwriting.  It often leads to the cderi_file ~2 times larger than the
    # necessary size.  For now, dumping the DF integral intermediates to a
    # separated temporary file can avoid this issue.  The DF intermediates may
    # be terribly huge. The temporary file should be placed in the same disk
    # as cderi_file.
    swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file))
    fswap = lib.H5TmpFile(swapfile.name)
    # Unlink swapfile to avoid trash
    swapfile = None

    outcore._aux_e2(cell, fused_cell, fswap, 'int3c2e', aosym='s2',
                    kptij_lst=kptij_lst, dataname='j3c-junk',
                    max_memory=max_memory)
    t1 = log.timer_debug1('3c2e', *t1)

    nao = cell.nao_nr()
    naux = auxcell.nao_nr()
    mesh = mydf.mesh
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]

    kptis = kptij_lst[:, 0]
    kptjs = kptij_lst[:, 1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)

    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts)

    max_memory = max(2000, mydf.max_memory - lib.current_memory()[0])
    blksize = max(2048, int(max_memory * .5e6 / 16 / fused_cell.nao_nr()))
    log.debug2('max_memory %s (MB) blocksize %s', max_memory, blksize)
    for k, kpt in enumerate(uniq_kpts):
        coulG = mydf.weighted_coulG(kpt, False, mesh)
        for p0, p1 in lib.prange(0, ngrids, blksize):
            aoaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], None, b, gxyz[p0:p1],
                                Gvbase, kpt).T
            LkR = numpy.asarray(aoaux.real, order='C')
            LkI = numpy.asarray(aoaux.imag, order='C')
            aoaux = None

            if is_zero(kpt):  # kpti == kptj
                j2c[k][naux:] -= lib.ddot(LkR[naux:] * coulG[p0:p1], LkR.T)
                j2c[k][naux:] -= lib.ddot(LkI[naux:] * coulG[p0:p1], LkI.T)
                j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T
            else:
                j2cR, j2cI = zdotCN(LkR[naux:] * coulG[p0:p1],
                                    LkI[naux:] * coulG[p0:p1], LkR.T, LkI.T)
                j2c[k][naux:] -= j2cR + j2cI * 1j
                j2c[k][:naux, naux:] = j2c[k][naux:, :naux].T.conj()
            LkR = LkI = None
        fswap['j2c/%d' % k] = fuse(fuse(j2c[k]).T).T
    j2c = coulG = None

    def cholesky_decomposed_metric(uniq_kptji_id):
        '''Return (j2c, j2c_negative, tag) for the stored metric; tag is
        'CD' for a Cholesky factor and 'eig' for the eigenvector fallback.'''
        j2c = numpy.asarray(fswap['j2c/%d' % uniq_kptji_id])
        j2c_negative = None
        try:
            j2c = scipy.linalg.cholesky(j2c, lower=True)
            j2ctag = 'CD'
        except scipy.linalg.LinAlgError:
            # Metric is not positive definite: instead of raising, keep only
            # the eigenvectors above the linear-dependency threshold.
            w, v = scipy.linalg.eigh(j2c)
            log.debug('DF metric linear dependency for kpt %s', uniq_kptji_id)
            log.debug('cond = %.4g, drop %d bfns', w[-1] / w[0],
                      numpy.count_nonzero(w < mydf.linear_dep_threshold))
            v1 = v[:, w > mydf.linear_dep_threshold].conj().T
            v1 /= numpy.sqrt(w[w > mydf.linear_dep_threshold]).reshape(-1, 1)
            j2c = v1
            if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum':
                idx = numpy.where(w < -mydf.linear_dep_threshold)[0]
                if len(idx) > 0:
                    j2c_negative = (v[:, idx] / numpy.sqrt(-w[idx])).conj().T
            w = v = None
            j2ctag = 'eig'
        return j2c, j2c_negative, j2ctag

    nsegs = len(fswap['j3c-junk/0'])

    def make_kpt(uniq_kptji_id, cholesky_j2c):
        '''Fit and store the 3-index tensors of all k-point pairs whose
        kptj - kpti equals uniq_kpts[uniq_kptji_id].  Writes into feri.'''
        kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
        log.debug1('kpt = %s', kpt)
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)
        log.debug1('adapted_ji_idx = %s', adapted_ji_idx)

        j2c, j2c_negative, j2ctag = cholesky_j2c

        shls_slice = (auxcell.nbas, fused_cell.nbas)
        Gaux = ft_ao.ft_ao(fused_cell, Gv, shls_slice, b, gxyz, Gvbase, kpt)
        wcoulG = mydf.weighted_coulG(kpt, False, mesh)
        Gaux *= wcoulG.reshape(-1, 1)
        kLR = Gaux.real.copy('C')
        kLI = Gaux.imag.copy('C')
        Gaux = None

        if is_zero(kpt):  # kpti == kptj
            aosym = 's2'
            nao_pair = nao * (nao + 1) // 2

            if cell.dimension == 3:
                vbar = fuse(mydf.auxbar(fused_cell))
                ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
                ovlp = [lib.pack_tril(s) for s in ovlp]
        else:
            aosym = 's1'
            nao_pair = nao**2

        mem_now = lib.current_memory()[0]
        log.debug2('memory = %s', mem_now)
        max_memory = max(2000, mydf.max_memory - mem_now)
        # nkptj for 3c-coulomb arrays plus 1 Lpq array
        buflen = min(max(int(max_memory * .38e6 / 16 / naux / (nkptj + 1)), 1),
                     nao_pair)
        shranges = _guess_shell_ranges(cell, buflen, aosym)
        buflen = max([x[2] for x in shranges])
        # +1 for a pqkbuf
        if aosym == 's2':
            Gblksize = max(16, int(max_memory * .1e6 / 16 / buflen / (nkptj + 1)))
        else:
            Gblksize = max(16, int(max_memory * .2e6 / 16 / buflen / (nkptj + 1)))
        Gblksize = min(Gblksize, ngrids, 16384)

        def load(aux_slice):
            '''Read columns [col0:col1) of the swap-file integrals for every
            adapted k-point pair; returns lists of real and imaginary parts.'''
            col0, col1 = aux_slice
            j3cR = []
            j3cI = []
            for k, idx in enumerate(adapted_ji_idx):
                v = numpy.vstack([
                    fswap['j3c-junk/%d/%d' % (idx, i)][0, col0:col1].T
                    for i in range(nsegs)
                ])
                # vbar is the interaction between the background charge
                # and the auxiliary basis.  0D, 1D, 2D do not have vbar.
                if is_zero(kpt) and cell.dimension == 3:
                    for i in numpy.where(vbar != 0)[0]:
                        v[i] -= vbar[i] * ovlp[k][col0:col1]
                j3cR.append(numpy.asarray(v.real, order='C'))
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    j3cI.append(None)
                else:
                    j3cI.append(numpy.asarray(v.imag, order='C'))
                v = None
            return j3cR, j3cI

        pqkRbuf = numpy.empty(buflen * Gblksize)
        pqkIbuf = numpy.empty(buflen * Gblksize)
        # buf for ft_aopair
        buf = numpy.empty(nkptj * buflen * Gblksize, dtype=numpy.complex128)
        cols = [sh_range[2] for sh_range in shranges]
        locs = numpy.append(0, numpy.cumsum(cols))
        tasks = zip(locs[:-1], locs[1:])
        for istep, (j3cR, j3cI) in enumerate(lib.map_with_prefetch(load, tasks)):
            bstart, bend, ncol = shranges[istep]
            log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d',
                       istep + 1, len(shranges), bstart, bend, ncol)
            if aosym == 's2':
                shls_slice = (bstart, bend, 0, bend)
            else:
                shls_slice = (bstart, bend, 0, cell.nbas)

            for p0, p1 in lib.prange(0, ngrids, Gblksize):
                dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                            b, gxyz[p0:p1], Gvbase, kpt,
                                            adapted_kptjs, out=buf)
                nG = p1 - p0
                for k, ji in enumerate(adapted_ji_idx):
                    aoao = dat[k].reshape(nG, ncol)
                    pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                    pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                    pqkR[:] = aoao.real.T
                    pqkI[:] = aoao.imag.T

                    # Subtract the G-space part from the rows starting at naux.
                    lib.dot(kLR[p0:p1].T, pqkR.T, -1, j3cR[k][naux:], 1)
                    lib.dot(kLI[p0:p1].T, pqkI.T, -1, j3cR[k][naux:], 1)
                    if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                        lib.dot(kLR[p0:p1].T, pqkI.T, -1, j3cI[k][naux:], 1)
                        lib.dot(kLI[p0:p1].T, pqkR.T, 1, j3cI[k][naux:], 1)

            for k, ji in enumerate(adapted_ji_idx):
                if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                    v = fuse(j3cR[k])
                else:
                    v = fuse(j3cR[k] + j3cI[k] * 1j)
                if j2ctag == 'CD':
                    v = scipy.linalg.solve_triangular(j2c, v, lower=True,
                                                      overwrite_b=True)
                    feri['j3c/%d/%d' % (ji, istep)] = v
                else:
                    feri['j3c/%d/%d' % (ji, istep)] = lib.dot(j2c, v)

                # low-dimension systems
                if j2c_negative is not None:
                    feri['j3c-/%d/%d' % (ji, istep)] = lib.dot(j2c_negative, v)
            j3cR = j3cI = None

        for ji in adapted_ji_idx:
            del (fswap['j3c-junk/%d' % ji])

    # Wrapped around boundary and symmetry between k and -k can be used
    # explicitly for the metric integrals.  We consider this symmetry
    # because it is used in the df_ao2mo module when contracting two 3-index
    # integral tensors to the 4-index 2e integral tensor. If the symmetry
    # related k-points are treated separately, the resultant 3-index tensors
    # may have inconsistent dimension due to the numerical noise when handling
    # linear dependency of j2c.
    def conj_j2c(cholesky_j2c):
        '''Complex-conjugate the decomposed metric, keeping its tag.'''
        j2c, j2c_negative, j2ctag = cholesky_j2c
        if j2c_negative is None:
            return j2c.conj(), None, j2ctag
        else:
            return j2c.conj(), j2c_negative.conj(), j2ctag

    a = cell.lattice_vectors() / (2 * numpy.pi)

    def kconserve_indices(kpt):
        '''search which (kpts+kpt) satisfies momentum conservation'''
        kdif = numpy.einsum('wx,ix->wi', a, uniq_kpts + kpt)
        kdif_int = numpy.rint(kdif)
        mask = numpy.einsum('wi->i', abs(kdif - kdif_int)) < KPT_DIFF_TOL
        uniq_kptji_ids = numpy.where(mask)[0]
        return uniq_kptji_ids

    # The output file is managed by a context manager so that the handle is
    # released even if one of the steps below raises.
    with h5py.File(cderi_file, 'w') as feri:
        feri['j3c-kptij'] = kptij_lst

        done = numpy.zeros(len(uniq_kpts), dtype=bool)
        for k, kpt in enumerate(uniq_kpts):
            if done[k]:
                continue

            log.debug1('Cholesky decomposition for j2c at kpt %s', k)
            cholesky_j2c = cholesky_decomposed_metric(k)

            # The k-point k' which has (k - k') * a = 2n pi. Metric integrals
            # have the symmetry S = S
            uniq_kptji_ids = kconserve_indices(-kpt)
            log.debug1("Symmetry pattern (k - %s)*a= 2n pi", kpt)
            log.debug1(" make_kpt for uniq_kptji_ids %s", uniq_kptji_ids)
            for uniq_kptji_id in uniq_kptji_ids:
                if not done[uniq_kptji_id]:
                    make_kpt(uniq_kptji_id, cholesky_j2c)
            done[uniq_kptji_ids] = True

            # The k-point k' which has (k + k') * a = 2n pi. Metric integrals
            # have the symmetry S = S*
            uniq_kptji_ids = kconserve_indices(kpt)
            log.debug1("Symmetry pattern (k + %s)*a= 2n pi", kpt)
            log.debug1(" make_kpt for %s", uniq_kptji_ids)
            cholesky_j2c = conj_j2c(cholesky_j2c)
            for uniq_kptji_id in uniq_kptji_ids:
                if not done[uniq_kptji_id]:
                    make_kpt(uniq_kptji_id, cholesky_j2c)
            done[uniq_kptji_ids] = True
def get_eri(mydf, kpts=None, compact=True):
    '''Accumulate the AO two-electron integrals for four k-points from the
    plane-wave blocks produced by ``mydf.pw_loop``.

    Three cases are distinguished:

    * all four k-points at gamma: real result over packed AO pairs
      (expanded to ``(nao**2, nao**2)`` when ``compact`` is false);
    * kpti == kptl and kptj == kptk: complex ``(nao**2, nao**2)`` result
      built from a single density loop;
    * otherwise: complex ``(nao**2, nao**2)`` result built from two
      density loops iterated in lockstep.

    Args:
        mydf: object providing ``cell``, ``gs``, ``max_memory``,
            ``weighted_coulG`` and ``pw_loop``.
        kpts: passed to ``_format_kpts`` to obtain the four k-points.
        compact: only consulted in the gamma-point case.

    Returns:
        A 2D numpy array of integrals.
    '''
    cell = mydf.cell
    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    q = kptj - kpti
    coulG = mydf.weighted_coulG(q, False, mydf.gs)
    nao = cell.nao_nr()
    nao_pair = nao * (nao + 1) // 2
    mem_free = mydf.max_memory - lib.current_memory()[0]
    max_memory = max(2000, mem_free * .8)

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if gamma_point(kptijkl):
        eri_re = numpy.zeros((nao_pair, nao_pair))
        blocks = mydf.pw_loop(mydf.gs, kptijkl[:2], q,
                              max_memory=max_memory, aosym='s2')
        for rho_re, rho_im, g0, g1 in blocks:
            # Split the Coulomb kernel evenly between bra and ket.
            root_kernel = numpy.sqrt(coulG[g0:g1])
            rho_re *= root_kernel
            rho_im *= root_kernel
            lib.ddot(rho_re, rho_re.T, 1, eri_re, 1)
            lib.ddot(rho_im, rho_im.T, 1, eri_re, 1)
            rho_re = rho_im = None
        if compact:
            return eri_re
        eri_re = ao2mo.restore(1, eri_re, nao).reshape(nao**2, -1)
        return eri_re

    nao2 = nao**2

    ####################
    # (kpt) i == j == k == l != 0
    # (kpt) i == l && j == k && i != j && j != k  =>
    #
    # complex integrals, N^4 elements
    if is_zero(kpti - kptl) and is_zero(kptj - kptk):
        eri_re = numpy.zeros((nao2, nao2))
        eri_im = numpy.zeros((nao2, nao2))
        blocks = mydf.pw_loop(mydf.gs, kptijkl[:2], q, max_memory=max_memory)
        for rho_re, rho_im, g0, g1 in blocks:
            root_kernel = numpy.sqrt(coulG[g0:g1])
            rho_re *= root_kernel
            rho_im *= root_kernel
            # rho_pq(G+k_pq) * conj(rho_rs(G-k_rs))
            zdotNC(rho_re, rho_im, rho_re.T, rho_im.T, 1, eri_re, eri_im, 1)
            rho_re = rho_im = None
        rho_re = rho_im = coulG = None
        # transpose(0,1,3,2) because
        # j == k && i == l  =>
        # (L|ij).transpose(0,2,1).conj() = (L^*|ji) = (L^*|kl)  =>  (M|kl)
        # rho_rs(-G+k_rs) = conj(transpose(rho_sr(G+k_sr), (0,2,1)))
        eri_z = (eri_re + eri_im * 1j).reshape(-1, nao, nao)
        eri = lib.transpose(eri_z, axes=(0, 2, 1))
        return eri.reshape(nao2, -1)

    ####################
    # aosym = s1, complex integrals
    #
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the
    # wave vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1,
    # by definition.  So kptl/b - kptk/b must be -1 < k/b < 1.
    # =>  kptl == kptk
    #
    eri_re = numpy.zeros((nao2, nao2))
    eri_im = numpy.zeros((nao2, nao2))
    # rho_rs(-G-k) = rho_rs(conj(G+k)) = conj(rho_sr(G+k))
    bra_blocks = mydf.pw_loop(mydf.gs, kptijkl[:2], q,
                              max_memory=max_memory*.5)
    ket_blocks = mydf.pw_loop(mydf.gs, -kptijkl[2:], q,
                              max_memory=max_memory*.5)
    for bra, ket in lib.izip(bra_blocks, ket_blocks):
        rho_re, rho_im, g0, g1 = bra
        sig_re, sig_im, h0, h1 = ket
        rho_re *= coulG[g0:g1]
        rho_im *= coulG[g0:g1]
        # rho_pq(G+k_pq) * conj(rho_sr(G+k_pq))
        zdotNC(rho_re, rho_im, sig_re.T, sig_im.T, 1, eri_re, eri_im, 1)
        rho_re = rho_im = sig_re = sig_im = None
        bra = ket = None
    return (eri_re + eri_im * 1j)
def _make_j3c(mydf, cell, auxcell, kptij_lst, cderi_file):
    '''MPI variant: generate the density-fitting 3-index tensors and hand
    them to ``_assemble`` for writing into ``cderi_file``.

    The 2-centre metric for every unique ``kptj - kpti`` is corrected with
    its G-space contribution (partial sums are combined with
    ``mpi.allreduce``), decomposed (Cholesky, or eigen-decomposition when
    Cholesky fails) and stored in a temporary HDF5 swap file.  Two closures
    are then defined and passed to ``_assemble``:

    * ``gen_int3c(job_id, ish0, ish1)`` evaluates the 'int3c2e' integrals
      of one AO shell range into ``j3c-chunks/<job_id>/<kpt pair>``;
    * ``ft_fuse(job_id, uniq_kptji_id, sh0, sh1)`` applies the G-space
      correction and the metric decomposition to such a chunk in place.

    Args:
        mydf: DF object; ``stdout``, ``verbose``, ``max_memory``, ``mesh``,
            ``auxcell``, ``linear_dep_threshold``, ``prange``,
            ``weighted_coulG`` and ``auxbar`` are read from it.
        cell: the unit cell holding the AO basis.
        auxcell: the auxiliary-basis cell.
        kptij_lst: array of k-point pairs; column 0 is kpti, column 1 kptj.
        cderi_file: output path, forwarded to ``_assemble``.

    Returns:
        None.
    '''
    log = logger.Logger(mydf.stdout, mydf.verbose)
    # time.clock was removed in Python 3.8; use it only where
    # time.process_time does not exist (Python 2).
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    t1 = (cpu_clock(), time.time())

    # NOTE(review): the fused cell is built from mydf.auxcell rather than the
    # auxcell argument -- presumably the same object; confirm with callers.
    fused_cell, fuse = fuse_auxcell(mydf, mydf.auxcell)
    ao_loc = cell.ao_loc_nr()
    nao = ao_loc[-1]
    naux = auxcell.nao_nr()
    nkptij = len(kptij_lst)
    mesh = mydf.mesh
    Gv, Gvbase, kws = cell.get_Gv_weights(mesh)
    b = cell.reciprocal_vectors()
    gxyz = lib.cartesian_prod([numpy.arange(len(x)) for x in Gvbase])
    ngrids = gxyz.shape[0]

    kptis = kptij_lst[:, 0]
    kptjs = kptij_lst[:, 1]
    kpt_ji = kptjs - kptis
    uniq_kpts, uniq_index, uniq_inverse = unique(kpt_ji)
    log.debug('Num uniq kpts %d', len(uniq_kpts))
    log.debug2('uniq_kpts %s', uniq_kpts)
    # j2c ~ (-kpt_ji | kpt_ji)
    j2c = fused_cell.pbc_intor('int2c2e', hermi=1, kpts=uniq_kpts)
    j2ctags = []
    t1 = log.timer_debug1('2c2e', *t1)

    swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file))
    fswap = lib.H5TmpFile(swapfile.name)
    # Unlink swapfile to avoid trash
    swapfile = None

    mem_now = max(comm.allgather(lib.current_memory()[0]))
    max_memory = max(2000, mydf.max_memory - mem_now)
    blksize = max(2048, int(max_memory * .5e6 / 16 / fused_cell.nao_nr()))
    log.debug2('max_memory %s (MB) blocksize %s', max_memory, blksize)
    for k, kpt in enumerate(uniq_kpts):
        coulG = mydf.weighted_coulG(kpt, False, mesh)
        # Partial G-space sum; combined below with mpi.allreduce.
        j2c_k = numpy.zeros_like(j2c[k])
        for p0, p1 in mydf.prange(0, ngrids, blksize):
            aoaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], None, b, gxyz[p0:p1],
                                Gvbase, kpt).T
            LkR = numpy.asarray(aoaux.real, order='C')
            LkI = numpy.asarray(aoaux.imag, order='C')
            aoaux = None

            if is_zero(kpt):  # kpti == kptj
                j2c_k[naux:] += lib.ddot(LkR[naux:] * coulG[p0:p1], LkR.T)
                j2c_k[naux:] += lib.ddot(LkI[naux:] * coulG[p0:p1], LkI.T)
            else:
                j2cR, j2cI = zdotCN(LkR[naux:] * coulG[p0:p1],
                                    LkI[naux:] * coulG[p0:p1], LkR.T, LkI.T)
                j2c_k[naux:] += j2cR + j2cI * 1j
            # Release the block buffers (the original reset the misspelled
            # names kLR/kLI, which left these arrays alive).
            LkR = LkI = None

        j2c_k[:naux, naux:] = j2c_k[naux:, :naux].conj().T
        j2c[k] -= mpi.allreduce(j2c_k)
        j2c[k] = fuse(fuse(j2c[k]).T).T
        try:
            fswap['j2c/%d' % k] = scipy.linalg.cholesky(j2c[k], lower=True)
            j2ctags.append('CD')
        except scipy.linalg.LinAlgError:
            # Metric is not positive definite: instead of raising, keep only
            # the eigenvectors above the linear-dependency threshold.
            w, v = scipy.linalg.eigh(j2c[k])
            log.debug2('metric linear dependency for kpt %s', k)
            # eigh returns ascending eigenvalues, so the condition number is
            # largest/smallest = w[-1]/w[0].
            log.debug2('cond = %.4g, drop %d bfns', w[-1] / w[0],
                       numpy.count_nonzero(w < mydf.linear_dep_threshold))
            v1 = v[:, w > mydf.linear_dep_threshold].T.conj()
            v1 /= numpy.sqrt(w[w > mydf.linear_dep_threshold]).reshape(-1, 1)
            fswap['j2c/%d' % k] = v1
            if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum':
                idx = numpy.where(w < -mydf.linear_dep_threshold)[0]
                if len(idx) > 0:
                    fswap['j2c-/%d' % k] = (v[:, idx] / numpy.sqrt(-w[idx])).conj().T
            w = v = v1 = None
            j2ctags.append('eig')
    j2c = coulG = None

    aosym_s2 = numpy.einsum('ix->i', abs(kptis - kptjs)) < 1e-9
    j_only = numpy.all(aosym_s2)
    if gamma_point(kptij_lst):
        dtype = 'f8'
    else:
        dtype = 'c16'
    t1 = log.timer_debug1('aoaux and int2c', *t1)

    # Estimates the buffer size based on the last contraction in G-space.
    # This contraction requires to hold nkptj copies of (naux,?) array
    # simultaneously in memory.
    mem_now = max(comm.allgather(lib.current_memory()[0]))
    max_memory = max(2000, mydf.max_memory - mem_now)
    nkptj_max = max((uniq_inverse == x).sum() for x in set(uniq_inverse))
    buflen = max(
        int(min(max_memory * .5e6 / 16 / naux / (nkptj_max + 2) / nao,
                nao / 3 / mpi.pool.size)),
        1)
    chunks = (buflen, nao)

    j3c_jobs = grids2d_int3c_jobs(cell, auxcell, kptij_lst, chunks, j_only)
    log.debug1('max_memory = %d MB (%d in use) chunks %s',
               max_memory, mem_now, chunks)
    log.debug2('j3c_jobs %s', j3c_jobs)

    if j_only:
        int3c = wrap_int3c(cell, fused_cell, 'int3c2e', 's2', 1, kptij_lst)
    else:
        int3c = wrap_int3c(cell, fused_cell, 'int3c2e', 's1', 1, kptij_lst)
        # Flat indices of the lower triangle; only read by gen_int3c when
        # j_only is false.
        idxb = numpy.tril_indices(nao)
        idxb = (idxb[0] * nao + idxb[1]).astype('i')
    aux_loc = fused_cell.ao_loc_nr('ssc' in 'int3c2e')

    def gen_int3c(job_id, ish0, ish1):
        '''Evaluate the 3-centre integrals for AO shells [ish0:ish1) and
        store them per k-point pair under j3c-chunks/<job_id>/ in fswap.'''
        dataname = 'j3c-chunks/%d' % job_id
        i0 = ao_loc[ish0]
        i1 = ao_loc[ish1]
        dii = i1 * (i1 + 1) // 2 - i0 * (i0 + 1) // 2
        if j_only:
            dij = dii
            buflen = max(8, int(max_memory * 1e6 / 16 / (nkptij * dii + dii)))
        else:
            dij = (i1 - i0) * nao
            buflen = max(8, int(max_memory * 1e6 / 16 / (nkptij * dij + dij)))
        auxranges = balance_segs(aux_loc[1:] - aux_loc[:-1], buflen)
        buflen = max([x[2] for x in auxranges])
        buf = numpy.empty(nkptij * dij * buflen, dtype=dtype)
        buf1 = numpy.empty(dij * buflen, dtype=dtype)

        # Shadows the outer naux: here it is the fused-cell dimension.
        naux = aux_loc[-1]
        for kpt_id, kptij in enumerate(kptij_lst):
            key = '%s/%d' % (dataname, kpt_id)
            if aosym_s2[kpt_id]:
                shape = (naux, dii)
            else:
                shape = (naux, dij)
            if gamma_point(kptij):
                fswap.create_dataset(key, shape, 'f8')
            else:
                fswap.create_dataset(key, shape, 'c16')

        naux0 = 0
        for istep, auxrange in enumerate(auxranges):
            log.alldebug2("aux_e1 job_id %d step %d", job_id, istep)
            sh0, sh1, nrow = auxrange
            sub_slice = (ish0, ish1, 0, cell.nbas, sh0, sh1)
            mat = numpy.ndarray((nkptij, dij, nrow), dtype=dtype, buffer=buf)
            mat = int3c(sub_slice, mat)

            for k, kptij in enumerate(kptij_lst):
                h5dat = fswap['%s/%d' % (dataname, k)]
                v = lib.transpose(mat[k], out=buf1)
                if not j_only and aosym_s2[k]:
                    # Keep only the lower-triangular AO pairs of this slab.
                    idy = idxb[i0 * (i0 + 1) // 2:i1 * (i1 + 1) // 2] - i0 * nao
                    out = numpy.ndarray((nrow, dii), dtype=v.dtype, buffer=mat[k])
                    v = numpy.take(v, idy, axis=1, out=out)
                if gamma_point(kptij):
                    h5dat[naux0:naux0 + nrow] = v.real
                else:
                    h5dat[naux0:naux0 + nrow] = v
            naux0 += nrow

    def ft_fuse(job_id, uniq_kptji_id, sh0, sh1):
        '''Apply the G-space correction and the decomposed metric to the
        chunk written by gen_int3c for this job, updating fswap in place.'''
        kpt = uniq_kpts[uniq_kptji_id]  # kpt = kptj - kpti
        adapted_ji_idx = numpy.where(uniq_inverse == uniq_kptji_id)[0]
        adapted_kptjs = kptjs[adapted_ji_idx]
        nkptj = len(adapted_kptjs)

        j2c = numpy.asarray(fswap['j2c/%d' % uniq_kptji_id])
        j2ctag = j2ctags[uniq_kptji_id]
        naux0 = j2c.shape[0]
        if ('j2c-/%d' % uniq_kptji_id) in fswap:
            j2c_negative = numpy.asarray(fswap['j2c-/%d' % uniq_kptji_id])
        else:
            j2c_negative = None

        if is_zero(kpt):
            aosym = 's2'
        else:
            aosym = 's1'

        if aosym == 's2' and cell.dimension == 3:
            vbar = fuse(mydf.auxbar(fused_cell))
            ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=adapted_kptjs)
            ovlp = [lib.pack_tril(s) for s in ovlp]

        j3cR = [None] * nkptj
        j3cI = [None] * nkptj
        i0 = ao_loc[sh0]
        i1 = ao_loc[sh1]
        for k, idx in enumerate(adapted_ji_idx):
            key = 'j3c-chunks/%d/%d' % (job_id, idx)
            v = numpy.asarray(fswap[key])
            if aosym == 's2' and cell.dimension == 3:
                for i in numpy.where(vbar != 0)[0]:
                    v[i] -= vbar[i] * ovlp[k][i0 * (i0 + 1) // 2:i1 * (i1 + 1) // 2].ravel()
            j3cR[k] = numpy.asarray(v.real, order='C')
            if v.dtype == numpy.complex128:
                j3cI[k] = numpy.asarray(v.imag, order='C')
            v = None

        ncol = j3cR[0].shape[1]
        # +1 for pqkRbuf/pqkIbuf
        Gblksize = max(16, int(max_memory * 1e6 / 16 / ncol / (nkptj + 1)))
        Gblksize = min(Gblksize, ngrids, 16384)
        pqkRbuf = numpy.empty(ncol * Gblksize)
        pqkIbuf = numpy.empty(ncol * Gblksize)
        buf = numpy.empty(nkptj * ncol * Gblksize, dtype=numpy.complex128)
        log.alldebug2('job_id %d blksize (%d,%d)', job_id, Gblksize, ncol)

        wcoulG = mydf.weighted_coulG(kpt, False, mesh)
        fused_cell_slice = (auxcell.nbas, fused_cell.nbas)
        if aosym == 's2':
            shls_slice = (sh0, sh1, 0, sh1)
        else:
            shls_slice = (sh0, sh1, 0, cell.nbas)
        for p0, p1 in lib.prange(0, ngrids, Gblksize):
            Gaux = ft_ao.ft_ao(fused_cell, Gv[p0:p1], fused_cell_slice, b,
                               gxyz[p0:p1], Gvbase, kpt)
            Gaux *= wcoulG[p0:p1, None]
            kLR = Gaux.real.copy('C')
            kLI = Gaux.imag.copy('C')
            Gaux = None

            dat = ft_ao._ft_aopair_kpts(cell, Gv[p0:p1], shls_slice, aosym,
                                        b, gxyz[p0:p1], Gvbase, kpt,
                                        adapted_kptjs, out=buf)
            nG = p1 - p0
            for k, ji in enumerate(adapted_ji_idx):
                aoao = dat[k].reshape(nG, ncol)
                pqkR = numpy.ndarray((ncol, nG), buffer=pqkRbuf)
                pqkI = numpy.ndarray((ncol, nG), buffer=pqkIbuf)
                pqkR[:] = aoao.real.T
                pqkI[:] = aoao.imag.T

                # Subtract the G-space part from the rows starting at naux.
                lib.dot(kLR.T, pqkR.T, -1, j3cR[k][naux:], 1)
                lib.dot(kLI.T, pqkI.T, -1, j3cR[k][naux:], 1)
                if not (is_zero(kpt) and gamma_point(adapted_kptjs[k])):
                    lib.dot(kLR.T, pqkI.T, -1, j3cI[k][naux:], 1)
                    lib.dot(kLI.T, pqkR.T, 1, j3cI[k][naux:], 1)
            kLR = kLI = None

        for k, idx in enumerate(adapted_ji_idx):
            if is_zero(kpt) and gamma_point(adapted_kptjs[k]):
                v = fuse(j3cR[k])
            else:
                v = fuse(j3cR[k] + j3cI[k] * 1j)
            if j2ctag == 'CD':
                v = scipy.linalg.solve_triangular(j2c, v, lower=True,
                                                  overwrite_b=True)
                fswap['j3c-chunks/%d/%d' % (job_id, idx)][:naux0] = v
            else:
                fswap['j3c-chunks/%d/%d' % (job_id, idx)][:naux0] = lib.dot(j2c, v)

            # low-dimension systems
            if j2c_negative is not None:
                fswap['j3c-/%d/%d' % (job_id, idx)] = lib.dot(j2c_negative, v)

    _assemble(mydf, kptij_lst, j3c_jobs, gen_int3c, ft_fuse, cderi_file, fswap, log)
def general(mydf, mo_coeffs, kpts=None,
            compact=getattr(__config__, 'pbc_df_ao2mo_general_compact', True)):
    '''Transform the two-electron integrals to the MO basis, accumulating
    them from the plane-wave blocks produced by ``mydf.pw_loop``.

    Args:
        mydf: object providing ``cell``, ``mesh``, ``max_memory``,
            ``weighted_coulG`` and ``pw_loop``.
        mo_coeffs: either one 2D array (used for all four indices) or a
            sequence of four coefficient arrays.
        kpts: passed to ``_format_kpts`` to obtain the four k-points.
        compact: forwarded to ``_conc_mos`` in the real gamma-point case
            only.

    Returns:
        A 2D array of shape (nij_pair, nkl_pair): real for the gamma point
        with real orbitals, complex128 otherwise.  If the k-points do not
        conserve momentum, a warning is logged and a 4D array of zeros with
        one axis per orbital set is returned.
    '''
    warn_pbc2d_eri(mydf)
    cell = mydf.cell
    kptijkl = _format_kpts(kpts)
    kpti, kptj, kptk, kptl = kptijkl
    if isinstance(mo_coeffs, numpy.ndarray) and mo_coeffs.ndim == 2:
        mo_coeffs = (mo_coeffs,) * 4
    if not _iskconserv(cell, kptijkl):
        lib.logger.warn(cell, 'aft_ao2mo: momentum conservation not found in '
                        'the given k-points %s', kptijkl)
        return numpy.zeros([mo.shape[1] for mo in mo_coeffs])

    q = kptj - kpti
    mesh = mydf.mesh
    coulG = mydf.weighted_coulG(q, False, mesh)
    all_real = not any(numpy.iscomplexobj(mo) for mo in mo_coeffs)
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0]) * .5)

    ####################
    # gamma point, the integral is real and with s4 symmetry
    if gamma_point(kptijkl) and all_real:
        ijmosym, nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1], compact)
        klmosym, nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3], compact)
        eri_mo = numpy.zeros((nij_pair,nkl_pair))
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[2]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[3]))

        ijR = ijI = klR = klI = buf = None
        for pqkR, pqkI, p0, p1 \
                in mydf.pw_loop(mesh, kptijkl[:2], q, max_memory=max_memory,
                                aosym='s2'):
            # Real and imaginary parts of the density contribute separately.
            buf = lib.transpose(pqkR, out=buf)
            ijR, klR = _dtrans(buf, ijR, ijmosym, moij, ijslice,
                               buf, klR, klmosym, mokl, klslice, sym)
            lib.ddot(ijR.T, klR*coulG[p0:p1,None], 1, eri_mo, 1)
            buf = lib.transpose(pqkI, out=buf)
            ijI, klI = _dtrans(buf, ijI, ijmosym, moij, ijslice,
                               buf, klI, klmosym, mokl, klslice, sym)
            lib.ddot(ijI.T, klI*coulG[p0:p1,None], 1, eri_mo, 1)
            pqkR = pqkI = None
        return eri_mo

    ####################
    # (kpt) i == j == k == l != 0
    # (kpt) i == l && j == k && i != j && j != k  =>
    #
    elif is_zero(kpti-kptl) and is_zero(kptj-kptk):
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nlk_pair, molk, lkslice = _conc_mos(mo_coeffs[3], mo_coeffs[2])[1:]
        # numpy.complex128 replaces the numpy.complex alias, which was
        # removed in NumPy 1.24; the dtype is the same.
        eri_mo = numpy.zeros((nij_pair,nlk_pair), dtype=numpy.complex128)
        sym = (iden_coeffs(mo_coeffs[0], mo_coeffs[3]) and
               iden_coeffs(mo_coeffs[1], mo_coeffs[2]))

        zij = zlk = buf = None
        for pqkR, pqkI, p0, p1 \
                in mydf.pw_loop(mesh, kptijkl[:2], q, max_memory=max_memory):
            buf = lib.transpose(pqkR+pqkI*1j, out=buf)
            zij, zlk = _ztrans(buf, zij, moij, ijslice,
                               buf, zlk, molk, lkslice, sym)
            lib.dot(zij.T, zlk.conj()*coulG[p0:p1,None], 1, eri_mo, 1)
            pqkR = pqkI = None
        # The ket was built in (l,k) order; swap it back to (k,l).
        nmok = mo_coeffs[2].shape[1]
        nmol = mo_coeffs[3].shape[1]
        eri_mo = lib.transpose(eri_mo.reshape(-1,nmol,nmok), axes=(0,2,1))
        return eri_mo.reshape(nij_pair,nlk_pair)

    ####################
    # aosym = s1, complex integrals
    #
    # If kpti == kptj, (kptl-kptk)*a has to be multiples of 2pi because of the
    # wave vector symmetry.  k is a fraction of reciprocal basis, 0 < k/b < 1,
    # by definition.  So kptl/b - kptk/b must be -1 < k/b < 1.
    # =>  kptl == kptk
    #
    else:
        mo_coeffs = _mo_as_complex(mo_coeffs)
        nij_pair, moij, ijslice = _conc_mos(mo_coeffs[0], mo_coeffs[1])[1:]
        nkl_pair, mokl, klslice = _conc_mos(mo_coeffs[2], mo_coeffs[3])[1:]
        eri_mo = numpy.zeros((nij_pair,nkl_pair), dtype=numpy.complex128)

        tao = []
        ao_loc = None
        zij = zkl = buf = None
        for (pqkR, pqkI, p0, p1), (rskR, rskI, q0, q1) in \
                lib.izip(mydf.pw_loop(mesh, kptijkl[:2], q, max_memory=max_memory*.5),
                         mydf.pw_loop(mesh,-kptijkl[2:], q, max_memory=max_memory*.5)):
            buf = lib.transpose(pqkR+pqkI*1j, out=buf)
            zij = _ao2mo.r_e2(buf, moij, ijslice, tao, ao_loc, out=zij)
            # The ket density enters complex-conjugated.
            buf = lib.transpose(rskR-rskI*1j, out=buf)
            zkl = _ao2mo.r_e2(buf, mokl, klslice, tao, ao_loc, out=zkl)
            zij *= coulG[p0:p1,None]
            lib.dot(zij.T, zkl, 1, eri_mo, 1)
            pqkR = pqkI = rskR = rskI = None
        return eri_mo