def get_pnucp(mydf, kpts=None):
    '''Build the nuclear-attraction-like matrix contracted in this routine
    for every requested k-point.

    NOTE(review): the integral name 'cint3c2e_pvp1_sph' and the in-line
    comments suggest this is the p.Vnuc.p operator -- confirm with callers.

    Args:
        mydf : density-fitting object; this routine reads ``cell``, ``gs``,
            ``eta``, ``max_memory``, ``stdout``, ``verbose`` and calls
            ``_int_nuc_vloc`` and ``ft_loop`` on it.
        kpts : None, a single k-point of shape (3,), or anything reshapeable
            to (-1,3).  None selects the single k-point (0,0,0).

    Returns:
        A list holding one square (unpacked lower-triangular) matrix per
        k-point; only the real part is kept at the gamma point.  When
        ``kpts`` is None or has shape (3,), the single matrix itself is
        returned instead of a list.
    '''
    cell = mydf.cell
    if kpts is None:
        kpts_lst = numpy.zeros((1,3))
    else:
        kpts_lst = numpy.reshape(kpts, (-1,3))

    log = logger.Logger(mydf.stdout, mydf.verbose)
    # time.clock() was removed in Python 3.8; only fall back to it on
    # interpreters that do not provide time.process_time().
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    t1 = (cpu_clock(), time.time())

    nkpts = len(kpts_lst)
    nao = cell.nao_nr()
    nao_pair = nao * (nao+1) // 2

    Gv, Gvbase, kws = cell.get_Gv_weights(mydf.gs)
    kpt_allow = numpy.zeros(3)
    if mydf.eta == 0:
        # No smooth model charge: everything is evaluated in the loop over
        # plane waves below, starting from zero.
        charge = -cell.atom_charges()
        #coulG=4*numpy.pi/G^2 is cancelled with (sigma dot p i, sigma dot p j)
        SI = cell.get_SI(Gv)
        vGR = numpy.einsum('i,ix->x', 4*numpy.pi*charge, SI.real) * kws
        vGI = numpy.einsum('i,ix->x', 4*numpy.pi*charge, SI.imag) * kws
        wjR = numpy.zeros((nkpts,nao_pair))
        wjI = numpy.zeros((nkpts,nao_pair))
    else:
        # Fake cell carrying one s-type Gaussian (exponent mydf.eta) per atom.
        nuccell = copy.copy(cell)
        half_sph_norm = .5/numpy.sqrt(numpy.pi)
        norm = half_sph_norm/mole._gaussian_int(2, mydf.eta)
        chg_env = [mydf.eta, norm]
        # The exponent and normalisation are appended after the existing env.
        ptr_eta = cell._env.size
        ptr_norm = ptr_eta + 1
        chg_bas = [[ia, 0, 1, 1, 0, ptr_eta, ptr_norm, 0]
                   for ia in range(cell.natm)]
        nuccell._atm = cell._atm
        nuccell._bas = numpy.asarray(chg_bas, dtype=numpy.int32)
        nuccell._env = numpy.hstack((cell._env, chg_env))

        wj = lib.asarray(mydf._int_nuc_vloc(nuccell, kpts_lst,
                                            'cint3c2e_pvp1_sph'))
        wjR = wj.real
        wjI = wj.imag
        t1 = log.timer_debug1('pnucp pass1: analytic int', *t1)

        charge = -cell.atom_charges()
        #coulG=4*numpy.pi/G^2 is cancelled with (sigma dot p i, sigma dot p j)
        aoaux = ft_ao.ft_ao(nuccell, Gv)
        vGR = numpy.einsum('i,xi->x', 4*numpy.pi*charge, aoaux.real) * kws
        vGI = numpy.einsum('i,xi->x', 4*numpy.pi*charge, aoaux.imag) * kws

    max_memory = max(2000, mydf.max_memory-lib.current_memory()[0])
    for k, pqkR, pqkI, p0, p1 \
            in mydf.ft_loop(mydf.gs, kpt_allow, kpts_lst,
                            max_memory=max_memory, aosym='s2'):
        # rho_ij(G) nuc(-G) / G^2
        # = [Re(rho_ij(G)) + Im(rho_ij(G))*1j] [Re(nuc(G)) - Im(nuc(G))*1j] / G^2
        if not pwdf_jk.gamma_point(kpts_lst[k]):
            # The imaginary part is only accumulated away from gamma.
            wjI[k] += numpy.einsum('k,xk->x', vGR[p0:p1], pqkI)
            wjI[k] -= numpy.einsum('k,xk->x', vGI[p0:p1], pqkR)
        wjR[k] += numpy.einsum('k,xk->x', vGR[p0:p1], pqkR)
        wjR[k] += numpy.einsum('k,xk->x', vGI[p0:p1], pqkI)
    t1 = log.timer_debug1('contracting Vnuc', *t1)

    if mydf.eta != 0 and cell.dimension == 3:
        # Constant shift built from the model-charge exponents, applied
        # through the 'cint1e_kin_sph' matrix.
        nucbar = sum(z/nuccell.bas_exp(i)[0] for i, z in enumerate(charge))
        nucbar *= numpy.pi/cell.vol * 2
        ovlp = cell.pbc_intor('cint1e_kin_sph', 1, lib.HERMITIAN, kpts_lst)
        for k in range(nkpts):
            s = lib.pack_tril(ovlp[k])
            wjR[k] -= nucbar * s.real
            wjI[k] -= nucbar * s.imag

    wj = []
    for k, kpt in enumerate(kpts_lst):
        if pwdf_jk.gamma_point(kpt):
            wj.append(lib.unpack_tril(wjR[k]))
        else:
            wj.append(lib.unpack_tril(wjR[k]+wjI[k]*1j))

    # A single k-point request gets a bare matrix rather than a list.
    if kpts is None or numpy.shape(kpts) == (3,):
        wj = wj[0]
    return wj
def get_jk(mydf, dm, hermi=1, kpt=numpy.zeros(3),
           kpt_band=None, with_j=True, with_k=True, exxdiv=None):
    '''JK for given k-point

    The contribution assembled here from ``mydf.sr_loop`` is added to the
    matrices returned by ``pwdf_jk.get_jk``.

    Args:
        mydf : density-fitting object; ``_cderi``, ``build``, ``sr_loop``,
            ``blockdim``, ``max_memory``, ``stdout`` and ``verbose`` are used.
        dm : density matrix, or a set of them, for the k-point ``kpt``.
        hermi : if true, the exchange contribution is symmetrised by adding
            the conjugate transpose instead of evaluating the second term.
        kpt : the k-point of ``dm``.
        kpt_band : optional k-point; when it differs from ``kpt`` the work is
            delegated to ``get_j_kpts`` / ``get_k_kpts``.
        with_j, with_k : select which of the two matrices are evaluated.
        exxdiv : forwarded unchanged to the exchange routines.

    Returns:
        Tuple ``(vj, vk)``.  On the ``kpt_band`` path an entry that was not
        requested is None.
    '''
    vj = vk = None
    if kpt_band is not None and abs(kpt-kpt_band).sum() > 1e-9:
        kpt = numpy.reshape(kpt, (1,3))
        if with_k:
            vk = get_k_kpts(mydf, [dm], hermi, kpt, kpt_band, exxdiv)
        if with_j:
            vj = get_j_kpts(mydf, [dm], hermi, kpt, kpt_band)
        return vj, vk

    log = logger.Logger(mydf.stdout, mydf.verbose)
    # time.clock() was removed in Python 3.8; only fall back to it on
    # interpreters that do not provide time.process_time().
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    t2 = t1 = (cpu_clock(), time.time())
    if mydf._cderi is None:
        mydf.build()
        t1 = log.timer_debug1('Init get_jk', *t1)

    dm = numpy.asarray(dm, order='C')
    dms = _format_dms(dm, [kpt])
    nset, _, nao = dms.shape[:3]
    dms = dms.reshape(nset,nao,nao)
    j_real = gamma_point(kpt)
    k_real = gamma_point(kpt) and not numpy.iscomplexobj(dms)
    kptii = numpy.asarray((kpt,kpt))

    dmsR = numpy.asarray(dms.real.reshape(nset,nao,nao), order='C')
    dmsI = numpy.asarray(dms.imag.reshape(nset,nao,nao), order='C')
    if with_j:
        vjR = numpy.zeros((nset,nao,nao))
        vjI = numpy.zeros((nset,nao,nao))
    if with_k:
        vkR = numpy.zeros((nset,nao,nao))
        vkI = numpy.zeros((nset,nao,nao))
    # .45 is estimation for the memory usage ratio sr_loop / (sr_loop+bufR+bufI)
    max_memory = max(2000, (mydf.max_memory - lib.current_memory()[0])) * .45
    if with_k:
        # Scratch buffers reused for every block delivered by sr_loop.
        buf1R = numpy.empty((mydf.blockdim*nao**2))
        buf2R = numpy.empty((mydf.blockdim*nao**2))
        buf3R = numpy.empty((mydf.blockdim*nao**2))
        if not k_real:
            buf1I = numpy.empty((mydf.blockdim*nao**2))
            buf2I = numpy.empty((mydf.blockdim*nao**2))
            buf3I = numpy.empty((mydf.blockdim*nao**2))

    def contract_k(pLqR, pLqI, pjqR, pjqI):
        # Accumulates the exchange contribution of one block into vkR/vkI.
        # K ~ 'iLj,lLk*,li->kj' + 'lLk*,iLj,li->kj'
        #:Lpq = LpqR + LpqI*1j
        #:j3c = j3cR + j3cI*1j
        #:for i in range(nset):
        #:    tmp = numpy.dot(dms[i], j3c.reshape(nao,-1))
        #:    vk1 = numpy.dot(Lpq.reshape(-1,nao).conj().T, tmp.reshape(-1,nao))
        #:    tmp = numpy.dot(dms[i], Lpq.reshape(nao,-1))
        #:    vk1+= numpy.dot(j3c.reshape(-1,nao).conj().T, tmp.reshape(-1,nao))
        #:    vkR[i] += vk1.real
        #:    vkI[i] += vk1.imag
        nrow = pLqR.shape[1]
        tmpR = numpy.ndarray((nao,nrow*nao), buffer=buf3R)
        if k_real:
            for i in range(nset):
                tmpR = lib.ddot(dmsR[i], pjqR.reshape(nao,-1), 1, tmpR)
                vk1R = lib.ddot(pLqR.reshape(-1,nao).T, tmpR.reshape(-1,nao))
                vkR[i] += vk1R
                if hermi:
                    # The second term is the transpose of the first one.
                    vkR[i] += vk1R.T
                else:
                    tmpR = lib.ddot(dmsR[i], pLqR.reshape(nao,-1), 1, tmpR)
                    lib.ddot(pjqR.reshape(-1,nao).T, tmpR.reshape(-1,nao),
                             1, vkR[i], 1)
        else:
            tmpI = numpy.ndarray((nao,nrow*nao), buffer=buf3I)
            for i in range(nset):
                tmpR, tmpI = zdotNN(dmsR[i], dmsI[i], pjqR.reshape(nao,-1),
                                    pjqI.reshape(nao,-1), 1, tmpR, tmpI, 0)
                vk1R, vk1I = zdotCN(pLqR.reshape(-1,nao).T,
                                    pLqI.reshape(-1,nao).T,
                                    tmpR.reshape(-1,nao), tmpI.reshape(-1,nao))
                vkR[i] += vk1R
                vkI[i] += vk1I
                if hermi:
                    # The second term is the conjugate transpose of the first.
                    vkR[i] += vk1R.T
                    vkI[i] -= vk1I.T
                else:
                    tmpR, tmpI = zdotNN(dmsR[i], dmsI[i], pLqR.reshape(nao,-1),
                                        pLqI.reshape(nao,-1), 1, tmpR, tmpI, 0)
                    zdotCN(pjqR.reshape(-1,nao).T, pjqI.reshape(-1,nao).T,
                           tmpR.reshape(-1,nao), tmpI.reshape(-1,nao),
                           1, vkR[i], vkI[i], 1)

    pLqI = pjqI = None
    thread_k = None
    for LpqR, LpqI, j3cR, j3cI in mydf.sr_loop(kptii, max_memory, False):
        LpqR = LpqR.reshape(-1,nao,nao)
        LpqI = LpqI.reshape(-1,nao,nao)
        j3cR = j3cR.reshape(-1,nao,nao)
        j3cI = j3cI.reshape(-1,nao,nao)
        t2 = log.timer_debug1(' load', *t2)
        # The previous exchange contraction still reads buf1*/buf2*; wait for
        # it before those buffers are overwritten further down.
        if thread_k is not None:
            thread_k.join()
        if with_j:
            #:rho_coeff = numpy.einsum('Lpq,xqp->xL', Lpq, dms)
            #:jaux = numpy.einsum('Lpq,xqp->xL', j3c, dms)
            #:vj += numpy.dot(jaux, Lpq.reshape(-1,nao**2))
            #:vj += numpy.dot(rho_coeff, j3c.reshape(-1,nao**2))
            rhoR = numpy.einsum('Lpq,xqp->xL', LpqR, dmsR)
            jauxR = numpy.einsum('Lpq,xqp->xL', j3cR, dmsR)
            if not j_real:
                rhoR -= numpy.einsum('Lpq,xqp->xL', LpqI, dmsI)
                rhoI = numpy.einsum('Lpq,xqp->xL', LpqR, dmsI)
                rhoI += numpy.einsum('Lpq,xqp->xL', LpqI, dmsR)
                jauxR -= numpy.einsum('Lpq,xqp->xL', j3cI, dmsI)
                jauxI = numpy.einsum('Lpq,xqp->xL', j3cR, dmsI)
                jauxI += numpy.einsum('Lpq,xqp->xL', j3cI, dmsR)
            vjR += numpy.einsum('xL,Lpq->xpq', jauxR, LpqR)
            vjR += numpy.einsum('xL,Lpq->xpq', rhoR, j3cR)
            if not j_real:
                vjR -= numpy.einsum('xL,Lpq->xpq', jauxI, LpqI)
                vjR -= numpy.einsum('xL,Lpq->xpq', rhoI, j3cI)
                vjI += numpy.einsum('xL,Lpq->xpq', jauxR, LpqI)
                vjI += numpy.einsum('xL,Lpq->xpq', jauxI, LpqR)
                vjI += numpy.einsum('xL,Lpq->xpq', rhoR, j3cI)
                vjI += numpy.einsum('xL,Lpq->xpq', rhoI, j3cR)
        t2 = log.timer_debug1(' with_j', *t2)
        if with_k:
            nrow = LpqR.shape[0]
            pLqR = numpy.ndarray((nao,nrow,nao), buffer=buf1R)
            pjqR = numpy.ndarray((nao,nrow,nao), buffer=buf2R)
            pLqR[:] = LpqR.transpose(1,0,2)
            pjqR[:] = j3cR.transpose(1,0,2)
            if not k_real:
                pLqI = numpy.ndarray((nao,nrow,nao), buffer=buf1I)
                pjqI = numpy.ndarray((nao,nrow,nao), buffer=buf2I)
                pLqI[:] = LpqI.transpose(1,0,2)
                pjqI[:] = j3cI.transpose(1,0,2)

            # Contract in the background while the next block is loaded.
            thread_k = lib.background_thread(contract_k, pLqR, pLqI,
                                             pjqR, pjqI)
            t2 = log.timer_debug1(' with_k', *t2)
        LpqR = LpqI = j3cR = j3cI = None
    if thread_k is not None:
        thread_k.join()
    thread_k = None
    t1 = log.timer_debug1('mdf_jk.get_jk pass 1', *t1)

    vj, vk = pwdf_jk.get_jk(mydf, dm, hermi, kpt, kpt_band,
                            with_j, with_k, exxdiv)
    if with_j:
        if j_real:
            vj += vjR.reshape(dm.shape)
        else:
            vj += (vjR+vjI*1j).reshape(dm.shape)
    if with_k:
        if k_real:
            vk += vkR.reshape(dm.shape)
        else:
            vk += (vkR+vkI*1j).reshape(dm.shape)
    return vj, vk
def get_j_kpts(mydf, dm_kpts, hermi=1, kpts=numpy.zeros((1,3)), kpt_band=None):
    '''Coulomb-type matrices for a set of k-points.

    Pass 1 contracts the three-index tensors from ``mydf.sr_loop`` with the
    density matrices to obtain auxiliary coefficients; pass 2 contracts those
    coefficients back with the tensors at the output k-points.  The result is
    added to the matrices returned by ``pwdf_jk.get_j_kpts``.

    Args:
        mydf : density-fitting object; ``_cderi``, ``build``, ``sr_loop``,
            ``auxcell``, ``max_memory``, ``stdout`` and ``verbose`` are used.
        dm_kpts : density matrices for the k-points in ``kpts``.
        hermi : only forwarded to ``pwdf_jk.get_j_kpts``.
        kpts : k-points the density matrices belong to.
        kpt_band : optional k-point(s) at which the output is evaluated;
            defaults to ``kpts``.

    Returns:
        The array returned by ``pwdf_jk.get_j_kpts`` with the contribution
        computed here added to it.
    '''
    log = logger.Logger(mydf.stdout, mydf.verbose)
    # time.clock() was removed in Python 3.8; only fall back to it on
    # interpreters that do not provide time.process_time().
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    t1 = (cpu_clock(), time.time())
    if mydf._cderi is None:
        mydf.build()
        t1 = log.timer_debug1('Init get_j_kpts', *t1)

    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]
    if kpt_band is None:
        kpts_band = kpts
    else:
        kpts_band = numpy.reshape(kpt_band, (-1,3))
    nband = len(kpts_band)
    j_real = gamma_point(kpts_band)

    mem_now = lib.current_memory()[0]
    max_memory = max(2000, (mydf.max_memory - mem_now)) * .9
    log.debug1('max_memory = %d MB (%d in use)', max_memory, mem_now)

    naux = mydf.auxcell.nao_nr()
    # Transposed and flattened so that 'Lp,xp->xL' below reproduces the
    # 'Lpq,xqp->xL' contraction of the reference expressions.
    dmsR = dms.real.transpose(0,1,3,2).reshape(nset,nkpts,nao**2)
    dmsI = dms.imag.transpose(0,1,3,2).reshape(nset,nkpts,nao**2)
    rhoR = numpy.zeros((nset,naux))
    rhoI = numpy.zeros((nset,naux))
    jauxR = numpy.zeros((nset,naux))
    jauxI = numpy.zeros((nset,naux))
    # Pass 1 must run over ``kpts``: dmsR/dmsI are indexed by the k-points of
    # the density matrices, which differ from kpts_band when kpt_band is set.
    for k, kpt in enumerate(kpts):
        kptii = numpy.asarray((kpt,kpt))
        p1 = 0
        for LpqR, LpqI, j3cR, j3cI in mydf.sr_loop(kptii, max_memory, False):
            p0, p1 = p1, p1+LpqR.shape[0]
            #:Lpq = LpqR + LpqI*1j
            #:j3c = j3cR + j3cI*1j
            #:rho [:,p0:p1] += numpy.einsum('Lpq,xqp->xL', Lpq, dms[:,k])
            #:jaux[:,p0:p1] += numpy.einsum('Lpq,xqp->xL', j3c, dms[:,k])
            rhoR [:,p0:p1] += numpy.einsum('Lp,xp->xL', LpqR, dmsR[:,k])
            rhoR [:,p0:p1] -= numpy.einsum('Lp,xp->xL', LpqI, dmsI[:,k])
            rhoI [:,p0:p1] += numpy.einsum('Lp,xp->xL', LpqR, dmsI[:,k])
            rhoI [:,p0:p1] += numpy.einsum('Lp,xp->xL', LpqI, dmsR[:,k])
            jauxR[:,p0:p1] += numpy.einsum('Lp,xp->xL', j3cR, dmsR[:,k])
            jauxR[:,p0:p1] -= numpy.einsum('Lp,xp->xL', j3cI, dmsI[:,k])
            jauxI[:,p0:p1] += numpy.einsum('Lp,xp->xL', j3cR, dmsI[:,k])
            jauxI[:,p0:p1] += numpy.einsum('Lp,xp->xL', j3cI, dmsR[:,k])
            LpqR = LpqI = j3cR = j3cI = None

    # Average over the k-points of the density matrices.
    weight = 1./nkpts
    jauxR *= weight
    jauxI *= weight
    rhoR *= weight
    rhoI *= weight
    vjR = numpy.zeros((nset,nband,nao,nao))
    vjI = numpy.zeros((nset,nband,nao,nao))
    # Pass 2 runs over the output k-points.
    for k, kpt in enumerate(kpts_band):
        kptii = numpy.asarray((kpt,kpt))
        p1 = 0
        for LpqR, LpqI, j3cR, j3cI in mydf.sr_loop(kptii, max_memory, True):
            p0, p1 = p1, p1+LpqR.shape[0]
            #:v = numpy.dot(jaux, Lpq) + numpy.dot(rho, j3c)
            #:vj_kpts[:,k] += lib.unpack_tril(v)
            v = numpy.dot(jauxR[:,p0:p1], LpqR)
            v -= numpy.dot(jauxI[:,p0:p1], LpqI)
            v += numpy.dot(rhoR [:,p0:p1], j3cR)
            v -= numpy.dot(rhoI [:,p0:p1], j3cI)
            vjR[:,k] += lib.unpack_tril(v)
            if not j_real:
                v = numpy.dot(jauxR[:,p0:p1], LpqI)
                v += numpy.dot(jauxI[:,p0:p1], LpqR)
                v += numpy.dot(rhoR [:,p0:p1], j3cI)
                v += numpy.dot(rhoI [:,p0:p1], j3cR)
                vjI[:,k] += lib.unpack_tril(v, lib.ANTIHERMI)
            LpqR = LpqI = j3cR = j3cI = None
    t1 = log.timer_debug1('get_j pass 2', *t1)

    vj_kpts = pwdf_jk.get_j_kpts(mydf, dm_kpts, hermi, kpts, kpt_band)
    if j_real:
        vj_kpts += vjR.reshape(vj_kpts.shape)
    else:
        vj_kpts += (vjR+vjI*1j).reshape(vj_kpts.shape)
    return vj_kpts
def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=numpy.zeros((1,3)), kpt_band=None,
               exxdiv=None):
    '''Exchange-type matrices for a set of k-points.

    The contribution assembled here from ``mydf.sr_loop`` is scaled by
    ``1/nkpts`` and added to the matrices returned by
    ``pwdf_jk.get_k_kpts``.

    Args:
        mydf : density-fitting object; ``_cderi``, ``build``, ``sr_loop``,
            ``blockdim``, ``max_memory``, ``stdout`` and ``verbose`` are used.
        dm_kpts : density matrices for the k-points in ``kpts``.
        hermi : if true, the second exchange term is obtained as the
            conjugate transpose of the first instead of being evaluated.
        kpts : k-points the density matrices belong to.
        kpt_band : optional k-point(s) at which the output is evaluated;
            defaults to ``kpts``.
        exxdiv : forwarded unchanged to ``pwdf_jk.get_k_kpts``.

    Returns:
        The array returned by ``pwdf_jk.get_k_kpts`` with the contribution
        computed here added to it.
    '''
    log = logger.Logger(mydf.stdout, mydf.verbose)
    # time.clock() was removed in Python 3.8; only fall back to it on
    # interpreters that do not provide time.process_time().
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    t1 = (cpu_clock(), time.time())
    if mydf._cderi is None:
        mydf.build()
        t1 = log.timer_debug1('Init get_k_kpts', *t1)

    dm_kpts = lib.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts, nao = dms.shape[:3]

    if kpt_band is None:
        kpts_band = kpts
        swap_2e = True
    else:
        kpts_band = numpy.reshape(kpt_band, (-1,3))
        # The (ki,kj) <-> (kj,ki) shortcut is only enabled when both index
        # sets are the same list; it must still be defined here because
        # make_kpt reads it.
        swap_2e = False
    nband = len(kpts_band)
    kk_table = kpts_band.reshape(-1,1,3) - kpts.reshape(1,-1,3)
    kk_todo = numpy.ones(kk_table.shape[:2], dtype=bool)
    vkR = numpy.zeros((nset,nband,nao,nao))
    vkI = numpy.zeros((nset,nband,nao,nao))
    dmsR = numpy.asarray(dms.real, order='C')
    dmsI = numpy.asarray(dms.imag, order='C')

    mem_now = lib.current_memory()[0]
    max_memory = max(2000, (mydf.max_memory - mem_now)) * .8
    log.debug1('max_memory = %d MB (%d in use)', max_memory, mem_now)

    # K_pq = ( p{k1} i{k2} | i{k2} q{k1} )
    def make_kpt(kpt):  # kpt = kptj - kpti
        # search for all possible ki and kj that has ki-kj+kpt=0
        kk_match = numpy.einsum('ijx->ij', abs(kk_table + kpt)) < 1e-9
        kpti_idx, kptj_idx = numpy.where(kk_todo & kk_match)
        kk_todo[kpti_idx,kptj_idx] = False
        if swap_2e and not is_zero(kpt):
            # The swapped pairs are handled in the same sweep below.
            kk_todo[kptj_idx,kpti_idx] = False

        # Note: kj-ki for electron 1 and ki-kj for electron 2
        # j2c ~ ({kj-ki}|{ks-kr}) ~ ({kj-ki}|-{kj-ki}) ~ ({kj-ki}|{ki-kj})
        # j3c ~ (Q|kj,ki) = j3c{ji} = (Q|ki,kj)* = conj(transpose(j3c{ij}, (0,2,1)))

        bufR = numpy.empty((mydf.blockdim*nao**2))
        bufI = numpy.empty((mydf.blockdim*nao**2))
        # NOTE(review): below, dmsR/dmsI are indexed with ki (an index into
        # kpts_band) and vkR/vkI with kj (an index into kpts); these only
        # line up when kpts_band is kpts -- confirm for the kpt_band case.
        for ki,kj in zip(kpti_idx,kptj_idx):
            kpti = kpts_band[ki]
            kptj = kpts[kj]
            kptij = numpy.asarray((kpti,kptj))
            for LpqR, LpqI, j3cR, j3cI in mydf.sr_loop(kptij, max_memory, False):
                nrow = LpqR.shape[0]
                pLqR = numpy.ndarray((nao,nrow,nao), buffer=bufR)
                pLqI = numpy.ndarray((nao,nrow,nao), buffer=bufI)
                # The arrays handed out by sr_loop are recycled as storage:
                # pjq* live in the Lpq* memory and tmp* in the j3c* memory,
                # so the order of the four copies below matters.
                pjqR = numpy.ndarray((nao,nrow,nao), buffer=LpqR)
                pjqI = numpy.ndarray((nao,nrow,nao), buffer=LpqI)
                tmpR = numpy.ndarray((nao,nrow*nao), buffer=j3cR)
                tmpI = numpy.ndarray((nao,nrow*nao), buffer=j3cI)
                pLqR[:] = LpqR.reshape(-1,nao,nao).transpose(1,0,2)
                pLqI[:] = LpqI.reshape(-1,nao,nao).transpose(1,0,2)
                pjqR[:] = j3cR.reshape(-1,nao,nao).transpose(1,0,2)
                pjqI[:] = j3cI.reshape(-1,nao,nao).transpose(1,0,2)

                #:Lpq = LpqR + LpqI*1j
                #:j3c = j3cR + j3cI*1j
                #:for i in range(nset):
                #:    dm = dms[i,ki]
                #:    tmp = numpy.dot(dm, j3c.reshape(nao,-1))
                #:    vk1 = numpy.dot(Lpq.reshape(-1,nao).conj().T, tmp.reshape(-1,nao))
                #:    tmp = numpy.dot(dm, Lpq.reshape(nao,-1))
                #:    vk1+= numpy.dot(j3c.reshape(-1,nao).conj().T, tmp.reshape(-1,nao))
                #:    vkR[i,kj] += vk1.real
                #:    vkI[i,kj] += vk1.imag

                #:if swap_2e and not is_zero(kpt):
                #:    # K ~ 'Lij,Llk*,jk->il' + 'Llk*,Lij,jk->il'
                #:    for i in range(nset):
                #:        dm = dms[i,kj]
                #:        tmp = numpy.dot(j3c.reshape(-1,nao), dm)
                #:        vk1 = numpy.dot(tmp.reshape(nao,-1), Lpq.reshape(nao,-1).conj().T)
                #:        tmp = numpy.dot(Lpq.reshape(-1,nao), dm)
                #:        vk1+= numpy.dot(tmp.reshape(nao,-1), j3c.reshape(nao,-1).conj().T)
                #:        vkR[i,ki] += vk1.real
                #:        vkI[i,ki] += vk1.imag

                # K ~ 'iLj,lLk*,li->kj' + 'lLk*,iLj,li->kj'
                for i in range(nset):
                    tmpR, tmpI = zdotNN(dmsR[i,ki], dmsI[i,ki],
                                        pjqR.reshape(nao,-1),
                                        pjqI.reshape(nao,-1), 1, tmpR, tmpI)
                    vk1R, vk1I = zdotCN(pLqR.reshape(-1,nao).T,
                                        pLqI.reshape(-1,nao).T,
                                        tmpR.reshape(-1,nao),
                                        tmpI.reshape(-1,nao))
                    vkR[i,kj] += vk1R
                    vkI[i,kj] += vk1I
                    if hermi:
                        vkR[i,kj] += vk1R.T
                        vkI[i,kj] -= vk1I.T
                    else:
                        tmpR, tmpI = zdotNN(dmsR[i,ki], dmsI[i,ki],
                                            pLqR.reshape(nao,-1),
                                            pLqI.reshape(nao,-1), 1, tmpR, tmpI)
                        zdotCN(pjqR.reshape(-1,nao).T, pjqI.reshape(-1,nao).T,
                               tmpR.reshape(-1,nao), tmpI.reshape(-1,nao),
                               1, vkR[i,kj], vkI[i,kj], 1)

                if swap_2e and not is_zero(kpt):
                    tmpR = tmpR.reshape(nao*nrow,nao)
                    tmpI = tmpI.reshape(nao*nrow,nao)
                    # K ~ 'iLj,lLk*,jk->il' + 'lLk*,iLj,jk->il'
                    for i in range(nset):
                        tmpR, tmpI = zdotNN(pjqR.reshape(-1,nao),
                                            pjqI.reshape(-1,nao),
                                            dmsR[i,kj], dmsI[i,kj],
                                            1, tmpR, tmpI)
                        vk1R, vk1I = zdotNC(tmpR.reshape(nao,-1),
                                            tmpI.reshape(nao,-1),
                                            pLqR.reshape(nao,-1).T,
                                            pLqI.reshape(nao,-1).T)
                        vkR[i,ki] += vk1R
                        vkI[i,ki] += vk1I
                        if hermi:
                            vkR[i,ki] += vk1R.T
                            vkI[i,ki] -= vk1I.T
                        else:
                            tmpR, tmpI = zdotNN(pLqR.reshape(-1,nao),
                                                pLqI.reshape(-1,nao),
                                                dmsR[i,kj], dmsI[i,kj],
                                                1, tmpR, tmpI)
                            zdotNC(tmpR.reshape(nao,-1), tmpI.reshape(nao,-1),
                                   pjqR.reshape(nao,-1).T,
                                   pjqI.reshape(nao,-1).T,
                                   1, vkR[i,ki], vkI[i,ki], 1)
                LpqR = LpqI = j3cR = j3cI = tmpR = tmpI = None

    # Each call clears every (ki,kj) pair sharing the same k-point
    # difference, so a difference is processed only once.
    for ki, kpti in enumerate(kpts_band):
        for kj, kptj in enumerate(kpts):
            if kk_todo[ki,kj]:
                make_kpt(kptj-kpti)

    vkR *= 1./nkpts
    vkI *= 1./nkpts
    vk_kpts = pwdf_jk.get_k_kpts(mydf, dm_kpts, hermi, kpts, kpt_band, exxdiv)
    if (gamma_point(kpts) and gamma_point(kpts_band) and
        not numpy.iscomplexobj(dm_kpts)):
        vk_kpts += vkR.reshape(vk_kpts.shape)
    else:
        vk_kpts += (vkR+vkI*1j).reshape(vk_kpts.shape)
    return vk_kpts