def get_jk(self, cell=None, dm=None, hermi=1, kpt=None, kpt_band=None): '''Get Coulomb (J) and exchange (K) following :func:`scf.hf.RHF.get_jk_`. Note the incore version, which initializes an _eri array in memory. ''' if cell is None: cell = self.cell if dm is None: dm = self.make_rdm1() if kpt is None: kpt = self.kpt cpu0 = (time.clock(), time.time()) if (kpt_band is None and (self.exxdiv == 'ewald' or self.exxdiv is None) and (self._eri is not None or cell.incore_anyway or self._is_mem_enough())): if self._eri is None: logger.debug(self, 'Building PBC AO integrals incore') self._eri = self.with_df.get_ao_eri(kpt, compact=True) vj, vk = dot_eri_dm(self._eri, dm, hermi) if self.exxdiv == 'ewald': from pyscf.pbc.df.df_jk import _ewald_exxdiv_for_G0 # G=0 is not inculded in the ._eri integrals _ewald_exxdiv_for_G0(self.cell, kpt, [dm], [vk]) else: vj, vk = self.with_df.get_jk(dm, hermi, kpt, kpt_band, exxdiv=self.exxdiv) logger.timer(self, 'vj and vk', *cpu0) return vj, vk
def get_jk(self, cell=None, dm=None, hermi=1, kpt=None, kpts_band=None, with_j=True, with_k=True, omega=None, **kwargs): r'''Get Coulomb (J) and exchange (K) following :func:`scf.hf.RHF.get_jk_`. for particular k-point (kpt). When kpts_band is given, the J, K matrices on kpts_band are evaluated. J_{pq} = \sum_{rs} (pq|rs) dm[s,r] K_{pq} = \sum_{rs} (pr|sq) dm[r,s] where r,s are orbitals on kpt. p and q are orbitals on kpts_band if kpts_band is given otherwise p and q are orbitals on kpt. ''' if cell is None: cell = self.cell if dm is None: dm = self.make_rdm1() if kpt is None: kpt = self.kpt cpu0 = (time.clock(), time.time()) dm = np.asarray(dm) nao = dm.shape[-1] if (not omega and kpts_band is None and (self.exxdiv == 'ewald' or not self.exxdiv) and (self._eri is not None or cell.incore_anyway or (not self.direct_scf and self._is_mem_enough()))): if self._eri is None: logger.debug(self, 'Building PBC AO integrals incore') self._eri = self.with_df.get_ao_eri(kpt, compact=True) vj, vk = mol_hf.dot_eri_dm(self._eri, dm, hermi, with_j, with_k) if with_k and self.exxdiv == 'ewald': from pyscf.pbc.df.df_jk import _ewald_exxdiv_for_G0 # G=0 is not inculded in the ._eri integrals _ewald_exxdiv_for_G0(self.cell, kpt, dm.reshape(-1, nao, nao), vk.reshape(-1, nao, nao)) else: vj, vk = self.with_df.get_jk(dm.reshape(-1, nao, nao), hermi, kpt, kpts_band, with_j, with_k, omega, exxdiv=self.exxdiv) if with_j: vj = _format_jks(vj, dm, kpts_band) if with_k: vk = _format_jks(vk, dm, kpts_band) logger.timer(self, 'vj and vk', *cpu0) return vj, vk
def get_jk(self, cell=None, dm=None, hermi=1, kpt=None, kpts_band=None): r'''Get Coulomb (J) and exchange (K) following :func:`scf.hf.RHF.get_jk_`. for particular k-point (kpt). When kpts_band is given, the J, K matrices on kpts_band are evaluated. J_{pq} = \sum_{rs} (pq|rs) dm[s,r] K_{pq} = \sum_{rs} (pr|sq) dm[r,s] where r,s are orbitals on kpt. p and q are orbitals on kpts_band if kpts_band is given otherwise p and q are orbitals on kpt. ''' if cell is None: cell = self.cell if dm is None: dm = self.make_rdm1() if kpt is None: kpt = self.kpt cpu0 = (time.clock(), time.time()) dm = np.asarray(dm) nao = dm.shape[-1] if (kpts_band is None and (self.exxdiv == 'ewald' or not self.exxdiv) and (self._eri is not None or cell.incore_anyway or (not self.direct_scf and self._is_mem_enough()))): if self._eri is None: logger.debug(self, 'Building PBC AO integrals incore') self._eri = self.with_df.get_ao_eri(kpt, compact=True) vj, vk = mol_hf.dot_eri_dm(self._eri, dm, hermi) if self.exxdiv == 'ewald': from pyscf.pbc.df.df_jk import _ewald_exxdiv_for_G0 # G=0 is not inculded in the ._eri integrals _ewald_exxdiv_for_G0(self.cell, kpt, dm.reshape(-1,nao,nao), vk.reshape(-1,nao,nao)) else: vj, vk = self.with_df.get_jk(dm.reshape(-1,nao,nao), hermi, kpt, kpts_band, exxdiv=self.exxdiv) logger.timer(self, 'vj and vk', *cpu0) vj = _format_jks(vj, dm, kpts_band) vk = _format_jks(vk, dm, kpts_band) return vj, vk
def get_jk(mydf, dm, hermi=1, kpt=numpy.zeros(3), kpts_band=None, with_j=True, with_k=True, exxdiv=None): '''JK for given k-point''' vj = vk = None if kpts_band is not None and abs(kpt-kpts_band).sum() > 1e-9: kpt = numpy.reshape(kpt, (1,3)) if with_k: vk = get_k_kpts(mydf, dm, hermi, kpt, kpts_band, exxdiv) if with_j: vj = get_j_kpts(mydf, dm, hermi, kpt, kpts_band) return vj, vk cell = mydf.cell log = logger.Logger(mydf.stdout, mydf.verbose) t1 = (time.clock(), time.time()) dm = numpy.asarray(dm, order='C') dms = _format_dms(dm, [kpt]) nset, _, nao = dms.shape[:3] dms = dms.reshape(nset,nao,nao) j_real = gamma_point(kpt) k_real = gamma_point(kpt) and not numpy.iscomplexobj(dms) mesh = mydf.mesh kptii = numpy.asarray((kpt,kpt)) kpt_allow = numpy.zeros(3) if with_j: vjcoulG = mydf.weighted_coulG(kpt_allow, False, mesh) vjR = numpy.zeros((nset,nao,nao)) vjI = numpy.zeros((nset,nao,nao)) if with_k: mydf.exxdiv = exxdiv vkcoulG = mydf.weighted_coulG(kpt_allow, True, mesh) vkR = numpy.zeros((nset,nao,nao)) vkI = numpy.zeros((nset,nao,nao)) dmsR = numpy.asarray(dms.real.reshape(nset,nao,nao), order='C') dmsI = numpy.asarray(dms.imag.reshape(nset,nao,nao), order='C') mem_now = lib.current_memory()[0] max_memory = max(2000, (mydf.max_memory - mem_now)) * .8 log.debug1('max_memory = %d MB (%d in use)', max_memory, mem_now) t2 = t1 # rho_rs(-G+k_rs) is computed as conj(rho_{rs^*}(G-k_rs)) # == conj(transpose(rho_sr(G+k_sr), (0,2,1))) blksize = max(int(max_memory*.25e6/16/nao**2), 16) pLqR = pLqI = None for pqkR, pqkI, p0, p1 in mydf.pw_loop(mesh, kptii, max_memory=max_memory): t2 = log.timer_debug1('%d:%d ft_aopair'%(p0,p1), *t2) pqkR = pqkR.reshape(nao,nao,-1) pqkI = pqkI.reshape(nao,nao,-1) if with_j: #:v4 = numpy.einsum('ijL,lkL->ijkl', pqk, pqk.conj()) #:vj += numpy.einsum('ijkl,lk->ij', v4, dm) for i in range(nset): rhoR = numpy.einsum('pq,pqk->k', dmsR[i], pqkR) rhoR+= numpy.einsum('pq,pqk->k', dmsI[i], pqkI) rhoI = numpy.einsum('pq,pqk->k', dmsI[i], pqkR) rhoI-= numpy.einsum('pq,pqk->k', dmsR[i], pqkI) rhoR *= vjcoulG[p0:p1] rhoI *= vjcoulG[p0:p1] vjR[i] += numpy.einsum('pqk,k->pq', pqkR, rhoR) vjR[i] -= numpy.einsum('pqk,k->pq', pqkI, rhoI) if not j_real: vjI[i] += numpy.einsum('pqk,k->pq', pqkR, rhoI) vjI[i] += numpy.einsum('pqk,k->pq', pqkI, rhoR) #t2 = log.timer_debug1(' with_j', *t2) if with_k: #:v4 = numpy.einsum('ijL,lkL->ijkl', pqk, pqk.conj()) #:vk += numpy.einsum('ijkl,jk->il', v4, dm) pLqR = lib.transpose(pqkR, axes=(0,2,1), out=pLqR).reshape(-1,nao) pLqI = lib.transpose(pqkI, axes=(0,2,1), out=pLqI).reshape(-1,nao) nG = p1 - p0 iLkR = numpy.ndarray((nao,nG,nao), buffer=pqkR) iLkI = numpy.ndarray((nao,nG,nao), buffer=pqkI) for i in range(nset): if k_real: lib.dot(pLqR, dmsR[i], 1, iLkR.reshape(nao*nG,nao)) lib.dot(pLqI, dmsR[i], 1, iLkI.reshape(nao*nG,nao)) iLkR *= vkcoulG[p0:p1].reshape(1,nG,1) iLkI *= vkcoulG[p0:p1].reshape(1,nG,1) lib.dot(iLkR.reshape(nao,-1), pLqR.reshape(nao,-1).T, 1, vkR[i], 1) lib.dot(iLkI.reshape(nao,-1), pLqI.reshape(nao,-1).T, 1, vkR[i], 1) else: zdotNN(pLqR, pLqI, dmsR[i], dmsI[i], 1, iLkR.reshape(-1,nao), iLkI.reshape(-1,nao)) iLkR *= vkcoulG[p0:p1].reshape(1,nG,1) iLkI *= vkcoulG[p0:p1].reshape(1,nG,1) zdotNC(iLkR.reshape(nao,-1), iLkI.reshape(nao,-1), pLqR.reshape(nao,-1).T, pLqI.reshape(nao,-1).T, 1, vkR[i], vkI[i]) #t2 = log.timer_debug1(' with_k', *t2) pqkR = pqkI = coulG = pLqR = pLqI = iLkR = iLkI = None #t2 = log.timer_debug1('%d:%d'%(p0,p1), *t2) bufR = bufI = None t1 = log.timer_debug1('aft_jk.get_jk', *t1) if with_j: if j_real: vj = vjR else: vj = vjR + vjI * 1j vj = vj.reshape(dm.shape) if with_k: if k_real: vk = vkR else: vk = vkR + vkI * 1j # Add ewald_exxdiv contribution because G=0 was not included in the # non-uniform grids if (exxdiv == 'ewald' and (cell.dimension < 2 or # 0D and 1D are computed with inf_vacuum (cell.dimension == 2 and cell.low_dim_ft_type == 'inf_vacuum'))): _ewald_exxdiv_for_G0(cell, kpt, dms, vk) vk = vk.reshape(dm.shape) return vj, vk
def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=numpy.zeros((1,3)), kpts_band=None, exxdiv=None): cell = mydf.cell log = logger.Logger(mydf.stdout, mydf.verbose) t1 = (time.clock(), time.time()) mesh = mydf.mesh dm_kpts = lib.asarray(dm_kpts, order='C') dms = _format_dms(dm_kpts, kpts) nset, nkpts, nao = dms.shape[:3] swap_2e = (kpts_band is None) kpts_band, input_band = _format_kpts_band(kpts_band, kpts), kpts_band nband = len(kpts_band) kk_table = kpts_band.reshape(-1,1,3) - kpts.reshape(1,-1,3) kk_todo = numpy.ones(kk_table.shape[:2], dtype=bool) vkR = numpy.zeros((nset,nband,nao,nao)) vkI = numpy.zeros((nset,nband,nao,nao)) dmsR = numpy.asarray(dms.real, order='C') dmsI = numpy.asarray(dms.imag, order='C') mem_now = lib.current_memory()[0] max_memory = max(2000, (mydf.max_memory - mem_now)) * .8 log.debug1('max_memory = %d MB (%d in use)', max_memory, mem_now) # K_pq = ( p{k1} i{k2} | i{k2} q{k1} ) def make_kpt(kpt): # kpt = kptj - kpti # search for all possible ki and kj that has ki-kj+kpt=0 kk_match = numpy.einsum('ijx->ij', abs(kk_table + kpt)) < 1e-9 kpti_idx, kptj_idx = numpy.where(kk_todo & kk_match) nkptj = len(kptj_idx) log.debug1('kpt = %s', kpt) log.debug2('kpti_idx = %s', kpti_idx) log.debug2('kptj_idx = %s', kptj_idx) kk_todo[kpti_idx,kptj_idx] = False if swap_2e and not is_zero(kpt): kk_todo[kptj_idx,kpti_idx] = False max_memory1 = max_memory * (nkptj+1)/(nkptj+5) #blksize = max(int(max_memory1*4e6/(nkptj+5)/16/nao**2), 16) #bufR = numpy.empty((blksize*nao**2)) #bufI = numpy.empty((blksize*nao**2)) # Use DF object to mimic KRHF/KUHF object in function get_coulG mydf.exxdiv = exxdiv vkcoulG = mydf.weighted_coulG(kpt, True, mesh) kptjs = kpts[kptj_idx] # <r|-G+k_rs|s> = conj(<s|G-k_rs|r>) = conj(<s|G+k_sr|r>) #buf1R = numpy.empty((blksize*nao**2)) #buf1I = numpy.empty((blksize*nao**2)) for aoaoks, p0, p1 in mydf.ft_loop(mesh, kpt, kptjs, max_memory=max_memory1): nG = p1 - p0 bufR = numpy.empty((nG*nao**2)) bufI = numpy.empty((nG*nao**2)) buf1R = numpy.empty((nG*nao**2)) buf1I = numpy.empty((nG*nao**2)) for k, aoao in enumerate(aoaoks): ki = kpti_idx[k] kj = kptj_idx[k] # case 1: k_pq = (pi|iq) #:v4 = numpy.einsum('ijL,lkL->ijkl', pqk, pqk.conj()) #:vk += numpy.einsum('ijkl,jk->il', v4, dm) pLqR = numpy.ndarray((nao,nG,nao), buffer=bufR) pLqI = numpy.ndarray((nao,nG,nao), buffer=bufI) pLqR[:] = aoao.real.reshape(nG,nao,nao).transpose(1,0,2) pLqI[:] = aoao.imag.reshape(nG,nao,nao).transpose(1,0,2) iLkR = numpy.ndarray((nao,nG,nao), buffer=buf1R) iLkI = numpy.ndarray((nao,nG,nao), buffer=buf1I) for i in range(nset): zdotNN(pLqR.reshape(-1,nao), pLqI.reshape(-1,nao), dmsR[i,kj], dmsI[i,kj], 1, iLkR.reshape(-1,nao), iLkI.reshape(-1,nao)) iLkR *= vkcoulG[p0:p1].reshape(1,nG,1) iLkI *= vkcoulG[p0:p1].reshape(1,nG,1) zdotNC(iLkR.reshape(nao,-1), iLkI.reshape(nao,-1), pLqR.reshape(nao,-1).T, pLqI.reshape(nao,-1).T, 1, vkR[i,ki], vkI[i,ki], 1) # case 2: k_pq = (iq|pi) #:v4 = numpy.einsum('iLj,lLk->ijkl', pqk, pqk.conj()) #:vk += numpy.einsum('ijkl,li->kj', v4, dm) if swap_2e and not is_zero(kpt): for i in range(nset): zdotNN(dmsR[i,ki], dmsI[i,ki], pLqR.reshape(nao,-1), pLqI.reshape(nao,-1), 1, iLkR.reshape(nao,-1), iLkI.reshape(nao,-1)) iLkR *= vkcoulG[p0:p1].reshape(1,nG,1) iLkI *= vkcoulG[p0:p1].reshape(1,nG,1) zdotCN(pLqR.reshape(-1,nao).T, pLqI.reshape(-1,nao).T, iLkR.reshape(-1,nao), iLkI.reshape(-1,nao), 1, vkR[i,kj], vkI[i,kj], 1) for ki, kpti in enumerate(kpts_band): for kj, kptj in enumerate(kpts): if kk_todo[ki,kj]: make_kpt(kptj-kpti) t1 = log.timer_debug1('get_k_kpts: make_kpt (%d,*)'%ki, *t1) if (gamma_point(kpts) and gamma_point(kpts_band) and not numpy.iscomplexobj(dm_kpts)): vk_kpts = vkR else: vk_kpts = vkR + vkI * 1j vk_kpts *= 1./nkpts # Add ewald_exxdiv contribution because G=0 was not included in the # non-uniform grids if (exxdiv == 'ewald' and (cell.dimension < 2 or # 0D and 1D are computed with inf_vacuum (cell.dimension == 2 and cell.low_dim_ft_type == 'inf_vacuum'))): _ewald_exxdiv_for_G0(cell, kpts_band, dms, vk_kpts, kpts_band) return _format_jks(vk_kpts, dm_kpts, input_band, kpts)
def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=numpy.zeros((1,3)), kpts_band=None, exxdiv=None): cell = mydf.cell log = logger.Logger(mydf.stdout, mydf.verbose) t1 = (time.clock(), time.time()) dm_kpts = lib.asarray(dm_kpts, order='C') dms = _format_dms(dm_kpts, kpts) nset, nkpts, nao = dms.shape[:3] swap_2e = (kpts_band is None) kpts_band, single_kpt_band = _format_kpts_band(kpts_band, kpts) nband = len(kpts_band) kk_table = kpts_band.reshape(-1,1,3) - kpts.reshape(1,-1,3) kk_todo = numpy.ones(kk_table.shape[:2], dtype=bool) vkR = numpy.zeros((nset,nband,nao,nao)) vkI = numpy.zeros((nset,nband,nao,nao)) dmsR = numpy.asarray(dms.real, order='C') dmsI = numpy.asarray(dms.imag, order='C') mem_now = lib.current_memory()[0] max_memory = max(2000, (mydf.max_memory - mem_now)) * .8 log.debug1('max_memory = %d MB (%d in use)', max_memory, mem_now) # K_pq = ( p{k1} i{k2} | i{k2} q{k1} ) def make_kpt(kpt): # kpt = kptj - kpti # search for all possible ki and kj that has ki-kj+kpt=0 kk_match = numpy.einsum('ijx->ij', abs(kk_table + kpt)) < 1e-9 kpti_idx, kptj_idx = numpy.where(kk_todo & kk_match) nkptj = len(kptj_idx) log.debug1('kpt = %s', kpt) log.debug2('kpti_idx = %s', kpti_idx) log.debug2('kptj_idx = %s', kptj_idx) kk_todo[kpti_idx,kptj_idx] = False if swap_2e and not is_zero(kpt): kk_todo[kptj_idx,kpti_idx] = False max_memory1 = max_memory * (nkptj+1)/(nkptj+5) blksize = max(int(max_memory1*4e6/(nkptj+5)/16/nao**2), 16) bufR = numpy.empty((blksize*nao**2)) bufI = numpy.empty((blksize*nao**2)) # Use DF object to mimic KRHF/KUHF object in function get_coulG mydf.exxdiv = exxdiv vkcoulG = mydf.weighted_coulG(kpt, True, mydf.gs) kptjs = kpts[kptj_idx] # <r|-G+k_rs|s> = conj(<s|G-k_rs|r>) = conj(<s|G+k_sr|r>) for k, pqkR, pqkI, p0, p1 \ in mydf.ft_loop(mydf.gs, kpt, kptjs, max_memory=max_memory1): ki = kpti_idx[k] kj = kptj_idx[k] coulG = numpy.sqrt(vkcoulG[p0:p1]) # case 1: k_pq = (pi|iq) #:v4 = numpy.einsum('ijL,lkL->ijkl', pqk, pqk.conj()) #:vk += numpy.einsum('ijkl,jk->il', v4, dm) pqkR *= coulG pqkI *= coulG pLqR = lib.transpose(pqkR.reshape(nao,nao,-1), axes=(0,2,1), out=bufR) pLqI = lib.transpose(pqkI.reshape(nao,nao,-1), axes=(0,2,1), out=bufI) iLkR = numpy.empty((nao*(p1-p0),nao)) iLkI = numpy.empty((nao*(p1-p0),nao)) for i in range(nset): iLkR, iLkI = zdotNN(pLqR.reshape(-1,nao), pLqI.reshape(-1,nao), dmsR[i,kj], dmsI[i,kj], 1, iLkR, iLkI) zdotNC(iLkR.reshape(nao,-1), iLkI.reshape(nao,-1), pLqR.reshape(nao,-1).T, pLqI.reshape(nao,-1).T, 1, vkR[i,ki], vkI[i,ki], 1) # case 2: k_pq = (iq|pi) #:v4 = numpy.einsum('iLj,lLk->ijkl', pqk, pqk.conj()) #:vk += numpy.einsum('ijkl,li->kj', v4, dm) if swap_2e and not is_zero(kpt): iLkR = iLkR.reshape(nao,-1) iLkI = iLkI.reshape(nao,-1) for i in range(nset): iLkR, iLkI = zdotNN(dmsR[i,ki], dmsI[i,ki], pLqR.reshape(nao,-1), pLqI.reshape(nao,-1), 1, iLkR, iLkI) zdotCN(pLqR.reshape(-1,nao).T, pLqI.reshape(-1,nao).T, iLkR.reshape(-1,nao), iLkI.reshape(-1,nao), 1, vkR[i,kj], vkI[i,kj], 1) pqkR = pqkI = coulG = pLqR = pLqI = iLkR = iLkI = None for ki, kpti in enumerate(kpts_band): for kj, kptj in enumerate(kpts): if kk_todo[ki,kj]: make_kpt(kptj-kpti) if (gamma_point(kpts) and gamma_point(kpts_band) and not numpy.iscomplexobj(dm_kpts)): vk_kpts = vkR else: vk_kpts = vkR + vkI * 1j vk_kpts *= 1./nkpts # G=0 was not included in the non-uniform grids if cell.dimension != 3 and exxdiv: assert(exxdiv.lower() == 'ewald') _ewald_exxdiv_for_G0(cell, kpts_band, dms, vk_kpts, kpts_band) return _format_jks(vk_kpts, dm_kpts, kpts_band, kpts, single_kpt_band)
def get_jk(mydf, dm, hermi=1, kpt=numpy.zeros(3), kpt_band=None, with_j=True, with_k=True, exxdiv=None): '''JK for given k-point''' from pyscf.pbc.df.df_jk import _ewald_exxdiv_for_G0 vj = vk = None if kpt_band is not None and abs(kpt-kpt_band).sum() > 1e-9: kpt = numpy.reshape(kpt, (1,3)) if with_k: vk = get_k_kpts(mydf, dm, hermi, kpt, kpt_band, exxdiv) if with_j: vj = get_j_kpts(mydf, dm, hermi, kpt, kpt_band) return vj, vk cell = mydf.cell log = logger.Logger(mydf.stdout, mydf.verbose) t1 = (time.clock(), time.time()) dm = numpy.asarray(dm, order='C') dms = _format_dms(dm, [kpt]) nset, _, nao = dms.shape[:3] dms = dms.reshape(nset,nao,nao) j_real = gamma_point(kpt) k_real = gamma_point(kpt) and not numpy.iscomplexobj(dms) kptii = numpy.asarray((kpt,kpt)) kpt_allow = numpy.zeros(3) if with_j: vjcoulG = mydf.weighted_coulG(kpt_allow, False, mydf.gs) vjR = numpy.zeros((nset,nao,nao)) vjI = numpy.zeros((nset,nao,nao)) if with_k: mydf.exxdiv = exxdiv vkcoulG = mydf.weighted_coulG(kpt_allow, True, mydf.gs) vkR = numpy.zeros((nset,nao,nao)) vkI = numpy.zeros((nset,nao,nao)) dmsR = numpy.asarray(dms.real.reshape(nset,nao,nao), order='C') dmsI = numpy.asarray(dms.imag.reshape(nset,nao,nao), order='C') mem_now = lib.current_memory()[0] max_memory = max(2000, (mydf.max_memory - mem_now)) * .8 log.debug1('max_memory = %d MB (%d in use)', max_memory, mem_now) t2 = t1 # rho_rs(-G+k_rs) is computed as conj(rho_{rs^*}(G-k_rs)) # == conj(transpose(rho_sr(G+k_sr), (0,2,1))) blksize = max(int(max_memory*.25e6/16/nao**2), 16) bufR = numpy.empty(blksize*nao**2) bufI = numpy.empty(blksize*nao**2) for pqkR, pqkI, p0, p1 in mydf.pw_loop(mydf.gs, kptii, max_memory=max_memory): t2 = log.timer_debug1('%d:%d ft_aopair'%(p0,p1), *t2) pqkR = pqkR.reshape(nao,nao,-1) pqkI = pqkI.reshape(nao,nao,-1) if with_j: #:v4 = numpy.einsum('ijL,lkL->ijkl', pqk, pqk.conj()) #:vj += numpy.einsum('ijkl,lk->ij', v4, dm) for i in range(nset): rhoR = numpy.einsum('pq,pqk->k', dmsR[i], pqkR) rhoR+= numpy.einsum('pq,pqk->k', dmsI[i], pqkI) rhoI = numpy.einsum('pq,pqk->k', dmsI[i], pqkR) rhoI-= numpy.einsum('pq,pqk->k', dmsR[i], pqkI) rhoR *= vjcoulG[p0:p1] rhoI *= vjcoulG[p0:p1] vjR[i] += numpy.einsum('pqk,k->pq', pqkR, rhoR) vjR[i] -= numpy.einsum('pqk,k->pq', pqkI, rhoI) if not j_real: vjI[i] += numpy.einsum('pqk,k->pq', pqkR, rhoI) vjI[i] += numpy.einsum('pqk,k->pq', pqkI, rhoR) #t2 = log.timer_debug1(' with_j', *t2) if with_k: coulG = numpy.sqrt(vkcoulG[p0:p1]) pqkR *= coulG pqkI *= coulG #:v4 = numpy.einsum('ijL,lkL->ijkl', pqk, pqk.conj()) #:vk += numpy.einsum('ijkl,jk->il', v4, dm) pLqR = lib.transpose(pqkR, axes=(0,2,1), out=bufR).reshape(-1,nao) pLqI = lib.transpose(pqkI, axes=(0,2,1), out=bufI).reshape(-1,nao) iLkR = numpy.ndarray((nao*(p1-p0),nao), buffer=pqkR) iLkI = numpy.ndarray((nao*(p1-p0),nao), buffer=pqkI) for i in range(nset): if k_real: lib.dot(pLqR, dmsR[i], 1, iLkR) lib.dot(pLqI, dmsR[i], 1, iLkI) lib.dot(iLkR.reshape(nao,-1), pLqR.reshape(nao,-1).T, 1, vkR[i], 1) lib.dot(iLkI.reshape(nao,-1), pLqI.reshape(nao,-1).T, 1, vkR[i], 1) else: zdotNN(pLqR, pLqI, dmsR[i], dmsI[i], 1, iLkR, iLkI) zdotNC(iLkR.reshape(nao,-1), iLkI.reshape(nao,-1), pLqR.reshape(nao,-1).T, pLqI.reshape(nao,-1).T, 1, vkR[i], vkI[i]) #t2 = log.timer_debug1(' with_k', *t2) pqkR = pqkI = coulG = pLqR = pLqI = iLkR = iLkI = None #t2 = log.timer_debug1('%d:%d'%(p0,p1), *t2) bufR = bufI = None t1 = log.timer_debug1('aft_jk.get_jk', *t1) if with_j: if j_real: vj = vjR else: vj = vjR + vjI * 1j vj = vj.reshape(dm.shape) if with_k: if k_real: vk = vkR else: vk = vkR + vkI * 1j if cell.dimension != 3 and exxdiv: assert(exxdiv.lower() == 'ewald') _ewald_exxdiv_for_G0(cell, kpt, dms, vk) vk = vk.reshape(dm.shape) return vj, vk
def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=np.zeros((1, 3)), kpts_band=None, exxdiv=None): '''Get the Coulomb (J) and exchange (K) AO matrices at sampled k-points. Args: dm_kpts : (nkpts, nao, nao) ndarray Density matrix at each k-point kpts : (nkpts, 3) ndarray Kwargs: hermi : int Whether K matrix is hermitian | 0 : not hermitian and not symmetric | 1 : hermitian kpts_band : (3,) ndarray or (*,3) ndarray A list of arbitrary "band" k-points at which to evalute the matrix. Returns: vj : (nkpts, nao, nao) ndarray vk : (nkpts, nao, nao) ndarray or list of vj and vk if the input dm_kpts is a list of DMs ''' cell = mydf.cell mesh = mydf.mesh coords = cell.gen_uniform_grids(mesh) ngrids = coords.shape[0] if getattr(dm_kpts, 'mo_coeff', None) is not None: mo_coeff = dm_kpts.mo_coeff mo_occ = dm_kpts.mo_occ else: mo_coeff = None kpts = np.asarray(kpts) dm_kpts = lib.asarray(dm_kpts, order='C') dms = _format_dms(dm_kpts, kpts) nset, nkpts, nao = dms.shape[:3] weight = 1. / nkpts * (cell.vol / ngrids) kpts_band, input_band = _format_kpts_band(kpts_band, kpts), kpts_band nband = len(kpts_band) if gamma_point(kpts_band) and gamma_point(kpts): vk_kpts = np.zeros((nset, nband, nao, nao), dtype=dms.dtype) else: vk_kpts = np.zeros((nset, nband, nao, nao), dtype=np.complex128) coords = mydf.grids.coords ao2_kpts = [ np.asarray(ao.T, order='C') for ao in mydf._numint.eval_ao(cell, coords, kpts=kpts) ] if input_band is None: ao1_kpts = ao2_kpts else: ao1_kpts = [ np.asarray(ao.T, order='C') for ao in mydf._numint.eval_ao(cell, coords, kpts=kpts_band) ] if mo_coeff is not None and nset == 1: mo_coeff = [ mo_coeff[k][:, occ > 0] * np.sqrt(occ[occ > 0]) for k, occ in enumerate(mo_occ) ] ao2_kpts = [np.dot(mo_coeff[k].T, ao) for k, ao in enumerate(ao2_kpts)] mem_now = lib.current_memory()[0] max_memory = mydf.max_memory - mem_now blksize = int( min(nao, max(1, (max_memory - mem_now) * 1e6 / 16 / 4 / ngrids / nao))) lib.logger.debug1(mydf, 'fft_jk: get_k_kpts max_memory %s blksize %d', max_memory, blksize) #ao1_dtype = np.result_type(*ao1_kpts) #ao2_dtype = np.result_type(*ao2_kpts) vR_dm = np.empty((nset, nao, ngrids), dtype=vk_kpts.dtype) t1 = (time.clock(), time.time()) for k2, ao2T in enumerate(ao2_kpts): if ao2T.size == 0: continue kpt2 = kpts[k2] naoj = ao2T.shape[0] if mo_coeff is None or nset > 1: ao_dms = [lib.dot(dms[i, k2], ao2T.conj()) for i in range(nset)] else: ao_dms = [ao2T.conj()] for k1, ao1T in enumerate(ao1_kpts): kpt1 = kpts_band[k1] # If we have an ewald exxdiv, we add the G=0 correction near the # end of the function to bypass any discretization errors # that arise from the FFT. mydf.exxdiv = exxdiv if exxdiv == 'ewald' or exxdiv is None: coulG = tools.get_coulG(cell, kpt2 - kpt1, False, mydf, mesh) else: coulG = tools.get_coulG(cell, kpt2 - kpt1, True, mydf, mesh) if is_zero(kpt1 - kpt2): expmikr = np.array(1.) else: expmikr = np.exp(-1j * np.dot(coords, kpt2 - kpt1)) for p0, p1 in lib.prange(0, nao, blksize): rho1 = np.einsum('ig,jg->ijg', ao1T[p0:p1].conj() * expmikr, ao2T) vG = tools.fft(rho1.reshape(-1, ngrids), mesh) rho1 = None vG *= coulG vR = tools.ifft(vG, mesh).reshape(p1 - p0, naoj, ngrids) vG = None if vR_dm.dtype == np.double: vR = vR.real for i in range(nset): np.einsum('ijg,jg->ig', vR, ao_dms[i], out=vR_dm[i, p0:p1]) vR = None vR_dm *= expmikr.conj() for i in range(nset): vk_kpts[i, k1] += weight * lib.dot(vR_dm[i], ao1T.T) t1 = lib.logger.timer_debug1(mydf, 'get_k_kpts: make_kpt (%d,*)' % k2, *t1) # Function _ewald_exxdiv_for_G0 to add back in the G=0 component to vk_kpts # Note in the _ewald_exxdiv_for_G0 implementation, the G=0 treatments are # different for 1D/2D and 3D systems. The special treatments for 1D and 2D # can only be used with AFTDF/GDF/MDF method. In the FFTDF method, 1D, 2D # and 3D should use the ewald probe charge correction. if exxdiv == 'ewald': _ewald_exxdiv_for_G0(cell, kpts, dms, vk_kpts, kpts_band=kpts_band) return _format_jks(vk_kpts, dm_kpts, input_band, kpts)
def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=numpy.zeros((1,3)), kpts_band=None, exxdiv=None): mydf = _sync_mydf(mydf) cell = mydf.cell mesh = mydf.mesh coords = cell.gen_uniform_grids(mesh) ngrids = coords.shape[0] if hasattr(dm_kpts, 'mo_coeff'): if dm_kpts.ndim == 3: # KRHF mo_coeff = [dm_kpts.mo_coeff] mo_occ = [dm_kpts.mo_occ ] else: # KUHF mo_coeff = dm_kpts.mo_coeff mo_occ = dm_kpts.mo_occ elif hasattr(dm_kpts[0], 'mo_coeff'): mo_coeff = [dm.mo_coeff for dm in dm_kpts] mo_occ = [dm.mo_occ for dm in dm_kpts] else: mo_coeff = None kpts = numpy.asarray(kpts) dm_kpts = lib.asarray(dm_kpts, order='C') dms = _format_dms(dm_kpts, kpts) nset, nkpts, nao = dms.shape[:3] weight = 1./nkpts * (cell.vol/ngrids) kpts_band, input_band = _format_kpts_band(kpts_band, kpts), kpts_band nband = len(kpts_band) if gamma_point(kpts_band) and gamma_point(kpts): vk_kpts = numpy.zeros((nset,nband,nao,nao), dtype=dms.dtype) else: vk_kpts = numpy.zeros((nset,nband,nao,nao), dtype=numpy.complex128) coords = mydf.grids.coords ao2_kpts = [numpy.asarray(ao.T, order='C') for ao in mydf._numint.eval_ao(cell, coords, kpts=kpts)] if input_band is None: ao1_kpts = ao2_kpts else: ao1_kpts = [numpy.asarray(ao.T, order='C') for ao in mydf._numint.eval_ao(cell, coords, kpts=kpts_band)] mem_now = lib.current_memory()[0] max_memory = mydf.max_memory - mem_now blksize = int(min(nao, max(1, (max_memory-mem_now)*1e6/16/4/ngrids/nao))) lib.logger.debug1(mydf, 'max_memory %s blksize %d', max_memory, blksize) ao1_dtype = numpy.result_type(*ao1_kpts) ao2_dtype = numpy.result_type(*ao2_kpts) vR_dm = numpy.empty((nset,nao,ngrids), dtype=vk_kpts.dtype) ao_dms_buf = [None] * nkpts tasks = [(k1,k2) for k2 in range(nkpts) for k1 in range(nband)] for k1, k2 in mpi.static_partition(tasks): ao1T = ao1_kpts[k1] ao2T = ao2_kpts[k2] kpt1 = kpts_band[k1] kpt2 = kpts[k2] if ao2T.size == 0 or ao1T.size == 0: continue # If we have an ewald exxdiv, we add the G=0 correction near the # end of the function to bypass any discretization errors # that arise from the FFT. mydf.exxdiv = exxdiv if exxdiv == 'ewald' or exxdiv is None: coulG = tools.get_coulG(cell, kpt2-kpt1, False, mydf, mesh) else: coulG = tools.get_coulG(cell, kpt2-kpt1, True, mydf, mesh) if is_zero(kpt1-kpt2): expmikr = numpy.array(1.) else: expmikr = numpy.exp(-1j * numpy.dot(coords, kpt2-kpt1)) if ao_dms_buf[k2] is None: if mo_coeff is None: ao_dms = [lib.dot(dm[k2], ao2T.conj()) for dm in dms] else: ao_dms = [] for i, dm in enumerate(dms): occ = mo_occ[i][k2] mo_scaled = mo_coeff[i][k2][:,occ>0] * numpy.sqrt(occ[occ>0]) ao_dms.append(lib.dot(mo_scaled.T, ao2T).conj()) ao_dms_buf[k2] = ao_dms else: ao_dms = ao_dms_buf[k2] if mo_coeff is None: for p0, p1 in lib.prange(0, nao, blksize): rho1 = numpy.einsum('ig,jg->ijg', ao1T[p0:p1].conj()*expmikr, ao2T) vG = tools.fft(rho1.reshape(-1,ngrids), mesh) rho1 = None vG *= coulG vR = tools.ifft(vG, mesh).reshape(p1-p0,nao,ngrids) vG = None if vR_dm.dtype == numpy.double: vR = vR.real for i in range(nset): numpy.einsum('ijg,jg->ig', vR, ao_dms[i], out=vR_dm[i,p0:p1]) vR = None else: for p0, p1 in lib.prange(0, nao, blksize): for i in range(nset): rho1 = numpy.einsum('ig,jg->ijg', ao1T[p0:p1].conj()*expmikr, ao_dms[i].conj()) vG = tools.fft(rho1.reshape(-1,ngrids), mesh) rho1 = None vG *= coulG vR = tools.ifft(vG, mesh).reshape(p1-p0,-1,ngrids) vG = None if vR_dm.dtype == numpy.double: vR = vR.real numpy.einsum('ijg,jg->ig', vR, ao_dms[i], out=vR_dm[i,p0:p1]) vR = None vR_dm *= expmikr.conj() for i in range(nset): vk_kpts[i,k1] += weight * lib.dot(vR_dm[i], ao1T.T) vk_kpts = mpi.reduce(lib.asarray(vk_kpts)) if gamma_point(kpts_band) and gamma_point(kpts): vk_kpts = vk_kpts.real if rank == 0: if exxdiv == 'ewald': _ewald_exxdiv_for_G0(cell, kpts, dms, vk_kpts, kpts_band=kpts_band) return _format_jks(vk_kpts, dm_kpts, input_band, kpts)
def get_k_kpts(self, dm_kpts, hermi=1, kpts=np.zeros((1, 3)), kpts_band=None, exxdiv=None): ''' C ~ compact basis, D ~ diffused basis Compute K matrix with coulG_LR: (CC|CC) (CC|CD) (CC|DC) (CD|CC) (CD|CD) (CD|DC) (DC|CC) (DC|CD) (DC|DC) Compute K matrix with full coulG: (CC|DD) (CD|DD) (DC|DD) (DD|CC) (DD|CD) (DD|DC) (DD|DD) ''' cell = self.cell log = logger.Logger(self.stdout, self.verbose) t1 = (time.clock(), time.time()) mesh = self.mesh dm_kpts = lib.asarray(dm_kpts, order='C') dms = _format_dms(dm_kpts, kpts) nset, nkpts, nao = dms.shape[:3] swap_2e = (kpts_band is None) kpts_band, input_band = _format_kpts_band(kpts_band, kpts), kpts_band nband = len(kpts_band) kk_table = kpts_band.reshape(-1, 1, 3) - kpts.reshape(1, -1, 3) kk_todo = np.ones(kk_table.shape[:2], dtype=bool) vkR = np.zeros((nset, nband, nao, nao)) vkI = np.zeros((nset, nband, nao, nao)) dmsR = np.asarray(dms.real, order='C') dmsI = np.asarray(dms.imag, order='C') weight = 1. / nkpts n_diffused = cell._nbas_each_set[2] nao_compact = cell.ao_loc[cell.nbas - n_diffused] mem_now = lib.current_memory()[0] max_memory = max(2000, (self.max_memory - mem_now)) * .8 log.debug1('max_memory = %d MB (%d in use)', max_memory, mem_now) # K_pq = ( p{k1} i{k2} | i{k2} q{k1} ) def make_kpt(kpt): # kpt = kptj - kpti # search for all possible ki and kj that has ki-kj+kpt=0 kk_match = np.einsum('ijx->ij', abs(kk_table + kpt)) < 1e-9 kpti_idx, kptj_idx = np.where(kk_todo & kk_match) nkptj = len(kptj_idx) log.debug1('kpt = %s', kpt) log.debug2('kpti_idx = %s', kpti_idx) log.debug2('kptj_idx = %s', kptj_idx) kk_todo[kpti_idx, kptj_idx] = False if swap_2e and not is_zero(kpt): kk_todo[kptj_idx, kpti_idx] = False max_memory1 = max_memory * (nkptj + 1) / (nkptj + 5) #blksize = max(int(max_memory1*4e6/(nkptj+5)/16/nao**2), 16) #bufR = np.empty((blksize*nao**2)) #bufI = np.empty((blksize*nao**2)) # Use DF object to mimic KRHF/KUHF object in function get_coulG vkcoulG = self.weighted_coulG(kpt, exxdiv, mesh) coulG_SR = self.weighted_coulG_SR(kpt, False, mesh) coulG_LR = vkcoulG - coulG_SR kptjs = kpts[kptj_idx] perm_sym = swap_2e and not is_zero(kpt) for aoaoks, p0, p1 in self.ft_loop(mesh, kpt, kptjs, max_memory=max_memory1): if nao_compact < nao: aoaoks = [aoao.reshape(-1, nao, nao) for aoao in aoaoks] aft_jk._update_vk_((vkR, vkI), aoaoks, (dmsR, dmsI), vkcoulG[p0:p1], weight, kpti_idx, kptj_idx, perm_sym) for aoao in aoaoks: aoao[:, nao_compact:, nao_compact:] = 0 aft_jk._update_vk_((vkR, vkI), aoaoks, (dmsR, dmsI), coulG_SR[p0:p1], -weight, kpti_idx, kptj_idx, perm_sym) else: aft_jk._update_vk_((vkR, vkI), aoaoks, (dmsR, dmsI), coulG_LR[p0:p1], weight, kpti_idx, kptj_idx, perm_sym) for ki, kpti in enumerate(kpts_band): for kj, kptj in enumerate(kpts): if kk_todo[ki, kj]: make_kpt(kptj - kpti) t1 = log.timer_debug1('get_k_kpts: make_kpt (%d,*)' % ki, *t1) if (gamma_point(kpts) and gamma_point(kpts_band) and not np.iscomplexobj(dm_kpts)): vk_kpts = vkR else: vk_kpts = vkR + vkI * 1j # G=0 associated to 2e integrals in real-space if cell.dimension >= 2: ovlp = np.asarray(cell.pbc_intor('int1e_ovlp', hermi=1, kpts=kpts)) ovlp[:, nao_compact:, nao_compact:] = 0 kws = cell.get_Gv_weights(mesh)[2] G0_weight = kws[0] if isinstance(kws, np.ndarray) else kws vk_G0 = lib.einsum('kpq,nkqr,krs->nkps', ovlp, dm_kpts, ovlp) vk_kpts -= np.pi / self.omega**2 * weight * G0_weight * vk_G0 # Add ewald_exxdiv contribution because G=0 was not included in the # non-uniform grids if (exxdiv == 'ewald' and (cell.dimension < 2 or # 0D and 1D are computed with inf_vacuum (cell.dimension == 2 and cell.low_dim_ft_type == 'inf_vacuum'))): _ewald_exxdiv_for_G0(cell, kpts_band, dms, vk_kpts, kpts_band) return _format_jks(vk_kpts, dm_kpts, input_band, kpts)
def get_jk(mydf, dm, hermi=1, kpt=numpy.zeros(3), kpts_band=None, with_j=True, with_k=True, exxdiv=None): '''JK for given k-point''' vj = vk = None if kpts_band is not None and abs(kpt-kpts_band).sum() > 1e-9: kpt = numpy.reshape(kpt, (1,3)) if with_k: vk = get_k_kpts(mydf, dm, hermi, kpt, kpts_band, exxdiv) if with_j: vj = get_j_kpts(mydf, dm, hermi, kpt, kpts_band) return vj, vk cell = mydf.cell log = logger.Logger(mydf.stdout, mydf.verbose) t1 = (logger.process_clock(), logger.perf_counter()) dm = numpy.asarray(dm, order='C') dms = _format_dms(dm, [kpt]) nset, _, nao = dms.shape[:3] dms = dms.reshape(nset,nao,nao) j_real = gamma_point(kpt) k_real = gamma_point(kpt) and not numpy.iscomplexobj(dms) mesh = mydf.mesh kptii = numpy.asarray((kpt,kpt)) kpt_allow = numpy.zeros(3) if with_j: vjcoulG = mydf.weighted_coulG(kpt_allow, False, mesh) vjR = numpy.zeros((nset,nao,nao)) vjI = numpy.zeros((nset,nao,nao)) if with_k: vkcoulG = mydf.weighted_coulG(kpt_allow, exxdiv, mesh) vkR = numpy.zeros((nset,nao,nao)) vkI = numpy.zeros((nset,nao,nao)) dmsR = numpy.asarray(dms.real.reshape(nset,nao,nao), order='C') dmsI = numpy.asarray(dms.imag.reshape(nset,nao,nao), order='C') mem_now = lib.current_memory()[0] max_memory = max(2000, (mydf.max_memory - mem_now)) * .8 log.debug1('max_memory = %d MB (%d in use)', max_memory, mem_now) t2 = t1 # rho_rs(-G+k_rs) is computed as conj(rho_{rs^*}(G-k_rs)) # == conj(transpose(rho_sr(G+k_sr), (0,2,1))) #blksize = max(int(max_memory*.25e6/16/nao**2), 16) pLqR = pLqI = None for pqkR, pqkI, p0, p1 in mydf.pw_loop(mesh, kptii, max_memory=max_memory): t2 = log.timer_debug1('%d:%d ft_aopair'%(p0,p1), *t2) pqkR = pqkR.reshape(nao,nao,-1) pqkI = pqkI.reshape(nao,nao,-1) if with_j: #:v4 = numpy.einsum('ijL,lkL->ijkl', pqk, pqk.conj()) #:vj += numpy.einsum('ijkl,lk->ij', v4, dm) for i in range(nset): rhoR = numpy.einsum('pq,pqk->k', dmsR[i], pqkR) rhoR+= numpy.einsum('pq,pqk->k', dmsI[i], pqkI) rhoI = numpy.einsum('pq,pqk->k', dmsI[i], pqkR) rhoI-= numpy.einsum('pq,pqk->k', dmsR[i], pqkI) rhoR *= vjcoulG[p0:p1] rhoI *= vjcoulG[p0:p1] vjR[i] += numpy.einsum('pqk,k->pq', pqkR, rhoR) vjR[i] -= numpy.einsum('pqk,k->pq', pqkI, rhoI) if not j_real: vjI[i] += numpy.einsum('pqk,k->pq', pqkR, rhoI) vjI[i] += numpy.einsum('pqk,k->pq', pqkI, rhoR) #t2 = log.timer_debug1(' with_j', *t2) if with_k: #:v4 = numpy.einsum('ijL,lkL->ijkl', pqk, pqk.conj()) #:vk += numpy.einsum('ijkl,jk->il', v4, dm) pLqR = lib.transpose(pqkR, axes=(0,2,1), out=pLqR).reshape(-1,nao) pLqI = lib.transpose(pqkI, axes=(0,2,1), out=pLqI).reshape(-1,nao) nG = p1 - p0 iLkR = numpy.ndarray((nao,nG,nao), buffer=pqkR) iLkI = numpy.ndarray((nao,nG,nao), buffer=pqkI) for i in range(nset): if k_real: lib.dot(pLqR, dmsR[i], 1, iLkR.reshape(nao*nG,nao)) lib.dot(pLqI, dmsR[i], 1, iLkI.reshape(nao*nG,nao)) iLkR *= vkcoulG[p0:p1].reshape(1,nG,1) iLkI *= vkcoulG[p0:p1].reshape(1,nG,1) lib.dot(iLkR.reshape(nao,-1), pLqR.reshape(nao,-1).T, 1, vkR[i], 1) lib.dot(iLkI.reshape(nao,-1), pLqI.reshape(nao,-1).T, 1, vkR[i], 1) else: zdotNN(pLqR, pLqI, dmsR[i], dmsI[i], 1, iLkR.reshape(-1,nao), iLkI.reshape(-1,nao)) iLkR *= vkcoulG[p0:p1].reshape(1,nG,1) iLkI *= vkcoulG[p0:p1].reshape(1,nG,1) zdotNC(iLkR.reshape(nao,-1), iLkI.reshape(nao,-1), pLqR.reshape(nao,-1).T, pLqI.reshape(nao,-1).T, 1, vkR[i], vkI[i]) #t2 = log.timer_debug1(' with_k', *t2) pqkR = pqkI = pLqR = pLqI = iLkR = iLkI = None #t2 = log.timer_debug1('%d:%d'%(p0,p1), *t2) t1 = log.timer_debug1('aft_jk.get_jk', *t1) if with_j: if j_real: vj = vjR else: vj = vjR + vjI * 1j vj = vj.reshape(dm.shape) if with_k: if k_real: vk = vkR else: vk = vkR + vkI * 1j # Add ewald_exxdiv contribution because G=0 was not included in the # non-uniform grids if (exxdiv == 'ewald' and (cell.dimension < 2 or # 0D and 1D are computed with inf_vacuum (cell.dimension == 2 and cell.low_dim_ft_type == 'inf_vacuum'))): _ewald_exxdiv_for_G0(cell, kpt, dms, vk) vk = vk.reshape(dm.shape) return vj, vk
def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=numpy.zeros((1,3)), kpts_band=None, exxdiv=None): cell = mydf.cell log = logger.Logger(mydf.stdout, mydf.verbose) t1 = (logger.process_clock(), logger.perf_counter()) mesh = mydf.mesh dm_kpts = lib.asarray(dm_kpts, order='C') dms = _format_dms(dm_kpts, kpts) nset, nkpts, nao = dms.shape[:3] swap_2e = (kpts_band is None) kpts_band, input_band = _format_kpts_band(kpts_band, kpts), kpts_band nband = len(kpts_band) kk_table = kpts_band.reshape(-1,1,3) - kpts.reshape(1,-1,3) kk_todo = numpy.ones(kk_table.shape[:2], dtype=bool) vkR = numpy.zeros((nset,nband,nao,nao)) vkI = numpy.zeros((nset,nband,nao,nao)) dmsR = numpy.asarray(dms.real, order='C') dmsI = numpy.asarray(dms.imag, order='C') mem_now = lib.current_memory()[0] max_memory = max(2000, (mydf.max_memory - mem_now)) * .8 log.debug1('max_memory = %d MB (%d in use)', max_memory, mem_now) # K_pq = ( p{k1} i{k2} | i{k2} q{k1} ) def make_kpt(kpt): # kpt = kptj - kpti # search for all possible ki and kj that has ki-kj+kpt=0 kk_match = numpy.einsum('ijx->ij', abs(kk_table + kpt)) < 1e-9 kpti_idx, kptj_idx = numpy.where(kk_todo & kk_match) nkptj = len(kptj_idx) log.debug1('kpt = %s', kpt) log.debug2('kpti_idx = %s', kpti_idx) log.debug2('kptj_idx = %s', kptj_idx) kk_todo[kpti_idx,kptj_idx] = False if swap_2e and not is_zero(kpt): kk_todo[kptj_idx,kpti_idx] = False max_memory1 = max_memory * (nkptj+1)/(nkptj+5) #blksize = max(int(max_memory1*4e6/(nkptj+5)/16/nao**2), 16) #bufR = numpy.empty((blksize*nao**2)) #bufI = numpy.empty((blksize*nao**2)) # Use DF object to mimic KRHF/KUHF object in function get_coulG vkcoulG = mydf.weighted_coulG(kpt, exxdiv, mesh) kptjs = kpts[kptj_idx] weight = 1./len(kpts) perm_sym = swap_2e and not is_zero(kpt) for aoaoks, p0, p1 in mydf.ft_loop(mesh, kpt, kptjs, max_memory=max_memory1): _update_vk_((vkR, vkI), aoaoks, (dmsR, dmsI), vkcoulG[p0:p1], weight, kpti_idx, kptj_idx, perm_sym) for ki, kpti in enumerate(kpts_band): for kj, kptj in enumerate(kpts): if kk_todo[ki,kj]: make_kpt(kptj-kpti) t1 = log.timer_debug1('get_k_kpts: make_kpt (%d,*)'%ki, *t1) if (gamma_point(kpts) and gamma_point(kpts_band) and not numpy.iscomplexobj(dm_kpts)): vk_kpts = vkR else: vk_kpts = vkR + vkI * 1j # Add ewald_exxdiv contribution because G=0 was not included in the # non-uniform grids if (exxdiv == 'ewald' and (cell.dimension < 2 or # 0D and 1D are computed with inf_vacuum (cell.dimension == 2 and cell.low_dim_ft_type == 'inf_vacuum'))): _ewald_exxdiv_for_G0(cell, kpts_band, dms, vk_kpts, kpts_band) return _format_jks(vk_kpts, dm_kpts, input_band, kpts)
def get_k_kpts(mydf, dm_kpts, hermi=1, kpts=numpy.zeros((1,3)), kpt_band=None, exxdiv=None): cell = mydf.cell log = logger.Logger(mydf.stdout, mydf.verbose) t1 = (time.clock(), time.time()) dm_kpts = lib.asarray(dm_kpts, order='C') dms = _format_dms(dm_kpts, kpts) nset, nkpts, nao = dms.shape[:3] if kpt_band is None: kpts_band = kpts swap_2e = True else: kpts_band = numpy.reshape(kpt_band, (-1,3)) nband = len(kpts_band) kk_table = kpts_band.reshape(-1,1,3) - kpts.reshape(1,-1,3) kk_todo = numpy.ones(kk_table.shape[:2], dtype=bool) vkR = numpy.zeros((nset,nband,nao,nao)) vkI = numpy.zeros((nset,nband,nao,nao)) dmsR = numpy.asarray(dms.real, order='C') dmsI = numpy.asarray(dms.imag, order='C') mem_now = lib.current_memory()[0] max_memory = max(2000, (mydf.max_memory - mem_now)) * .8 log.debug1('max_memory = %d MB (%d in use)', max_memory, mem_now) # K_pq = ( p{k1} i{k2} | i{k2} q{k1} ) def make_kpt(kpt): # kpt = kptj - kpti # search for all possible ki and kj that has ki-kj+kpt=0 kk_match = numpy.einsum('ijx->ij', abs(kk_table + kpt)) < 1e-9 kpti_idx, kptj_idx = numpy.where(kk_todo & kk_match) nkptj = len(kptj_idx) log.debug1('kpt = %s', kpt) log.debug2('kpti_idx = %s', kpti_idx) log.debug2('kptj_idx = %s', kptj_idx) kk_todo[kpti_idx,kptj_idx] = False if swap_2e and not is_zero(kpt): kk_todo[kptj_idx,kpti_idx] = False max_memory1 = max_memory * (nkptj+1)/(nkptj+5) blksize = max(int(max_memory1*4e6/(nkptj+5)/16/nao**2), 16) bufR = numpy.empty((blksize*nao**2)) bufI = numpy.empty((blksize*nao**2)) # Use DF object to mimic KRHF/KUHF object in function get_coulG mydf.exxdiv = exxdiv vkcoulG = mydf.weighted_coulG(kpt, True, mydf.gs) kptjs = kpts[kptj_idx] # <r|-G+k_rs|s> = conj(<s|G-k_rs|r>) = conj(<s|G+k_sr|r>) for k, pqkR, pqkI, p0, p1 \ in mydf.ft_loop(mydf.gs, kpt, kptjs, max_memory=max_memory1): ki = kpti_idx[k] kj = kptj_idx[k] coulG = numpy.sqrt(vkcoulG[p0:p1]) # case 1: k_pq = (pi|iq) #:v4 = numpy.einsum('ijL,lkL->ijkl', pqk, pqk.conj()) #:vk += numpy.einsum('ijkl,jk->il', v4, dm) pqkR *= coulG pqkI *= coulG pLqR = lib.transpose(pqkR.reshape(nao,nao,-1), axes=(0,2,1), out=bufR) pLqI = lib.transpose(pqkI.reshape(nao,nao,-1), axes=(0,2,1), out=bufI) iLkR = numpy.empty((nao*(p1-p0),nao)) iLkI = numpy.empty((nao*(p1-p0),nao)) for i in range(nset): iLkR, iLkI = zdotNN(pLqR.reshape(-1,nao), pLqI.reshape(-1,nao), dmsR[i,kj], dmsI[i,kj], 1, iLkR, iLkI) zdotNC(iLkR.reshape(nao,-1), iLkI.reshape(nao,-1), pLqR.reshape(nao,-1).T, pLqI.reshape(nao,-1).T, 1, vkR[i,ki], vkI[i,ki], 1) # case 2: k_pq = (iq|pi) #:v4 = numpy.einsum('iLj,lLk->ijkl', pqk, pqk.conj()) #:vk += numpy.einsum('ijkl,li->kj', v4, dm) if swap_2e and not is_zero(kpt): iLkR = iLkR.reshape(nao,-1) iLkI = iLkI.reshape(nao,-1) for i in range(nset): iLkR, iLkI = zdotNN(dmsR[i,ki], dmsI[i,ki], pLqR.reshape(nao,-1), pLqI.reshape(nao,-1), 1, iLkR, iLkI) zdotCN(pLqR.reshape(-1,nao).T, pLqI.reshape(-1,nao).T, iLkR.reshape(-1,nao), iLkI.reshape(-1,nao), 1, vkR[i,kj], vkI[i,kj], 1) pqkR = pqkI = coulG = pLqR = pLqI = iLkR = iLkI = None for ki, kpti in enumerate(kpts_band): for kj, kptj in enumerate(kpts): if kk_todo[ki,kj]: make_kpt(kptj-kpti) if (gamma_point(kpts) and gamma_point(kpts_band) and not numpy.iscomplexobj(dm_kpts)): vk_kpts = vkR else: vk_kpts = vkR + vkI * 1j vk_kpts *= 1./nkpts # G=0 was not included in the non-uniform grids if cell.dimension != 3 and exxdiv is not None: assert(exxdiv.lower() == 'ewald') _ewald_exxdiv_for_G0(cell, kpts_band, dms, vk_kpts) if kpt_band is not None and numpy.shape(kpt_band) == (3,): if dm_kpts.ndim == 3: # One set of dm_kpts for KRHF return vk_kpts[0,0] else: return vk_kpts[:,0] else: return vk_kpts.reshape(dm_kpts.shape)
def get_jk(mydf, dm, hermi=1, kpt=numpy.zeros(3), kpt_band=None, with_j=True, with_k=True, exxdiv=None): '''JK for given k-point''' from pyscf.pbc.df.df_jk import _ewald_exxdiv_for_G0 vj = vk = None if kpt_band is not None and abs(kpt-kpt_band).sum() > 1e-9: kpt = numpy.reshape(kpt, (1,3)) if with_k: vk = get_k_kpts(mydf, [dm], hermi, kpt, kpt_band, exxdiv) if with_j: vj = get_j_kpts(mydf, [dm], hermi, kpt, kpt_band) return vj, vk cell = mydf.cell log = logger.Logger(mydf.stdout, mydf.verbose) t1 = (time.clock(), time.time()) dm = numpy.asarray(dm, order='C') dms = _format_dms(dm, [kpt]) nset, _, nao = dms.shape[:3] dms = dms.reshape(nset,nao,nao) j_real = gamma_point(kpt) k_real = gamma_point(kpt) and not numpy.iscomplexobj(dms) kptii = numpy.asarray((kpt,kpt)) kpt_allow = numpy.zeros(3) if with_j: vjcoulG = mydf.weighted_coulG(kpt_allow, False, mydf.gs) vjR = numpy.zeros((nset,nao,nao)) vjI = numpy.zeros((nset,nao,nao)) if with_k: mydf.exxdiv = exxdiv vkcoulG = mydf.weighted_coulG(kpt_allow, True, mydf.gs) vkR = numpy.zeros((nset,nao,nao)) vkI = numpy.zeros((nset,nao,nao)) dmsR = numpy.asarray(dms.real.reshape(nset,nao,nao), order='C') dmsI = numpy.asarray(dms.imag.reshape(nset,nao,nao), order='C') mem_now = lib.current_memory()[0] max_memory = max(2000, (mydf.max_memory - mem_now)) * .8 log.debug1('max_memory = %d MB (%d in use)', max_memory, mem_now) t2 = t1 # rho_rs(-G+k_rs) is computed as conj(rho_{rs^*}(G-k_rs)) # == conj(transpose(rho_sr(G+k_sr), (0,2,1))) blksize = max(int(max_memory*.25e6/16/nao**2), 16) bufR = numpy.empty(blksize*nao**2) bufI = numpy.empty(blksize*nao**2) for pqkR, pqkI, p0, p1 in mydf.pw_loop(mydf.gs, kptii, max_memory=max_memory): t2 = log.timer_debug1('%d:%d ft_aopair'%(p0,p1), *t2) pqkR = pqkR.reshape(nao,nao,-1) pqkI = pqkI.reshape(nao,nao,-1) if with_j: for i in range(nset): rhoR = numpy.einsum('pq,pqk->k', dmsR[i], pqkR) rhoR+= numpy.einsum('pq,pqk->k', dmsI[i], pqkI) rhoI = numpy.einsum('pq,pqk->k', dmsI[i], pqkR) rhoI-= numpy.einsum('pq,pqk->k', dmsR[i], pqkI) rhoR *= vjcoulG[p0:p1] rhoI *= vjcoulG[p0:p1] vjR[i] += numpy.einsum('pqk,k->pq', pqkR, rhoR) vjR[i] -= numpy.einsum('pqk,k->pq', pqkI, rhoI) if not j_real: vjI[i] += numpy.einsum('pqk,k->pq', pqkR, rhoI) vjI[i] += numpy.einsum('pqk,k->pq', pqkI, rhoR) #t2 = log.timer_debug1(' with_j', *t2) if with_k: coulG = numpy.sqrt(vkcoulG[p0:p1]) pqkR *= coulG pqkI *= coulG #:v4 = numpy.einsum('ijL,lkL->ijkl', pqk, pqk.conj()) #:vk += numpy.einsum('ijkl,jk->il', v4, dm) pLqR = lib.transpose(pqkR, axes=(0,2,1), out=bufR).reshape(-1,nao) pLqI = lib.transpose(pqkI, axes=(0,2,1), out=bufI).reshape(-1,nao) iLkR = numpy.ndarray((nao*(p1-p0),nao), buffer=pqkR) iLkI = numpy.ndarray((nao*(p1-p0),nao), buffer=pqkI) for i in range(nset): if k_real: lib.dot(pLqR, dmsR[i], 1, iLkR) lib.dot(pLqI, dmsR[i], 1, iLkI) lib.dot(iLkR.reshape(nao,-1), pLqR.reshape(nao,-1).T, 1, vkR[i], 1) lib.dot(iLkI.reshape(nao,-1), pLqI.reshape(nao,-1).T, 1, vkR[i], 1) else: zdotNN(pLqR, pLqI, dmsR[i], dmsI[i], 1, iLkR, iLkI) zdotNC(iLkR.reshape(nao,-1), iLkI.reshape(nao,-1), pLqR.reshape(nao,-1).T, pLqI.reshape(nao,-1).T, 1, vkR[i], vkI[i]) #t2 = log.timer_debug1(' with_k', *t2) pqkR = pqkI = coulG = pLqR = pLqI = iLkR = iLkI = None #t2 = log.timer_debug1('%d:%d'%(p0,p1), *t2) bufR = bufI = None t1 = log.timer_debug1('pwdf_jk.get_jk', *t1) if with_j: if j_real: vj = vjR else: vj = vjR + vjI * 1j vj = vj.reshape(dm.shape) if with_k: if k_real: vk = vkR else: vk = vkR + vkI * 1j if cell.dimension != 3 and exxdiv is not None: assert(exxdiv.lower() == 'ewald') _ewald_exxdiv_for_G0(cell, kpt, dms, vk) vk = vk.reshape(dm.shape) return vj, vk